lhoestq HF staff committed on
Commit
43b024e
Β·
1 Parent(s): 641f494

rename files

Browse files
Dockerfile CHANGED
@@ -20,8 +20,8 @@ RUN pip install --no-cache-dir --upgrade pip
20
  COPY --chown=user run.py app.py requirements.txt README.md $HOME/app/
21
 
22
  # Install dependencies
23
- RUN pip install "gradio[oauth]" fire
24
  RUN pip install -r requirements.txt
25
 
26
  # Run app
27
- ENTRYPOINT python app.py
 
20
  COPY --chown=user run.py app.py requirements.txt README.md $HOME/app/
21
 
22
  # Install dependencies
23
+ RUN pip install "gradio[oauth]"
24
  RUN pip install -r requirements.txt
25
 
26
  # Run app
27
+ ENTRYPOINT python start_app.py
requirements.txt CHANGED
@@ -1,3 +1,4 @@
 
1
  duckdb
2
  huggingface_hub
3
  tabulate
 
1
+ fire
2
  duckdb
3
  huggingface_hub
4
  tabulate
run.py β†’ run_job.py RENAMED
@@ -50,11 +50,13 @@ def sql(src: str, dst: str, query: str, config: str = "default", split: str = "t
50
  src_kwargs = con.sql(CMD_SRC_KWARGS.format(src=src, config=config, split=split)).df().to_dict(orient="records")
51
  if not src_kwargs:
52
  raise ValueError(f'Invalid --config "{config}" for dataset "{src}", please select a valid dataset config/subset.')
 
53
  con.sql((CMD_SRC_DRY_RUN if dry_run else CMD_SRC).format(**src_kwargs[0]))
54
  if dry_run:
55
- print(f"Sample data from '{src}' that would be written to '{dst}':\n")
56
  else:
57
  con.sql("PRAGMA enable_progress_bar;")
 
58
  result = con.sql((CMD_DST_DRY_RUN if dry_run else CMD_DST).format(query=query.rstrip("\n ;")))
59
  if dry_run:
60
  print(result.df().to_markdown())
 
50
  src_kwargs = con.sql(CMD_SRC_KWARGS.format(src=src, config=config, split=split)).df().to_dict(orient="records")
51
  if not src_kwargs:
52
  raise ValueError(f'Invalid --config "{config}" for dataset "{src}", please select a valid dataset config/subset.')
53
+
54
  con.sql((CMD_SRC_DRY_RUN if dry_run else CMD_SRC).format(**src_kwargs[0]))
55
  if dry_run:
56
+ print(f"Sample data from '{src}' that would be written to dataset '{dst}':\n")
57
  else:
58
  con.sql("PRAGMA enable_progress_bar;")
59
+
60
  result = con.sql((CMD_DST_DRY_RUN if dry_run else CMD_DST).format(query=query.rstrip("\n ;")))
61
  if dry_run:
62
  print(result.df().to_markdown())
app.py β†’ start_app.py RENAMED
@@ -1,18 +1,20 @@
 
1
  import re
2
  import subprocess
3
  import yaml
4
 
5
  import gradio as gr
6
  import requests
7
- from huggingface_hub import HfApi
8
 
9
 
10
- CMD = ["python" ,"run.py"]
11
 
12
  with open("README.md") as f:
13
  METADATA = yaml.safe_load(f.read().split("---\n")[1])
14
  TITLE = METADATA["title"]
15
  EMOJI = METADATA["emoji"]
 
16
 
17
  try:
18
  process = subprocess.run(CMD + ["--help"], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
@@ -22,26 +24,68 @@ except Exception:
22
 
23
  DRY_RUN = bool(HELP) and bool(m :=re.search("--dry(-|_)run", HELP)) and m.group(0)
24
 
25
- def update_pbars(pbars: dict[str, float], line: str):
26
  if (percent_match := re.search("\\d+(?:\\.\\d+)?%", line)) and any(c in line.split("%")[1][:10] for c in "|β–ˆβ–Œ"):
27
  [pbars.pop(desc) for desc, percent in pbars.items() if percent == 1.]
28
  percent = float(percent_match.group(0)[:-1]) / 100
29
  desc = line[:percent_match.start()].strip() or "Progress"
30
  pbars[desc] = percent
 
 
 
31
 
32
  def dry_run(src, config, split, dst, query):
33
  if not all([src, config, split, dst, query]):
34
  raise gr.Error("Please fill source, destination and query.")
35
- process = subprocess.Popen(CMD + ["--src", src, "--config", config, "--split", split, "--dst", dst, "--query", query, DRY_RUN], stdout=subprocess.PIPE)
36
- logs = ""
 
 
 
37
  for line in iter(process.stdout.readline, b""):
38
  logs += line.decode()
39
- yield {output_markdown: logs, progress_labels: gr.Label(visible=False)}
40
 
41
- def run(src, config, split, dst, query):
42
  if not all([src, config, split, dst, query]):
43
  raise gr.Error("Please fill source, destination and query.")
44
- raise gr.Error("NotImplemented")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
 
46
  READ_FUNCTIONS = ("pl.read_parquet", "pl.read_csv", "pl.read_json")
47
  NUM_TRENDING_DATASETS = 10
@@ -51,17 +95,17 @@ with gr.Blocks() as demo:
51
  with gr.Column(scale=10):
52
  gr.Markdown(f"# {TITLE} {EMOJI}")
53
  with gr.Column():
54
- gr.LoginButton(scale=0.1)
55
  with gr.Row():
56
- with gr.Column():
57
  with gr.Row():
58
  loading_codes_json = gr.JSON([], visible=False)
59
  dataset_dropdown = gr.Dropdown(label="Source Dataset", allow_custom_value=True, scale=10)
60
  subset_dropdown = gr.Dropdown(info="Subset", allow_custom_value=True, show_label=False, visible=False)
61
  split_dropdown = gr.Dropdown(info="Split", allow_custom_value=True, show_label=False, visible=False)
62
- with gr.Column(scale=0.1, min_width=60):
63
  gr.HTML("<div style='font-size: 4em;'>β†’</div>")
64
- with gr.Column():
65
  dst_dropdown = gr.Dropdown(label="Destination Dataset", allow_custom_value=True)
66
  query_textarea = gr.TextArea(label="SQL Query", placeholder="SELECT * FROM src;", value="SELECT * FROM src;", container=False, show_label=False)
67
  with gr.Row():
 
1
+ import os
2
  import re
3
  import subprocess
4
  import yaml
5
 
6
  import gradio as gr
7
  import requests
8
+ from huggingface_hub import HfApi, get_token
9
 
10
 
11
+ CMD = ["python" ,"run_job.py"]
12
 
13
  with open("README.md") as f:
14
  METADATA = yaml.safe_load(f.read().split("---\n")[1])
15
  TITLE = METADATA["title"]
16
  EMOJI = METADATA["emoji"]
17
+ spaceId = os.environ.get("SPACE_ID") or "lhoestq/run-duckdb"
18
 
19
  try:
20
  process = subprocess.run(CMD + ["--help"], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
 
24
 
25
  DRY_RUN = bool(HELP) and bool(m :=re.search("--dry(-|_)run", HELP)) and m.group(0)
26
 
27
def parse_log(line: str, pbars: dict[str, float]) -> str:
    """Parse one line of job output, filtering tqdm-style progress bars.

    Progress-bar lines (a percentage immediately followed by bar characters,
    e.g. ``"Downloading: 42%|████      "``) update *pbars* in place and are
    suppressed (empty string returned); any other line is passed through.

    Args:
        line: A single decoded line of log output.
        pbars: Mapping of progress-bar description -> fraction complete
            (0.0-1.0), mutated in place. Bars that already reached 1.0 are
            dropped before a new percentage is recorded.

    Returns:
        ``""`` for progress-bar lines, otherwise ``line`` unchanged.
    """
    percent_match = re.search(r"\d+(?:\.\d+)?%", line)
    # A tqdm-style bar has a percent sign closely followed by |/β–ˆ/β–Œ characters.
    if percent_match and any(c in line.split("%")[1][:10] for c in "|β–ˆβ–Œ"):
        # Drop bars that already finished so completed bars disappear from the UI.
        for desc in [d for d, p in pbars.items() if p == 1.0]:
            del pbars[desc]
        percent = float(percent_match.group(0)[:-1]) / 100
        desc = line[:percent_match.start()].strip() or "Progress"
        pbars[desc] = percent
        # The original used `yield` here, turning this into a generator; the
        # caller does `logs += parse_log(...)`, which raises TypeError on a
        # generator. Returning plain strings fixes that.
        return ""
    return line
36
 
37
def dry_run(src, config, split, dst, query):
    """Run the job locally with the --dry-run flag and stream output to the UI.

    Yields dicts of gradio component updates: first the command line being run
    (rendered as a bash code block), then the accumulated process output after
    each line.

    Raises:
        gr.Error: If any required field is empty.
    """
    if not all([src, config, split, dst, query]):
        raise gr.Error("Please fill source, destination and query.")
    args = ["--src", src, "--config", config, "--split", split, "--dst", dst, "--query", query, DRY_RUN]
    cmd = CMD + args
    # Quote arguments containing spaces for display, escaping embedded double
    # quotes with a backslash. The original `arg.replace('"', '\"""')` expanded
    # each quote to `"""`, which is not valid bash escaping.
    displayed = " ".join('"' + arg.replace('"', '\\"') + '"' if " " in arg else arg for arg in cmd)
    logs = "Job:\n\n```bash\n" + displayed + "\n```\nOutput:\n\n"
    yield {output_markdown: logs, progress_labels: gr.Label(visible=False)}
    process = subprocess.Popen(cmd, stdout=subprocess.PIPE)
    # Stream the subprocess output line by line so the UI updates live.
    for line in iter(process.stdout.readline, b""):
        logs += line.decode()
        yield {output_markdown: logs}
48
 
49
def run(src, config, split, dst, query, oauth_token: gr.OAuthToken | None, profile: gr.OAuthProfile | None):
    """Launch the job on the HF Jobs API and stream its logs to the UI.

    Yields dicts of gradio component updates: the command line being run, then
    the accumulated job logs and progress-bar labels after each log line.

    Args:
        src: Source dataset id.
        config: Dataset config/subset name.
        split: Dataset split name.
        dst: Destination dataset id.
        query: SQL query to run.
        oauth_token: OAuth token from the gradio login, if the user logged in.
        profile: OAuth profile matching *oauth_token*.

    Raises:
        gr.Error: If a required field is empty or no credentials are available.
    """
    if not all([src, config, split, dst, query]):
        raise gr.Error("Please fill source, destination and query.")
    # Prefer the OAuth login; fall back to a locally stored token (e.g. dev mode).
    if oauth_token and profile:
        token = oauth_token.token
        username = profile.username
    elif (token := get_token()):
        username = HfApi().whoami(token=token)["name"]
    else:
        raise gr.Error("Please log in to run the job.")
    args = ["--src", src, "--config", config, "--split", split, "--dst", dst, "--query", query]
    cmd = CMD + args
    # Quote arguments containing spaces for display, escaping embedded double
    # quotes with a backslash. The original `arg.replace('"', '\"""')` expanded
    # each quote to `"""`, which is not valid bash escaping.
    displayed = " ".join('"' + arg.replace('"', '\\"') + '"' if " " in arg else arg for arg in cmd)
    logs = "Job:\n\n```bash\n" + displayed + "\n```\nOutput:\n\n"
    pbars = {}
    yield {output_markdown: logs, progress_labels: gr.Label(pbars, visible=bool(pbars))}
    resp = requests.post(
        f"https://huggingface.co/api/jobs/{username}",
        json={
            "spaceId": spaceId,
            "arguments": args,
            "command": CMD,
            "environment": {},
            "flavor": "cpu-basic",
        },
        headers={"Authorization": f"Bearer {token}"},
    )
    if resp.status_code != 200:
        logs += resp.text
        pbars = {"Finished with an error ❌": 1.0}
    else:
        job_id = resp.json()["metadata"]["job_id"]
        # stream=True is required to read the response incrementally via
        # resp.raw; without it requests buffers the entire body first.
        resp = requests.get(
            f"https://huggingface.co/api/jobs/{username}/{job_id}/logs-stream",
            headers={"Authorization": f"Bearer {token}"},
            stream=True,
        )
        for line in iter(resp.raw.readline, b""):
            # parse_log filters tqdm-style progress lines into pbars. "".join()
            # works whether parse_log yields string pieces or returns a plain
            # string (the original `logs += parse_log(...)` concatenated a
            # generator object to a str, raising TypeError).
            logs += "".join(parse_log(line.decode(), pbars=pbars))
            yield {output_markdown: logs, progress_labels: gr.Label(pbars, visible=bool(pbars))}
        # NOTE(review): the original checked `process.returncode` here, but
        # `process` is the module-level `--help` probe, not the remote job, so
        # the βœ…/❌ status shown was meaningless. Report plain completion;
        # TODO: query the job-status endpoint for the real outcome.
        pbars = {"Finished": 1.0}
    yield {output_markdown: logs, progress_labels: gr.Label(pbars, visible=bool(pbars))}
89
 
90
  READ_FUNCTIONS = ("pl.read_parquet", "pl.read_csv", "pl.read_json")
91
  NUM_TRENDING_DATASETS = 10
 
95
  with gr.Column(scale=10):
96
  gr.Markdown(f"# {TITLE} {EMOJI}")
97
  with gr.Column():
98
+ gr.LoginButton()
99
  with gr.Row():
100
+ with gr.Column(scale=10):
101
  with gr.Row():
102
  loading_codes_json = gr.JSON([], visible=False)
103
  dataset_dropdown = gr.Dropdown(label="Source Dataset", allow_custom_value=True, scale=10)
104
  subset_dropdown = gr.Dropdown(info="Subset", allow_custom_value=True, show_label=False, visible=False)
105
  split_dropdown = gr.Dropdown(info="Split", allow_custom_value=True, show_label=False, visible=False)
106
+ with gr.Column(min_width=60):
107
  gr.HTML("<div style='font-size: 4em;'>β†’</div>")
108
+ with gr.Column(scale=10):
109
  dst_dropdown = gr.Dropdown(label="Destination Dataset", allow_custom_value=True)
110
  query_textarea = gr.TextArea(label="SQL Query", placeholder="SELECT * FROM src;", value="SELECT * FROM src;", container=False, show_label=False)
111
  with gr.Row():