severo HF staff committed on
Commit
418a37b
1 Parent(s): 47c9fe2

create app

Browse files
Files changed (4) hide show
  1. .gitignore +1 -0
  2. app.py +67 -0
  3. poetry.lock +0 -0
  4. pyproject.toml +18 -0
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ __pycache__
app.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from huggingface_hub import hf_hub_download
3
+ import subprocess
4
+
5
def get_dataset_dependencies(dataset: str) -> set[str]:
    """Return the dependencies that ``findpydeps`` reports for a dataset script.

    The script file name is derived from the last ``/``-separated component of
    ``dataset`` plus ``.py`` (``"google/fleurs"`` -> ``"fleurs.py"``). That file
    is fetched from the dataset repository with ``hf_hub_download`` and its
    local path is passed to the ``findpydeps`` command-line tool.

    Args:
        dataset: Dataset repository id, e.g. ``"mnist"`` or ``"google/fleurs"``.

    Returns:
        The set of non-empty lines written by ``findpydeps`` to stdout.

    Raises:
        subprocess.CalledProcessError: If ``findpydeps`` exits with a non-zero
            status.
        FileNotFoundError: If the ``findpydeps`` executable cannot be started.

    Errors raised by ``hf_hub_download`` (presumably for a missing repository
    or script file -- confirm against the huggingface_hub docs) are not caught
    here and propagate to the caller.
    """
    script_name = dataset.split("/")[-1] + ".py"
    input_file = hf_hub_download(
        repo_id=dataset, filename=script_name, repo_type="dataset"
    )
    # check=True: without it a failed run yields empty stdout and would be
    # indistinguishable from "this script has no dependencies".
    result = subprocess.run(
        ["findpydeps", "-i", input_file, "--no-header"],
        capture_output=True,
        text=True,
        check=True,
    )
    return {dep for dep in result.stdout.split("\n") if dep}
10
+
11
def update(datasets: str) -> str:
    """Collect the dependencies of every dataset listed in ``datasets``.

    Args:
        datasets: Newline-separated dataset repository ids. Surrounding
            whitespace is stripped from each line and blank lines are ignored.

    Returns:
        The unique dependencies of all datasets that could be processed,
        sorted and joined with newlines. Empty string when there are none.

    Progress and per-dataset errors are printed to stdout. A dataset that
    fails is skipped so the remaining ones are still processed.
    """
    all_dependencies = set()
    for line in datasets.split("\n"):
        dataset = line.strip()
        if not dataset:
            # Skip blank entries before logging so the log has no empty lines.
            continue
        print(dataset)
        try:
            dependencies = get_dataset_dependencies(dataset)
        except Exception as e:
            # Deliberate best-effort: one bad dataset must not abort the batch.
            print(f"Error processing {dataset}: {e}")
            continue
        print(f"Dependencies for {dataset} processed: {len(dependencies)}")
        all_dependencies.update(dependencies)
    return "\n".join(sorted(all_dependencies))
26
+
27
# Markdown shown at the top of the page.
DESCRIPTION = """# Script-based dataset dependencies

Paste a list of newline-separated dataset names, and then click **Run** to see the list of dependencies in their scripts.
"""

# Sample inputs offered under the form; each entry is one newline-separated
# list of dataset ids that fills the "Datasets" textbox when selected.
examples = [
    "mnist\ncifar10",
    "mnist",
    """espnet/yodas
gaia-benchmark/GAIA
google/fleurs
mozilla-foundation/common_voice_1_0
mozilla-foundation/common_voice_10_0
mozilla-foundation/common_voice_11_0
mozilla-foundation/common_voice_12_0
mozilla-foundation/common_voice_13_0
mozilla-foundation/common_voice_14_0
mozilla-foundation/common_voice_15_0
mozilla-foundation/common_voice_16_0
mozilla-foundation/common_voice_16_1
mozilla-foundation/common_voice_2_0
mozilla-foundation/common_voice_3_0
mozilla-foundation/common_voice_4_0
mozilla-foundation/common_voice_5_0
mozilla-foundation/common_voice_5_1
mozilla-foundation/common_voice_6_0
mozilla-foundation/common_voice_6_1
mozilla-foundation/common_voice_7_0
mozilla-foundation/common_voice_8_0
mozilla-foundation/common_voice_9_0
poloclub/diffusiondb
pufanyi/MIMICIT
speechcolab/gigaspeech
togethercomputer/RedPajama-Data-1T
togethercomputer/RedPajama-Data-V2
""",
]

with gr.Blocks() as demo:
    gr.Markdown(DESCRIPTION)
    # Input on the left, result on the right.
    with gr.Row():
        inp = gr.Textbox(
            label="Datasets",
            placeholder="mnist\ncifar10",
            lines=10,
            max_lines=10,
        )
        out = gr.Textbox(
            label="Dependencies",
            lines=10,
            max_lines=10,
            show_copy_button=True,
        )
    btn = gr.Button("Run")
    gr.Examples(examples=examples, inputs=inp, label="Example Datasets")
    # Clicking "Run" passes the textbox content to update() and shows its result.
    btn.click(fn=update, inputs=inp, outputs=out)

demo.launch()
poetry.lock ADDED
The diff for this file is too large to render. See raw diff
 
pyproject.toml ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [tool.poetry]
2
+ name = "find-script-based-datasets-dependencies"
3
+ version = "0.1.0"
4
+ description = ""
5
+ authors = ["Sylvain Lesage <[email protected]>"]
6
+ readme = "README.md"
7
+
8
+ [tool.poetry.dependencies]
9
+ python = "^3.9"
10
+ gradio = "4.23.0"
11
+ findpydeps = "^0.2.6"
12
+ pip = "^24.0"
13
+ huggingface-hub = "^0.22.1"
14
+
15
+
16
+ [build-system]
17
+ requires = ["poetry-core"]
18
+ build-backend = "poetry.core.masonry.api"