# severo's picture
# severo HF staff
# create app
# 418a37b
import gradio as gr
from huggingface_hub import hf_hub_download
import subprocess
def get_dataset_dependencies(dataset: str) -> set[str]:
    """Return the dependencies ``findpydeps`` reports for a dataset's script.

    The script file name is derived from the last ``/``-separated component
    of ``dataset`` plus ``.py`` (``"google/fleurs"`` -> ``"fleurs.py"``) and
    is fetched with ``hf_hub_download`` from the dataset repository.

    Args:
        dataset: Dataset repository id, e.g. ``"mnist"`` or ``"google/fleurs"``.

    Returns:
        The non-empty, whitespace-stripped lines printed by ``findpydeps``.

    Raises:
        subprocess.CalledProcessError: If ``findpydeps`` exits with a
            non-zero status.

    Any exception raised by ``hf_hub_download`` propagates unchanged.
    """
    script_name = dataset.split("/")[-1] + ".py"
    input_file = hf_hub_download(repo_id=dataset, filename=script_name, repo_type="dataset")
    # check=True: a failed run must surface as an error instead of being
    # silently reported as "no dependencies".
    result = subprocess.run(
        ["findpydeps", "-i", input_file, "--no-header"],
        capture_output=True,
        text=True,
        check=True,
    )
    stripped_lines = (line.strip() for line in result.stdout.splitlines())
    return {name for name in stripped_lines if name}
def update(datasets: str):
    """Collect the script dependencies of every dataset listed in ``datasets``.

    ``datasets`` is split on newlines and each entry is stripped. Every entry
    is printed; blank entries are then skipped. A dataset whose processing
    raises is reported on stdout and does not stop the remaining ones.

    Returns:
        The union of all dependencies, sorted and joined with newlines
        (an empty string when nothing was collected).
    """
    collected = set()
    for line in datasets.split("\n"):
        dataset = line.strip()
        print(dataset)
        if dataset:
            try:
                dependencies = get_dataset_dependencies(dataset)
                print(f"Dependencies for {dataset} processed: {len(dependencies)}")
                collected.update(dependencies)
            except Exception as e:
                # Best effort: log the failure and move on to the next dataset.
                print(f"Error processing {dataset}: {e}")
    ordered = sorted(collected)
    return "\n".join(ordered)
# NOTE(review): this copy of the file carries no indentation, so the nesting
# below (only the two textboxes inside the Row) is a reconstruction — confirm
# against the deployed app.

# Markdown passed to gr.Markdown as the first component of the page.
_INTRO_MARKDOWN = """# Script-based dataset dependencies
Paste a list of newline-separated dataset names, and then click **Run** to see the list of dependencies in their scripts.
"""

# Values handed to gr.Examples; each entry is a complete "Datasets" textbox value.
examples = [
    "mnist\ncifar10",
    "mnist",
    """espnet/yodas
gaia-benchmark/GAIA
google/fleurs
mozilla-foundation/common_voice_1_0
mozilla-foundation/common_voice_10_0
mozilla-foundation/common_voice_11_0
mozilla-foundation/common_voice_12_0
mozilla-foundation/common_voice_13_0
mozilla-foundation/common_voice_14_0
mozilla-foundation/common_voice_15_0
mozilla-foundation/common_voice_16_0
mozilla-foundation/common_voice_16_1
mozilla-foundation/common_voice_2_0
mozilla-foundation/common_voice_3_0
mozilla-foundation/common_voice_4_0
mozilla-foundation/common_voice_5_0
mozilla-foundation/common_voice_5_1
mozilla-foundation/common_voice_6_0
mozilla-foundation/common_voice_6_1
mozilla-foundation/common_voice_7_0
mozilla-foundation/common_voice_8_0
mozilla-foundation/common_voice_9_0
poloclub/diffusiondb
pufanyi/MIMICIT
speechcolab/gigaspeech
togethercomputer/RedPajama-Data-1T
togethercomputer/RedPajama-Data-V2
""",
]

with gr.Blocks() as demo:
    gr.Markdown(_INTRO_MARKDOWN)
    with gr.Row():
        # Input on the left, read-only style output with a copy button on the right.
        inp = gr.Textbox(
            placeholder="mnist\ncifar10",
            label="Datasets",
            lines=10,
            max_lines=10,
        )
        out = gr.Textbox(
            label="Dependencies",
            lines=10,
            max_lines=10,
            show_copy_button=True,
        )
    btn = gr.Button("Run")
    gr.Examples(examples=examples, inputs=inp, label="Example Datasets")
    # Clicking "Run" sends the textbox content to update() and shows its result.
    btn.click(fn=update, inputs=inp, outputs=out)

demo.launch()