Spaces:
Running
Running
theo
committed on
Commit
•
8860d0f
1
Parent(s):
c4882f0
fix inputs on others
Browse files - tagging_app.py +19 -14
tagging_app.py
CHANGED
@@ -5,9 +5,14 @@ from typing import Callable, Dict, List, Tuple
|
|
5 |
import langcodes as lc
|
6 |
import streamlit as st
|
7 |
import yaml
|
8 |
-
from datasets.utils.metadata import (
|
9 |
-
|
10 |
-
|
|
|
|
|
|
|
|
|
|
|
11 |
|
12 |
st.set_page_config(
|
13 |
page_title="HF Dataset Tagging App",
|
@@ -173,21 +178,21 @@ state["task_categories"] = multiselect(
|
|
173 |
format_func=lambda tg: f"{tg}: {known_task_ids[tg]['description']}",
|
174 |
)
|
175 |
task_specifics = []
|
176 |
-
for
|
177 |
specs = multiselect(
|
178 |
leftcol,
|
179 |
-
f"Specific _{
|
180 |
f"What specific tasks does the dataset support?",
|
181 |
-
values=[ts for ts in (state["task_ids"] or []) if ts in known_task_ids[
|
182 |
-
valid_set=known_task_ids[
|
183 |
)
|
184 |
if "other" in specs:
|
185 |
-
other_task =
|
186 |
"You selected 'other' task. Please enter a short hyphen-separated description for the task:",
|
187 |
value="my-task-description",
|
188 |
)
|
189 |
-
|
190 |
-
specs[specs.index("other")] = f"{
|
191 |
task_specifics += specs
|
192 |
state["task_ids"] = task_specifics
|
193 |
|
@@ -203,11 +208,11 @@ state["multilinguality"] = multiselect(
|
|
203 |
)
|
204 |
|
205 |
if "other" in state["multilinguality"]:
|
206 |
-
other_multilinguality =
|
207 |
"You selected 'other' type of multilinguality. Please enter a short hyphen-separated description:",
|
208 |
value="my-multilinguality",
|
209 |
)
|
210 |
-
|
211 |
state["multilinguality"][state["multilinguality"].index("other")] = f"other-{other_multilinguality}"
|
212 |
|
213 |
valid_values, invalid_values = list(), list()
|
@@ -286,11 +291,11 @@ if "extended" in state["extended"]:
|
|
286 |
valid_set=all_dataset_ids + ["other"],
|
287 |
)
|
288 |
if "other" in extended_sources:
|
289 |
-
other_extended_sources =
|
290 |
"You selected 'other' dataset. Please enter a short hyphen-separated description:",
|
291 |
value="my-dataset",
|
292 |
)
|
293 |
-
|
294 |
extended_sources[extended_sources.index("other")] = f"other-{other_extended_sources}"
|
295 |
state["source_datasets"] += [f"extended|{src}" for src in extended_sources]
|
296 |
|
|
|
5 |
import langcodes as lc
|
6 |
import streamlit as st
|
7 |
import yaml
|
8 |
+
from datasets.utils.metadata import (
|
9 |
+
DatasetMetadata,
|
10 |
+
known_creators,
|
11 |
+
known_licenses,
|
12 |
+
known_multilingualities,
|
13 |
+
known_size_categories,
|
14 |
+
known_task_ids,
|
15 |
+
)
|
16 |
|
17 |
st.set_page_config(
|
18 |
page_title="HF Dataset Tagging App",
|
|
|
178 |
format_func=lambda tg: f"{tg}: {known_task_ids[tg]['description']}",
|
179 |
)
|
180 |
task_specifics = []
|
181 |
+
for task_category in state["task_categories"]:
|
182 |
specs = multiselect(
|
183 |
leftcol,
|
184 |
+
f"Specific _{task_category}_ tasks",
|
185 |
f"What specific tasks does the dataset support?",
|
186 |
+
values=[ts for ts in (state["task_ids"] or []) if ts in known_task_ids[task_category]["options"]],
|
187 |
+
valid_set=known_task_ids[task_category]["options"],
|
188 |
)
|
189 |
if "other" in specs:
|
190 |
+
other_task = leftcol.text_input(
|
191 |
"You selected 'other' task. Please enter a short hyphen-separated description for the task:",
|
192 |
value="my-task-description",
|
193 |
)
|
194 |
+
leftcol.write(f"Registering {task_category}-other-{other_task} task")
|
195 |
+
specs[specs.index("other")] = f"{task_category}-other-{other_task}"
|
196 |
task_specifics += specs
|
197 |
state["task_ids"] = task_specifics
|
198 |
|
|
|
208 |
)
|
209 |
|
210 |
if "other" in state["multilinguality"]:
|
211 |
+
other_multilinguality = leftcol.text_input(
|
212 |
"You selected 'other' type of multilinguality. Please enter a short hyphen-separated description:",
|
213 |
value="my-multilinguality",
|
214 |
)
|
215 |
+
leftcol.write(f"Registering other-{other_multilinguality} multilinguality")
|
216 |
state["multilinguality"][state["multilinguality"].index("other")] = f"other-{other_multilinguality}"
|
217 |
|
218 |
valid_values, invalid_values = list(), list()
|
|
|
291 |
valid_set=all_dataset_ids + ["other"],
|
292 |
)
|
293 |
if "other" in extended_sources:
|
294 |
+
other_extended_sources = leftcol.text_input(
|
295 |
"You selected 'other' dataset. Please enter a short hyphen-separated description:",
|
296 |
value="my-dataset",
|
297 |
)
|
298 |
+
leftcol.write(f"Registering other-{other_extended_sources} dataset")
|
299 |
extended_sources[extended_sources.index("other")] = f"other-{other_extended_sources}"
|
300 |
state["source_datasets"] += [f"extended|{src}" for src in extended_sources]
|
301 |
|