Spaces:
Running
Running
Align more metadata with other repo types (models,spaces)
#2
by
julien-c
HF staff
- opened
- .github/workflows/deploy.yml +0 -37
- apputils.py +2 -2
- tagging_app.py +7 -14
.github/workflows/deploy.yml
DELETED
@@ -1,37 +0,0 @@
|
|
1 |
-
name: Deployment
|
2 |
-
|
3 |
-
on:
|
4 |
-
push:
|
5 |
-
branches:
|
6 |
-
- main
|
7 |
-
workflow_dispatch:
|
8 |
-
|
9 |
-
jobs:
|
10 |
-
deployment:
|
11 |
-
runs-on: ubuntu-latest
|
12 |
-
steps:
|
13 |
-
- name: Check out
|
14 |
-
uses: actions/checkout@v2
|
15 |
-
with:
|
16 |
-
fetch-depth: 0
|
17 |
-
- name: Set up Python
|
18 |
-
uses: actions/setup-python@v2
|
19 |
-
with:
|
20 |
-
python-version: "3.6"
|
21 |
-
- name: Install dependencies
|
22 |
-
run: |
|
23 |
-
python -m pip install --upgrade pip
|
24 |
-
pip install -r requirements.txt
|
25 |
-
- name: Build metadata file
|
26 |
-
run: |
|
27 |
-
python build_metadata_file.py
|
28 |
-
- name: Commit metadata file
|
29 |
-
run: |
|
30 |
-
git config user.name github-actions
|
31 |
-
git config user.email github-actions@github.com
|
32 |
-
git add -f metadata_*
|
33 |
-
git commit -m "Add metadata file"
|
34 |
-
- name: Push to Hub
|
35 |
-
env:
|
36 |
-
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
37 |
-
run: git push --force https://albertvillanova:$HF_TOKEN@huggingface.co/spaces/huggingface/datasets-tagging main
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
apputils.py
CHANGED
@@ -6,11 +6,11 @@ def new_state() -> Dict[str, List]:
|
|
6 |
"task_categories": [],
|
7 |
"task_ids": [],
|
8 |
"multilinguality": [],
|
9 |
-
"languages": [],
|
10 |
"language_creators": [],
|
11 |
"annotations_creators": [],
|
12 |
"source_datasets": [],
|
13 |
"size_categories": [],
|
14 |
-
"licenses": [],
|
15 |
"pretty_name": None,
|
16 |
}
|
|
|
6 |
"task_categories": [],
|
7 |
"task_ids": [],
|
8 |
"multilinguality": [],
|
9 |
+
"language": [],
|
10 |
"language_creators": [],
|
11 |
"annotations_creators": [],
|
12 |
"source_datasets": [],
|
13 |
"size_categories": [],
|
14 |
+
"license": [],
|
15 |
"pretty_name": None,
|
16 |
}
|
tagging_app.py
CHANGED
@@ -258,7 +258,7 @@ if "other" in state["multilinguality"]:
|
|
258 |
state["multilinguality"][state["multilinguality"].index("other")] = f"other-{other_multilinguality}"
|
259 |
|
260 |
valid_values, invalid_values = list(), list()
|
261 |
-
for langtag in state["languages"]:
|
262 |
try:
|
263 |
lc.get(langtag)
|
264 |
valid_values.append(langtag)
|
@@ -273,7 +273,7 @@ langtags = leftcol.text_area(
|
|
273 |
"What languages are represented in the dataset? expected format is BCP47 tags separated for ';' e.g. 'en;fr'",
|
274 |
value=";".join(valid_values),
|
275 |
)
|
276 |
-
state["languages"] = langtags.strip().split(";") if langtags.strip() != "" else []
|
277 |
|
278 |
|
279 |
#
|
@@ -297,23 +297,16 @@ state["annotations_creators"] = multiselect(
|
|
297 |
|
298 |
|
299 |
#
|
300 |
-
#
|
301 |
#
|
302 |
-
state["licenses"] = multiselect(
|
303 |
leftcol,
|
304 |
-
"
|
305 |
-
"What
|
306 |
valid_set=list(known_licenses.keys()),
|
307 |
-
values=state["licenses"],
|
308 |
format_func=lambda l: f"{l} : {known_licenses[l]}",
|
309 |
)
|
310 |
-
if "other" in state["licenses"]:
|
311 |
-
other_license = st.text_input(
|
312 |
-
"You selected 'other' type of license. Please enter a short hyphen-separated description:",
|
313 |
-
value="my-license",
|
314 |
-
)
|
315 |
-
st.write(f"Registering other-{other_license} license")
|
316 |
-
state["licenses"][state["licenses"].index("other")] = f"other-{other_license}"
|
317 |
|
318 |
|
319 |
#
|
|
|
258 |
state["multilinguality"][state["multilinguality"].index("other")] = f"other-{other_multilinguality}"
|
259 |
|
260 |
valid_values, invalid_values = list(), list()
|
261 |
+
for langtag in state["language"]:
|
262 |
try:
|
263 |
lc.get(langtag)
|
264 |
valid_values.append(langtag)
|
|
|
273 |
"What languages are represented in the dataset? expected format is BCP47 tags separated for ';' e.g. 'en;fr'",
|
274 |
value=";".join(valid_values),
|
275 |
)
|
276 |
+
state["language"] = langtags.strip().split(";") if langtags.strip() != "" else []
|
277 |
|
278 |
|
279 |
#
|
|
|
297 |
|
298 |
|
299 |
#
|
300 |
+
# LICENSE
|
301 |
#
|
302 |
+
state["license"] = multiselect(
|
303 |
leftcol,
|
304 |
+
"License",
|
305 |
+
"What license(s) is the dataset under?",
|
306 |
valid_set=list(known_licenses.keys()),
|
307 |
+
values=state["license"],
|
308 |
format_func=lambda l: f"{l} : {known_licenses[l]}",
|
309 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
310 |
|
311 |
|
312 |
#
|