lhoestq HF staff julien-c HF staff committed on
Commit
c2de6fa
1 Parent(s): f208d84

Align more metadata with other repo types (models,spaces) (#2)

Browse files

- rm GitHub Action (4a3daec80d4aed1eaee735aec7c87dd38354f998)
- `licenses:` => `license:` (35237ea94ff6c278cca5b56e1fa3c92b28564c35)
- `languages:` => `language:` (560591921d23cfc84360565ab9bc17be8da9f0f8)


Co-authored-by: Julien Chaumond <[email protected]>

Files changed (3) hide show
  1. .github/workflows/deploy.yml +0 -37
  2. apputils.py +2 -2
  3. tagging_app.py +7 -14
.github/workflows/deploy.yml DELETED
@@ -1,37 +0,0 @@
1
- name: Deployment
2
-
3
- on:
4
- push:
5
- branches:
6
- - main
7
- workflow_dispatch:
8
-
9
- jobs:
10
- deployment:
11
- runs-on: ubuntu-latest
12
- steps:
13
- - name: Check out
14
- uses: actions/checkout@v2
15
- with:
16
- fetch-depth: 0
17
- - name: Set up Python
18
- uses: actions/setup-python@v2
19
- with:
20
- python-version: "3.6"
21
- - name: Install dependencies
22
- run: |
23
- python -m pip install --upgrade pip
24
- pip install -r requirements.txt
25
- - name: Build metadata file
26
- run: |
27
- python build_metadata_file.py
28
- - name: Commit metadata file
29
- run: |
30
- git config user.name github-actions
31
- git config user.email github-actions@github.com
32
- git add -f metadata_*
33
- git commit -m "Add metadata file"
34
- - name: Push to Hub
35
- env:
36
- HF_TOKEN: ${{ secrets.HF_TOKEN }}
37
- run: git push --force https://albertvillanova:$HF_TOKEN@huggingface.co/spaces/huggingface/datasets-tagging main
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
apputils.py CHANGED
@@ -6,11 +6,11 @@ def new_state() -> Dict[str, List]:
6
  "task_categories": [],
7
  "task_ids": [],
8
  "multilinguality": [],
9
- "languages": [],
10
  "language_creators": [],
11
  "annotations_creators": [],
12
  "source_datasets": [],
13
  "size_categories": [],
14
- "licenses": [],
15
  "pretty_name": None,
16
  }
 
6
  "task_categories": [],
7
  "task_ids": [],
8
  "multilinguality": [],
9
+ "language": [],
10
  "language_creators": [],
11
  "annotations_creators": [],
12
  "source_datasets": [],
13
  "size_categories": [],
14
+ "license": [],
15
  "pretty_name": None,
16
  }
tagging_app.py CHANGED
@@ -258,7 +258,7 @@ if "other" in state["multilinguality"]:
258
  state["multilinguality"][state["multilinguality"].index("other")] = f"other-{other_multilinguality}"
259
 
260
  valid_values, invalid_values = list(), list()
261
- for langtag in state["languages"]:
262
  try:
263
  lc.get(langtag)
264
  valid_values.append(langtag)
@@ -273,7 +273,7 @@ langtags = leftcol.text_area(
273
  "What languages are represented in the dataset? expected format is BCP47 tags separated for ';' e.g. 'en;fr'",
274
  value=";".join(valid_values),
275
  )
276
- state["languages"] = langtags.strip().split(";") if langtags.strip() != "" else []
277
 
278
 
279
  #
@@ -297,23 +297,16 @@ state["annotations_creators"] = multiselect(
297
 
298
 
299
  #
300
- # LICENSES
301
  #
302
- state["licenses"] = multiselect(
303
  leftcol,
304
- "Licenses",
305
- "What licenses is the dataset under?",
306
  valid_set=list(known_licenses.keys()),
307
- values=state["licenses"],
308
  format_func=lambda l: f"{l} : {known_licenses[l]}",
309
  )
310
- if "other" in state["licenses"]:
311
- other_license = st.text_input(
312
- "You selected 'other' type of license. Please enter a short hyphen-separated description:",
313
- value="my-license",
314
- )
315
- st.write(f"Registering other-{other_license} license")
316
- state["licenses"][state["licenses"].index("other")] = f"other-{other_license}"
317
 
318
 
319
  #
 
258
  state["multilinguality"][state["multilinguality"].index("other")] = f"other-{other_multilinguality}"
259
 
260
  valid_values, invalid_values = list(), list()
261
+ for langtag in state["language"]:
262
  try:
263
  lc.get(langtag)
264
  valid_values.append(langtag)
 
273
  "What languages are represented in the dataset? expected format is BCP47 tags separated for ';' e.g. 'en;fr'",
274
  value=";".join(valid_values),
275
  )
276
+ state["language"] = langtags.strip().split(";") if langtags.strip() != "" else []
277
 
278
 
279
  #
 
297
 
298
 
299
  #
300
+ # LICENSE
301
  #
302
+ state["license"] = multiselect(
303
  leftcol,
304
+ "License",
305
+ "What license(s) is the dataset under?",
306
  valid_set=list(known_licenses.keys()),
307
+ values=state["license"],
308
  format_func=lambda l: f"{l} : {known_licenses[l]}",
309
  )
 
 
 
 
 
 
 
310
 
311
 
312
  #