Yacine Jernite committed on
Commit
183ff95
2 Parent(s): 3684e08 6f1bd41

Merge pull request #8 from huggingface/prepare_features_for_markdown

Browse files
Files changed (1) hide show
  1. tagging_app.py +54 -28
tagging_app.py CHANGED
@@ -57,45 +57,68 @@ creator_set = {
57
  ########################
58
 
59
  @st.cache
60
- def filter_features(features):
61
  if isinstance(features, list):
62
- return dict([(k, filter_features(v)) for k, v in features[0].items()])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
  elif features.get("_type", None) == 'Value':
64
- return {
65
  "feature_type": features["_type"],
66
  "dtype": features["dtype"],
67
  }
68
- elif features.get("_type", None) == 'Sequence':
69
- if "dtype" in features["feature"]:
70
- return {
71
- "feature_type": features["_type"],
72
- "feature": filter_features(features["feature"]),
73
- }
74
- elif "_type" in features["feature"] and features["feature"]["_type"] == "ClassLabel":
75
- return {
76
- "feature_type": features["_type"],
77
- "dtype": "int32",
78
- "feature": filter_features(features["feature"]),
79
- }
80
  else:
81
- return dict(
82
- [("feature_type", features["_type"])] + \
83
- [(k, filter_features(v)) for k, v in features["feature"].items()]
84
- )
85
  elif features.get("_type", None) == 'ClassLabel':
86
- return {
87
  "feature_type": features["_type"],
88
  "dtype": "int32",
89
  "class_names": features["names"],
90
  }
 
 
 
 
 
91
  elif features.get("_type", None) in ['Translation', 'TranslationVariableLanguages']:
92
- return {
93
  "feature_type": features["_type"],
94
  "dtype": "string",
95
  "languages": features["languages"],
96
  }
 
 
 
 
 
97
  else:
98
- return dict([(k, filter_features(v)) for k, v in features.items()])
 
 
 
 
 
 
99
 
100
 
101
  @st.cache
@@ -227,7 +250,7 @@ config_id = st.sidebar.selectbox(
227
 
228
  config_infos = all_info_dicts[config_id]
229
 
230
- c1, _, c2, _, c3 = st.beta_columns([8, 1, 14, 1, 10])
231
 
232
  ########################
233
  ## Dataset description
@@ -243,8 +266,8 @@ with c1.beta_expander("Dataset description:", expanded=True):
243
  st.markdown(config_infos['description'])
244
 
245
  # "pretty-fy" the features to be a little easier to read
246
- features = filter_features(config_infos['features'])
247
- with c1.beta_expander(f"Dataset features for config: {config_id}", expanded=True):
248
  st.write(features)
249
 
250
  ########################
@@ -444,9 +467,6 @@ if c3.button("Done? Save to File!"):
444
  _ = os.mkdir(pjoin('saved_tags', dataset_id, config_id))
445
  json.dump(res, open(pjoin('saved_tags', dataset_id, config_id, 'tags.json'), 'w'))
446
 
447
- with c3.beta_expander("Show JSON output for the current config"):
448
- st.write(res)
449
-
450
  with c3.beta_expander("Show YAML output aggregating the tags saved for all configs"):
451
  task_saved_configs = dict([
452
  (Path(fname).parent.name, json.load(open(fname)))
@@ -465,6 +485,12 @@ with c3.beta_expander("Show YAML output aggregating the tags saved for all confi
465
  aggregate_config[tag_k][conf_name] = list(aggregate_config[tag_k][conf_name])
466
  st.text(yaml.dump(aggregate_config))
467
 
 
 
 
 
 
 
468
  c3.markdown("--- ")
469
 
470
  with c3.beta_expander("----> show full task set <----", expanded=True):
 
57
  ########################
58
 
59
  @st.cache
60
def filter_features(features, name="", is_sequence=False):
    """Simplify a feature specification and describe it as Markdown bullets.

    Args:
        features: the specification to process. Either a list (only its first
            element is inspected), a dict carrying a ``_type`` key
            (``Sequence``, ``Value``, ``ClassLabel``, ``Translation`` or
            ``TranslationVariableLanguages``), or a plain dict mapping feature
            names to nested specifications.
        name: feature name used in the generated bullet; empty at the top
            level, where no parent bullet is emitted.
        is_sequence: True when the feature was reached through a list or a
            ``Sequence``, which switches the wording to "a `list` of ...".

    Returns:
        A ``(filtered, desc)`` tuple: ``filtered`` is the simplified nested
        dict, ``desc`` is a list of Markdown bullet lines (children are
        indented by two spaces below their parent bullet).
    """
    if isinstance(features, list):
        # A list wraps a single specification: describe it as a sequence.
        return filter_features(features[0], name, is_sequence=True)

    feature_type = features.get("_type", None)

    if feature_type == 'Sequence':
        inner = features["feature"]
        # Any typed inner feature (Value, ClassLabel, nested Sequence,
        # Translation, ...) must be recursed into as a whole. Treating it as a
        # dict of sub-features would iterate its metadata keys ("_type",
        # "length", "languages", ...) and fail on their non-dict values.
        if "dtype" in inner or isinstance(inner.get("_type", None), str):
            pre_filtered, desc = filter_features(inner, name, is_sequence=True)
            filtered = {
                "feature_type": feature_type,
                "feature": pre_filtered,
            }
            return filtered, desc
        # Otherwise the inner feature is a mapping of named sub-features.
        filtered = {"feature_type": feature_type}
        if is_sequence:
            desc = [f"- `{name}`: a `list` of dictionary features containing:"]
        else:
            desc = [f"- `{name}`: a dictionary feature containing:"]
        for k, v in inner.items():
            pre_filtered, pre_desc = filter_features(v, name=k)
            filtered[k] = pre_filtered
            # Two spaces are the minimum indent for a nested "- " list item.
            desc += ["  " + d for d in pre_desc]
        return filtered, desc

    if feature_type == 'Value':
        filtered = {
            "feature_type": feature_type,
            "dtype": features["dtype"],
        }
        if is_sequence:
            desc = f"- `{name}`: a `list` of `{features['dtype']}` features."
        else:
            desc = f"- `{name}`: a `{features['dtype']}` feature."
        return filtered, [desc]

    if feature_type == 'ClassLabel':
        names = features.get("names")
        filtered = {
            "feature_type": feature_type,
            "dtype": "int32",
            "class_names": names,
        }
        preview = _preview_values(names)
        # Skip the trailing clause when there is nothing to list.
        suffix = f", with possible values including {preview}" if preview else ""
        if is_sequence:
            desc = f"- `{name}`: a `list` of classification labels{suffix}."
        else:
            desc = f"- `{name}`: a classification label{suffix}."
        return filtered, [desc]

    if feature_type in ['Translation', 'TranslationVariableLanguages']:
        languages = features.get("languages")
        filtered = {
            "feature_type": feature_type,
            "dtype": "string",
            "languages": languages,
        }
        preview = _preview_values(languages)
        suffix = f", with possible languages including {preview}" if preview else ""
        if is_sequence:
            desc = f"- `{name}`: a `list` of multilingual `string` variables{suffix}."
        else:
            desc = f"- `{name}`: a multilingual `string` variable{suffix}."
        return filtered, [desc]

    # Plain mapping of feature names to nested specifications.
    filtered = {}
    desc = []
    for k, v in features.items():
        pre_filtered, pre_desc = filter_features(v, name=k)
        filtered[k] = pre_filtered
        desc += pre_desc
    if name:
        # A named mapping gets its own parent bullet so the children are not
        # listed as if they were top-level features.
        if is_sequence:
            header = f"- `{name}`: a `list` of dictionary features containing:"
        else:
            header = f"- `{name}`: a dictionary feature containing:"
        desc = [header] + ["  " + d for d in desc]
    return filtered, desc


def _preview_values(values, limit=5):
    """Return up to ``limit`` values, back-quoted and comma-separated."""
    return ", ".join(f"`{v}`" for v in list(values or [])[:limit])
122
 
123
 
124
  @st.cache
 
250
 
251
  config_infos = all_info_dicts[config_id]
252
 
253
+ c1, _, c2, _, c3 = st.beta_columns([8, 1, 12, 1, 12])
254
 
255
  ########################
256
  ## Dataset description
 
266
  st.markdown(config_infos['description'])
267
 
268
  # "pretty-fy" the features to be a little easier to read
269
+ features, feature_descs = filter_features(config_infos['features'])
270
+ with c1.beta_expander(f"Dataset features for config: {config_id}", expanded=False):
271
  st.write(features)
272
 
273
  ########################
 
467
  _ = os.mkdir(pjoin('saved_tags', dataset_id, config_id))
468
  json.dump(res, open(pjoin('saved_tags', dataset_id, config_id, 'tags.json'), 'w'))
469
 
 
 
 
470
  with c3.beta_expander("Show YAML output aggregating the tags saved for all configs"):
471
  task_saved_configs = dict([
472
  (Path(fname).parent.name, json.load(open(fname)))
 
485
  aggregate_config[tag_k][conf_name] = list(aggregate_config[tag_k][conf_name])
486
  st.text(yaml.dump(aggregate_config))
487
 
488
+ with c3.beta_expander(f"Show Markdown Data Features for config: {config_id}"):
489
+ st.text('\n'.join(feature_descs))
490
+
491
+ with c3.beta_expander("Show JSON output for the current config"):
492
+ st.write(res)
493
+
494
  c3.markdown("--- ")
495
 
496
  with c3.beta_expander("----> show full task set <----", expanded=True):