davidberenstein1957 HF staff committed on
Commit
2d84a88
·
1 Parent(s): e1cb58c

add task categories

Browse files
src/synthetic_dataset_generator/_distiset.py CHANGED
@@ -81,6 +81,15 @@ class CustomDistisetWithAdditionalTag(distilabel.distiset.Distiset):
81
  dataset[0] if not isinstance(dataset, dict) else dataset["train"][0]
82
  )
83
 
 
 
 
 
 
 
 
 
 
84
  readme_metadata = {}
85
  if repo_id and token:
86
  readme_metadata = self._extract_readme_metadata(repo_id, token)
@@ -90,6 +99,7 @@ class CustomDistisetWithAdditionalTag(distilabel.distiset.Distiset):
90
  "size_categories": size_categories_parser(
91
  max(len(dataset) for dataset in self.values())
92
  ),
 
93
  "tags": [
94
  "synthetic",
95
  "distilabel",
 
81
  dataset[0] if not isinstance(dataset, dict) else dataset["train"][0]
82
  )
83
 
84
+ keys = list(sample_records.keys())
85
+ if len(keys) != 2 or not (
86
+ ("label" in keys and "text" in keys)
87
+ or ("labels" in keys and "text" in keys)
88
+ ):
89
+ task_categories = ["text-classification"]
90
+ elif "prompt" in keys or "messages" in keys:
91
+ task_categories = ["text-generation", "text2text-generation"]
92
+
93
  readme_metadata = {}
94
  if repo_id and token:
95
  readme_metadata = self._extract_readme_metadata(repo_id, token)
 
99
  "size_categories": size_categories_parser(
100
  max(len(dataset) for dataset in self.values())
101
  ),
102
+ "task_categories": task_categories,
103
  "tags": [
104
  "synthetic",
105
  "distilabel",