Commit · 86f370f
1 Parent(s): 2e449ff
fix final nits and formatting
Browse files
- assets/image.png +0 -0
- assets/ui-full.png +0 -0
- src/synthetic_dataset_generator/apps/base.py +37 -41
- src/synthetic_dataset_generator/apps/eval.py +1 -1
- src/synthetic_dataset_generator/apps/sft.py +3 -5
- src/synthetic_dataset_generator/apps/textcat.py +3 -5
- src/synthetic_dataset_generator/utils.py +1 -1
assets/image.png
DELETED
Binary file (657 kB)
|
|
assets/ui-full.png
CHANGED
src/synthetic_dataset_generator/apps/base.py
CHANGED
@@ -129,53 +129,49 @@ def show_success_message(org_name, repo_name) -> gr.Markdown:
|
|
129 |
client = get_argilla_client()
|
130 |
if client is None:
|
131 |
return gr.Markdown(
|
132 |
-
value=
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
</
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
141 |
</div>
|
142 |
-
|
143 |
-
|
144 |
-
By configuring an `ARGILLA_API_URL` and `ARGILLA_API_KEY` you can curate the dataset in Argilla.
|
145 |
-
Unfamiliar with Argilla? Here are some docs to help you get started:
|
146 |
-
<br>• <a href="https://docs.argilla.io/latest/getting_started/quickstart/" target="_blank">How to get started with Argilla</a>
|
147 |
-
<br>• <a href="https://docs.argilla.io/latest/how_to_guides/annotate/" target="_blank">How to curate data in Argilla</a>
|
148 |
-
<br>• <a href="https://docs.argilla.io/latest/how_to_guides/import_export/" target="_blank">How to export data once you have reviewed the dataset</a>
|
149 |
-
</p>
|
150 |
-
</div>
|
151 |
-
"""
|
152 |
)
|
153 |
argilla_api_url = client.api_url
|
154 |
return gr.Markdown(
|
155 |
value=f"""
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
|
|
|
|
|
|
|
|
170 |
</div>
|
171 |
-
|
172 |
-
</div>
|
173 |
-
<p style="margin-top: 1em; color: #333;">
|
174 |
-
Unfamiliar with Argilla? Here are some docs to help you get started:
|
175 |
-
<br>• <a href="https://docs.argilla.io/latest/how_to_guides/annotate/" target="_blank">How to curate data in Argilla</a>
|
176 |
-
<br>• <a href="https://docs.argilla.io/latest/how_to_guides/import_export/" target="_blank">How to export data once you have reviewed the dataset</a>
|
177 |
-
</p>
|
178 |
-
""",
|
179 |
visible=True,
|
180 |
)
|
181 |
|
|
|
129 |
client = get_argilla_client()
|
130 |
if client is None:
|
131 |
return gr.Markdown(
|
132 |
+
value="""
|
133 |
+
<div style="padding: 1em; background-color: var(--block-background-fill); border-color: var(--border-color-primary); border-width: 1px; border-radius: 5px;">
|
134 |
+
<h3 style="color: #2e7d32; margin: 0;">Dataset Published Successfully!</h3>
|
135 |
+
<p style="margin-top: 0.5em;">
|
136 |
+
The generated dataset is in the right format for fine-tuning with TRL, AutoTrain, or other frameworks.
|
137 |
+
<a href="https://huggingface.co/datasets/{org_name}/{repo_name}" target="_blank" class="lg primary svelte-cmf5ev" style="color: white !important; margin-top: 0.5em; text-decoration: none;">
|
138 |
+
Open in Hub
|
139 |
+
</a>
|
140 |
+
</p>
|
141 |
+
<p style="margin-top: 1em; color: #333;">
|
142 |
+
By configuring an `ARGILLA_API_URL` and `ARGILLA_API_KEY` you can curate the dataset in Argilla.
|
143 |
+
Unfamiliar with Argilla? Here are some docs to help you get started:
|
144 |
+
<br>• <a href="https://docs.argilla.io/latest/getting_started/quickstart/" target="_blank">How to get started with Argilla</a>
|
145 |
+
<br>• <a href="https://docs.argilla.io/latest/how_to_guides/annotate/" target="_blank">How to curate data in Argilla</a>
|
146 |
+
<br>• <a href="https://docs.argilla.io/latest/how_to_guides/import_export/" target="_blank">How to export data once you have reviewed the dataset</a>
|
147 |
+
</p>
|
148 |
</div>
|
149 |
+
""",
|
150 |
+
visible=True,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
151 |
)
|
152 |
argilla_api_url = client.api_url
|
153 |
return gr.Markdown(
|
154 |
value=f"""
|
155 |
+
<div style="padding: 1em; background-color: var(--block-background-fill); border-color: var(--border-color-primary); border-width: 1px; border-radius: 5px;">
|
156 |
+
<h3 style="color: #2e7d32; margin: 0;">Dataset Published Successfully!</h3>
|
157 |
+
<p style="margin-top: 0.5em;">
|
158 |
+
The generated dataset is in the right format for fine-tuning with TRL, AutoTrain, or other frameworks.
|
159 |
+
<div style="display: flex; gap: 10px;">
|
160 |
+
<a href="{argilla_api_url}" target="_blank" class="lg primary svelte-cmf5ev" style="color: white !important; margin-top: 0.5em; text-decoration: none;">
|
161 |
+
Open in Argilla
|
162 |
+
</a>
|
163 |
+
<a href="https://huggingface.co/datasets/{org_name}/{repo_name}" target="_blank" class="lg secondary svelte-cmf5ev" style="color: black !important; margin-top: 0.5em; text-decoration: none;">
|
164 |
+
Open in Hub
|
165 |
+
</a>
|
166 |
+
</div>
|
167 |
+
</p>
|
168 |
+
<p style="margin-top: 1em; color: #333;">
|
169 |
+
Unfamiliar with Argilla? Here are some docs to help you get started:
|
170 |
+
<br>• <a href="https://docs.argilla.io/latest/how_to_guides/annotate/" target="_blank">How to curate data in Argilla</a>
|
171 |
+
<br>• <a href="https://docs.argilla.io/latest/how_to_guides/import_export/" target="_blank">How to export data once you have reviewed the dataset</a>
|
172 |
+
</p>
|
173 |
</div>
|
174 |
+
""",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
175 |
visible=True,
|
176 |
)
|
177 |
|
src/synthetic_dataset_generator/apps/eval.py
CHANGED
@@ -713,7 +713,7 @@ with gr.Blocks() as app:
|
|
713 |
with gr.Column(scale=3):
|
714 |
success_message = gr.Markdown(visible=True)
|
715 |
with gr.Accordion(
|
716 |
-
"
|
717 |
open=False,
|
718 |
visible=False,
|
719 |
) as pipeline_code_ui:
|
|
|
713 |
with gr.Column(scale=3):
|
714 |
success_message = gr.Markdown(visible=True)
|
715 |
with gr.Accordion(
|
716 |
+
"Customize your pipeline with distilabel",
|
717 |
open=False,
|
718 |
visible=False,
|
719 |
) as pipeline_code_ui:
|
src/synthetic_dataset_generator/apps/sft.py
CHANGED
@@ -381,15 +381,13 @@ with gr.Blocks() as app:
|
|
381 |
"Create",
|
382 |
variant="primary",
|
383 |
)
|
384 |
-
with gr.Column(scale=
|
385 |
examples = gr.Examples(
|
386 |
examples=DEFAULT_DATASET_DESCRIPTIONS,
|
387 |
inputs=[dataset_description],
|
388 |
cache_examples=False,
|
389 |
label="Examples",
|
390 |
)
|
391 |
-
with gr.Column(scale=1):
|
392 |
-
pass
|
393 |
|
394 |
gr.HTML(value="<hr>")
|
395 |
gr.Markdown(value="## 2. Configure your dataset")
|
@@ -437,12 +435,12 @@ with gr.Blocks() as app:
|
|
437 |
scale=1,
|
438 |
)
|
439 |
temperature = gr.Slider(
|
|
|
440 |
minimum=0.1,
|
441 |
maximum=1,
|
442 |
value=0.8,
|
443 |
step=0.1,
|
444 |
interactive=True,
|
445 |
-
show_label=False,
|
446 |
)
|
447 |
private = gr.Checkbox(
|
448 |
label="Private dataset",
|
@@ -456,7 +454,7 @@ with gr.Blocks() as app:
|
|
456 |
with gr.Column(scale=3):
|
457 |
success_message = gr.Markdown(visible=True)
|
458 |
with gr.Accordion(
|
459 |
-
"
|
460 |
open=False,
|
461 |
visible=False,
|
462 |
) as pipeline_code_ui:
|
|
|
381 |
"Create",
|
382 |
variant="primary",
|
383 |
)
|
384 |
+
with gr.Column(scale=3):
|
385 |
examples = gr.Examples(
|
386 |
examples=DEFAULT_DATASET_DESCRIPTIONS,
|
387 |
inputs=[dataset_description],
|
388 |
cache_examples=False,
|
389 |
label="Examples",
|
390 |
)
|
|
|
|
|
391 |
|
392 |
gr.HTML(value="<hr>")
|
393 |
gr.Markdown(value="## 2. Configure your dataset")
|
|
|
435 |
scale=1,
|
436 |
)
|
437 |
temperature = gr.Slider(
|
438 |
+
label="Temperature",
|
439 |
minimum=0.1,
|
440 |
maximum=1,
|
441 |
value=0.8,
|
442 |
step=0.1,
|
443 |
interactive=True,
|
|
|
444 |
)
|
445 |
private = gr.Checkbox(
|
446 |
label="Private dataset",
|
|
|
454 |
with gr.Column(scale=3):
|
455 |
success_message = gr.Markdown(visible=True)
|
456 |
with gr.Accordion(
|
457 |
+
"Customize your pipeline with distilabel",
|
458 |
open=False,
|
459 |
visible=False,
|
460 |
) as pipeline_code_ui:
|
src/synthetic_dataset_generator/apps/textcat.py
CHANGED
@@ -355,15 +355,13 @@ with gr.Blocks() as app:
|
|
355 |
"Create",
|
356 |
variant="primary",
|
357 |
)
|
358 |
-
with gr.Column(scale=
|
359 |
examples = gr.Examples(
|
360 |
examples=DEFAULT_DATASET_DESCRIPTIONS,
|
361 |
inputs=[dataset_description],
|
362 |
cache_examples=False,
|
363 |
label="Examples",
|
364 |
)
|
365 |
-
with gr.Column(scale=1):
|
366 |
-
pass
|
367 |
|
368 |
gr.HTML("<hr>")
|
369 |
gr.Markdown("## 2. Configure your dataset")
|
@@ -441,12 +439,12 @@ with gr.Blocks() as app:
|
|
441 |
scale=1,
|
442 |
)
|
443 |
temperature = gr.Slider(
|
|
|
444 |
minimum=0.1,
|
445 |
maximum=1,
|
446 |
value=0.8,
|
447 |
step=0.1,
|
448 |
interactive=True,
|
449 |
-
show_label=False,
|
450 |
)
|
451 |
private = gr.Checkbox(
|
452 |
label="Private dataset",
|
@@ -458,7 +456,7 @@ with gr.Blocks() as app:
|
|
458 |
with gr.Column(scale=3):
|
459 |
success_message = gr.Markdown(visible=True)
|
460 |
with gr.Accordion(
|
461 |
-
"
|
462 |
open=False,
|
463 |
visible=False,
|
464 |
) as pipeline_code_ui:
|
|
|
355 |
"Create",
|
356 |
variant="primary",
|
357 |
)
|
358 |
+
with gr.Column(scale=3):
|
359 |
examples = gr.Examples(
|
360 |
examples=DEFAULT_DATASET_DESCRIPTIONS,
|
361 |
inputs=[dataset_description],
|
362 |
cache_examples=False,
|
363 |
label="Examples",
|
364 |
)
|
|
|
|
|
365 |
|
366 |
gr.HTML("<hr>")
|
367 |
gr.Markdown("## 2. Configure your dataset")
|
|
|
439 |
scale=1,
|
440 |
)
|
441 |
temperature = gr.Slider(
|
442 |
+
label="Temperature",
|
443 |
minimum=0.1,
|
444 |
maximum=1,
|
445 |
value=0.8,
|
446 |
step=0.1,
|
447 |
interactive=True,
|
|
|
448 |
)
|
449 |
private = gr.Checkbox(
|
450 |
label="Private dataset",
|
|
|
456 |
with gr.Column(scale=3):
|
457 |
success_message = gr.Markdown(visible=True)
|
458 |
with gr.Accordion(
|
459 |
+
"Customize your pipeline with distilabel",
|
460 |
open=False,
|
461 |
visible=False,
|
462 |
) as pipeline_code_ui:
|
src/synthetic_dataset_generator/utils.py
CHANGED
@@ -28,7 +28,7 @@ def list_orgs(oauth_token: Union[OAuthToken, None] = None):
|
|
28 |
if data["auth"]["type"] == "oauth":
|
29 |
organizations = [data["name"]] + [org["name"] for org in data["orgs"]]
|
30 |
elif data["auth"]["type"] == "access_token":
|
31 |
-
organizations = [org["name"] for org in data["orgs"]]
|
32 |
else:
|
33 |
organizations = [
|
34 |
entry["entity"]["name"]
|
|
|
28 |
if data["auth"]["type"] == "oauth":
|
29 |
organizations = [data["name"]] + [org["name"] for org in data["orgs"]]
|
30 |
elif data["auth"]["type"] == "access_token":
|
31 |
+
organizations = [data["name"]] + [org["name"] for org in data["orgs"]]
|
32 |
else:
|
33 |
organizations = [
|
34 |
entry["entity"]["name"]
|