dvilasuero HF staff osanseviero committed on
Commit
5fca25d
1 Parent(s): 4242ca3

Add disabled view if not logged in (#2)

Browse files

- Add disabled view if not logged in (b01e31464889ff209bd1f7252d630e467bae67ee)
- Update src/distilabel_dataset_generator/apps/sft.py (3b2d0792227500e25a1e5c7e9478eaf6eafcd7e1)
- Update src/distilabel_dataset_generator/utils.py (fc6513bd22884a03289c677d2808517d4dcc203d)
- Update src/distilabel_dataset_generator/utils.py (43d40a3c350fe4eee20ae25d35cbcc004e4ce1d0)


Co-authored-by: Omar Sanseviero <[email protected]>

src/distilabel_dataset_generator/apps/sft.py CHANGED
@@ -1,5 +1,6 @@
1
  import multiprocessing
2
  import time
 
3
 
4
  import gradio as gr
5
  import pandas as pd
@@ -17,7 +18,6 @@ from src.distilabel_dataset_generator.pipelines.sft import (
17
  from src.distilabel_dataset_generator.utils import (
18
  get_login_button,
19
  get_org_dropdown,
20
- get_token,
21
  )
22
 
23
 
@@ -65,8 +65,8 @@ def generate_dataset(
65
  private=True,
66
  org_name=None,
67
  repo_name=None,
68
- token=None,
69
  progress=gr.Progress(),
 
70
  ):
71
  repo_id = (
72
  f"{org_name}/{repo_name}"
@@ -78,12 +78,6 @@ def generate_dataset(
78
  raise gr.Error(
79
  "Please provide a repo_name and org_name to push the dataset to."
80
  )
81
- try:
82
- whoami(token=token)
83
- except Exception:
84
- raise gr.Error(
85
- "Provide a Hugging Face token with write access to the organization you want to push the dataset to."
86
- )
87
 
88
  if num_turns > 4:
89
  num_turns = 4
@@ -132,7 +126,7 @@ def generate_dataset(
132
  repo_id=repo_id,
133
  private=private,
134
  include_script=False,
135
- token=token,
136
  )
137
 
138
  # If not pushing to hub generate the dataset directly
@@ -152,11 +146,16 @@ def generate_pipeline_code() -> str:
152
 
153
  return pipeline_code
154
 
 
 
 
155
 
156
  with gr.Blocks(
157
  title="⚗️ Distilabel Dataset Generator",
158
  head="⚗️ Distilabel Dataset Generator",
 
159
  ) as app:
 
160
  gr.Markdown("## Iterate on a sample dataset")
161
  with gr.Column() as main_ui:
162
  dataset_description = gr.TextArea(
@@ -209,7 +208,7 @@ with gr.Blocks(
209
  # Add a header for the full dataset generation section
210
  gr.Markdown("## Generate full dataset")
211
  gr.Markdown(
212
- "Once you're satisfied with the sample, generate a larger dataset and push it to the hub. Get <a href='https://huggingface.co/settings/tokens' target='_blank'>a Hugging Face token</a> with write access to the organization you want to push the dataset to. A OAuth login resets the session state, so watch out not to lose your generated system prompt!"
213
  )
214
 
215
  with gr.Column() as push_to_hub_ui:
@@ -231,8 +230,6 @@ with gr.Blocks(
231
  )
232
 
233
  with gr.Row(variant="panel"):
234
- get_login_button()
235
- hf_token = gr.Textbox(label="HF token", type="password")
236
  org_name = get_org_dropdown()
237
  repo_name = gr.Textbox(label="Repo name", placeholder="dataset_name")
238
  private = gr.Checkbox(
@@ -277,7 +274,6 @@ with gr.Blocks(
277
  private,
278
  org_name,
279
  repo_name,
280
- hf_token,
281
  ],
282
  outputs=[table],
283
  show_progress=True,
@@ -296,5 +292,4 @@ with gr.Blocks(
296
  label="Distilabel Pipeline Code",
297
  )
298
 
299
- app.load(get_token, outputs=[hf_token])
300
  app.load(get_org_dropdown, outputs=[org_name])
 
1
  import multiprocessing
2
  import time
3
+ from typing import Union
4
 
5
  import gradio as gr
6
  import pandas as pd
 
18
  from src.distilabel_dataset_generator.utils import (
19
  get_login_button,
20
  get_org_dropdown,
 
21
  )
22
 
23
 
 
65
  private=True,
66
  org_name=None,
67
  repo_name=None,
 
68
  progress=gr.Progress(),
69
+ oauth_token: Union[gr.OAuthToken, None]
70
  ):
71
  repo_id = (
72
  f"{org_name}/{repo_name}"
 
78
  raise gr.Error(
79
  "Please provide a repo_name and org_name to push the dataset to."
80
  )
 
 
 
 
 
 
81
 
82
  if num_turns > 4:
83
  num_turns = 4
 
126
  repo_id=repo_id,
127
  private=private,
128
  include_script=False,
129
+ token=oauth_token.token,
130
  )
131
 
132
  # If not pushing to hub generate the dataset directly
 
146
 
147
  return pipeline_code
148
 
149
+ css = """
150
+ .main_ui_logged_out{opacity: 0.3; pointer-events: none}
151
+ """
152
 
153
  with gr.Blocks(
154
  title="⚗️ Distilabel Dataset Generator",
155
  head="⚗️ Distilabel Dataset Generator",
156
+ css=css
157
  ) as app:
158
+ get_login_button()
159
  gr.Markdown("## Iterate on a sample dataset")
160
  with gr.Column() as main_ui:
161
  dataset_description = gr.TextArea(
 
208
  # Add a header for the full dataset generation section
209
  gr.Markdown("## Generate full dataset")
210
  gr.Markdown(
211
+ "Once you're satisfied with the sample, generate a larger dataset and push it to the Hub."
212
  )
213
 
214
  with gr.Column() as push_to_hub_ui:
 
230
  )
231
 
232
  with gr.Row(variant="panel"):
 
 
233
  org_name = get_org_dropdown()
234
  repo_name = gr.Textbox(label="Repo name", placeholder="dataset_name")
235
  private = gr.Checkbox(
 
274
  private,
275
  org_name,
276
  repo_name,
 
277
  ],
278
  outputs=[table],
279
  show_progress=True,
 
292
  label="Distilabel Pipeline Code",
293
  )
294
 
 
295
  app.load(get_org_dropdown, outputs=[org_name])
src/distilabel_dataset_generator/utils.py CHANGED
@@ -47,18 +47,17 @@ def get_duplicate_button():
47
  return gr.DuplicateButton(size="lg")
48
 
49
 
50
- def list_orgs(token: OAuthToken = None):
51
- if token is not None:
52
- data = whoami(token)
53
- organisations = [
54
- entry["entity"]["name"]
55
- for entry in data["auth"]["accessToken"]["fineGrained"]["scoped"]
56
- if "repo.write" in entry["permissions"]
57
- ]
58
- organisations.append(data["name"])
59
- return list(set(organisations))
60
- else:
61
  return []
 
 
 
 
 
 
 
 
62
 
63
 
64
  def get_org_dropdown(token: OAuthToken = None):
 
47
  return gr.DuplicateButton(size="lg")
48
 
49
 
50
+ def list_orgs(oauth_token: OAuthToken = None):
51
+ if oauth_token is None:
 
 
 
 
 
 
 
 
 
52
  return []
53
+ data = whoami(oauth_token.token)
54
+ organisations = [
55
+ entry["entity"]["name"]
56
+ for entry in data["auth"]["accessToken"]["fineGrained"]["scoped"]
57
+ if "repo.write" in entry["permissions"]
58
+ ]
59
+ organisations.append(data["name"])
60
+ return list(set(organisations))
61
 
62
 
63
  def get_org_dropdown(token: OAuthToken = None):