sdiazlor's picture
sdiazlor HF staff
texcat-generation_review (#13)
e756bd8 verified
import os
from typing import Union, List, Optional
import argilla as rg
import gradio as gr
from gradio.oauth import (
OAUTH_CLIENT_ID,
OAUTH_CLIENT_SECRET,
OAUTH_SCOPES,
OPENID_PROVIDER_URL,
get_space,
)
from huggingface_hub import whoami
_LOGGED_OUT_CSS = ".main_ui_logged_out{opacity: 0.3; pointer-events: none}"
HF_TOKENS = [os.getenv("HF_TOKEN")] + [os.getenv(f"HF_TOKEN_{i}") for i in range(1, 10)]
HF_TOKENS = [token for token in HF_TOKENS if token]
_CHECK_IF_SPACE_IS_SET = (
all(
[
OAUTH_CLIENT_ID,
OAUTH_CLIENT_SECRET,
OAUTH_SCOPES,
OPENID_PROVIDER_URL,
]
)
or get_space() is None
)
if _CHECK_IF_SPACE_IS_SET:
from gradio.oauth import OAuthToken
else:
OAuthToken = str
def get_login_button():
return gr.LoginButton(
value="Sign in with Hugging Face!", size="lg", scale=2
).activate()
def get_duplicate_button():
if get_space() is not None:
return gr.DuplicateButton(size="lg")
def list_orgs(oauth_token: OAuthToken = None):
if oauth_token is None:
return []
data = whoami(oauth_token.token)
if data["auth"]["type"] == "oauth":
organisations = [data["name"]] + [org["name"] for org in data["orgs"]]
else:
organisations = [
entry["entity"]["name"]
for entry in data["auth"]["accessToken"]["fineGrained"]["scoped"]
if "repo.write" in entry["permissions"]
]
organisations = [org for org in organisations if org != data["name"]]
organisations = [data["name"]] + organisations
return organisations
def get_org_dropdown(oauth_token: OAuthToken = None):
orgs = list_orgs(oauth_token)
return gr.Dropdown(
label="Organization",
choices=orgs,
value=orgs[0] if orgs else None,
allow_custom_value=True,
)
def get_token(oauth_token: OAuthToken = None):
if oauth_token:
return oauth_token.token
else:
return ""
def swap_visibilty(oauth_token: Optional[OAuthToken] = None):
if oauth_token:
return gr.update(elem_classes=["main_ui_logged_in"])
else:
return gr.update(elem_classes=["main_ui_logged_out"])
def get_base_app():
with gr.Blocks(
title="🧬 Synthetic Data Generator",
head="🧬 Synthetic Data Generator",
css=_LOGGED_OUT_CSS,
) as app:
with gr.Row():
gr.Markdown(
"Want to run this locally or with other LLMs? Take a look at the FAQ tab. distilabel Synthetic Data Generator is free, we use the authentication token to push the dataset to the Hugging Face Hub and not for data generation."
)
with gr.Row():
gr.Column()
get_login_button()
gr.Column()
gr.Markdown("## Iterate on a sample dataset")
with gr.Column() as main_ui:
pass
return app
def get_argilla_client() -> Union[rg.Argilla, None]:
try:
api_url = os.getenv("ARGILLA_API_URL_SDG_REVIEWER")
api_key = os.getenv("ARGILLA_API_KEY_SDG_REVIEWER")
if api_url is None or api_key is None:
api_url = os.getenv("ARGILLA_API_URL")
api_key = os.getenv("ARGILLA_API_KEY")
return rg.Argilla(
api_url=api_url,
api_key=api_key,
)
except Exception:
return None
def get_preprocess_labels(labels: Optional[List[str]]) -> List[str]:
return list(set([label.lower().strip() for label in labels])) if labels else []