Spaces:
Runtime error
Runtime error
import os | |
import json | |
import shutil | |
import gradio as gr | |
from datasets import load_dataset | |
from huggingface_hub import upload_file | |
from io import StringIO | |
import pandas as pd | |
import datetime | |
# Hugging Face access token, read from the environment; None when the
# HF_TOKEN variable is not set.
HF_TOKEN = os.getenv("HF_TOKEN")

# Hub dataset repository used as the upload target for submitted translations.
DIALOGUES_DATASET = "ArmelRandy/MT_dialogues"
def load_data(dataset_name="ArmelR/oasst1_guanaco_english"):
    """Load the source dataset from the Hugging Face Hub.

    Args:
        dataset_name: Hub identifier of the dataset to load. Defaults to the
            dataset this app was originally hard-wired to.

    Returns:
        The object returned by ``datasets.load_dataset``. The rest of this
        file treats it as a mapping from split name to an indexable split
        whose rows have ``"prompt"`` and ``"completion"`` fields.
    """
    # `use_auth_token` is deprecated in `datasets` in favour of `token`; per
    # the library's deprecation notice newer releases stop accepting the old
    # keyword, so pass the credential under its current name.
    return load_dataset(dataset_name, token=HF_TOKEN)
# Load the dataset once at start-up; every callback indexes into `samples`.
samples = load_data()

# Names of the available splits, in the order the loaded dataset reports them.
splits = [*samples.keys()]

# Target languages offered for translation.
languages = ["Wolof"]

# Start-up diagnostics: where the process runs and where this script lives.
print(f"current directory {os.getcwd()}")
print(f"total path {os.path.dirname(os.path.realpath(__file__))}")
# Extra CSS handed to `gr.Blocks(css=...)`.
# The stray "| |" fragments that had leaked into this string literal (and
# therefore into the stylesheet itself) are removed so the rules parse as CSS.
# NOTE(review): no component visible in this file sets elem_id
# "banner-image" or "chat-message" -- confirm these selectors are still needed.
custom_css = """
#banner-image {
    display: block;
    margin-left: auto;
    margin-right: auto;
}
#chat-message {
    font-size: 14px;
    min-height: 300px;
}
"""
def caller_split(s):
    """Return the reset view for a newly selected split.

    Args:
        s: Name of the split to show (a key of ``samples``).

    Returns:
        A ``(0, prompt, completion)`` tuple: index 0 together with the
        ``"prompt"`` and ``"completion"`` fields of the split's first row.
    """
    first_row = samples[s][0]
    prompt_text = first_row["prompt"]
    completion_text = first_row["completion"]
    return 0, prompt_text, completion_text
def identity(index, split):
    """Look up one example and return its text fields.

    Args:
        index: Position of the row inside the split.
        split: Name of the split (a key of ``samples``).

    Returns:
        A ``(prompt, completion)`` tuple taken from ``samples[split][index]``.
    """
    row = samples[split][index]
    return tuple(row[field] for field in ("prompt", "completion"))
def save(index, language, split, prompt, completion):
    """Upload one translated pair to the Hub and advance to the next example.

    Args:
        index: Index of the source example that was translated.
        language: Target language of the translation.
        split: Name of the split the example belongs to (a key of ``samples``).
        prompt: Translated prompt text entered by the user.
        completion: Translated completion text entered by the user.

    Returns:
        A 5-tuple ``(index, source_prompt, source_completion,
        translated_prompt, translated_completion)``.

        * When both translations contain non-whitespace text, the record is
          uploaded, the index moves to the next example (clamped to the last
          row of the split) and both translation fields come back empty.
        * Otherwise nothing is uploaded, the index stays where it is and the
          user's text is returned unchanged so a partial submission is not
          lost.

    Raises:
        Whatever ``upload_file`` raises if the upload fails; errors are not
        swallowed here.
    """
    # Guard clause: reject a submission with a missing or whitespace-only field.
    if not (prompt or "").strip() or not (completion or "").strip():
        current = samples[split][index]
        return index, current["prompt"], current["completion"], prompt, completion

    print("Saving ...")

    # Take a single timestamp so the folder (date/hour) and the file name can
    # never disagree when the call straddles an hour or day boundary.
    now = datetime.datetime.now()
    timestamp = now.strftime("%Y-%m-%dT%H:%M:%S.%f")
    file_name = f"prompts_{timestamp}.jsonl"

    record = {"prompt": prompt, "completion": completion, "language": language, "index": index}
    # With no target given, `to_json` returns the JSON-lines text directly,
    # so no intermediate buffer has to be created and closed.
    payload = pd.DataFrame([record]).to_json(orient="records", lines=True)

    # Push to Hub
    upload_file(
        path_in_repo=f"{now.date()}/{now.hour}/{file_name}",
        path_or_fileobj=payload.encode(),
        repo_id=DIALOGUES_DATASET,
        token=HF_TOKEN,
        repo_type="dataset",
    )

    # Advance, but never past the last row of the split.
    next_index = min(index + 1, len(samples[split]) - 1)
    following = samples[split][next_index]
    return next_index, following["prompt"], following["completion"], "", ""
def _on_split_change(split_name):
    """Reset the view for a new split and resize the index slider to fit it.

    Returns the same three values as ``caller_split``, except that the index
    is wrapped in a ``gr.update`` that also sets the slider's maximum to the
    last valid row of the selected split.
    """
    _, first_prompt, first_completion = caller_split(split_name)
    last_index = len(samples[split_name]) - 1
    return gr.update(value=0, maximum=last_index), first_prompt, first_completion


# Pre-fill the source boxes with the example the slider starts on, so the
# page does not open with index 0 selected but nothing displayed.
initial_prompt, initial_completion = identity(0, splits[0])

with gr.Blocks(analytics_enabled=False, css=custom_css) as demo:
    gr.HTML("""<h1 align="center">MT💫</h1>""")

    # Source example: split selector, index slider and the original texts.
    with gr.Blocks():
        with gr.Row():
            split = gr.Dropdown(choices=splits, label="Dataset split", value=splits[0])
        with gr.Row():
            index_example = gr.Slider(
                minimum=0,
                # Bound the slider by the real size of the initial split
                # instead of a fixed 10000, which allowed out-of-range
                # indices and could not reach rows beyond 10000.
                maximum=len(samples[splits[0]]) - 1,
                step=1,
                value=0,
                interactive=True,
                info="Index of the chosen instruction-output pair.",
            )
        with gr.Row():
            with gr.Column():
                prompt = gr.Textbox(label="prompt", value=initial_prompt)
            with gr.Column():
                completion = gr.Code(label="Completion", value=initial_completion)

    # Translation input: target language, the two translated texts, submit.
    with gr.Blocks():
        with gr.Row():
            language = gr.Dropdown(choices=languages, label="Translation language", value=languages[0])
        with gr.Row():
            with gr.Column():
                translated_prompt = gr.Textbox(label="Translated prompt")
            with gr.Column():
                translated_completion = gr.Textbox(label="Translated completion")
        with gr.Row():
            button = gr.Button(value="Submit")

    # Event wiring.
    # Changing the split jumps back to its first example and resizes the slider.
    split.change(_on_split_change, inputs=[split], outputs=[index_example, prompt, completion])
    # Releasing the slider shows the example at the chosen index.
    index_example.release(identity, inputs=[index_example, split], outputs=[prompt, completion])
    # Submitting uploads the translation and moves on to the next example.
    button.click(
        save,
        inputs=[index_example, language, split, translated_prompt, translated_completion],
        outputs=[index_example, prompt, completion, translated_prompt, translated_completion],
    )

demo.launch(debug=True)