# MT / app.py
# ArmelRandy's picture
# Update app.py
# 4a0f178
import os
import json
import shutil
import gradio as gr
from datasets import load_dataset
from huggingface_hub import upload_file
from io import StringIO
import pandas as pd
import datetime
# Hugging Face access token taken from the environment; None when the
# HF_TOKEN variable is not set.
HF_TOKEN = os.getenv("HF_TOKEN")

# Hub dataset repository that `save` uploads the translated pairs to.
DIALOGUES_DATASET = "ArmelRandy/MT_dialogues"
def load_data():
    """Load the source instruction dataset used for translation.

    Calls ``load_dataset`` for ``ArmelR/oasst1_guanaco_english``, passing
    ``HF_TOKEN`` as the auth token.

    Returns:
        Whatever ``load_dataset`` returns. The rest of this file uses it as a
        mapping from split name to an indexable sequence of rows that have
        ``"prompt"`` and ``"completion"`` fields.
    """
    return load_dataset("ArmelR/oasst1_guanaco_english", use_auth_token=HF_TOKEN)
# Load the dataset once at import time; the UI callbacks index into it.
samples = load_data()

# Names of the available splits, in the order the dataset reports them.
splits = list(samples)

# Target languages offered in the "Translation language" dropdown.
languages = ["Wolof"]

# Startup diagnostics: working directory and the directory holding this file.
print(f"current directory {os.getcwd()}")
print(f"total path {os.path.dirname(os.path.realpath(__file__))}")
# Extra CSS handed to `gr.Blocks(css=custom_css)` when the UI is built.
# It centers the element with id `banner-image` and sets the font size and
# minimum height of the element with id `chat-message`.
# NOTE(review): no component in the visible layout sets these element ids, so
# the rules may be leftovers; confirm before removing.
custom_css = """
#banner-image {
display: block;
margin-left: auto;
margin-right: auto;
}
#chat-message {
font-size: 14px;
min-height: 300px;
}
"""
def caller_split(s):
    """Reset the view to the first example of split ``s``.

    Args:
        s: Name of the dataset split that was selected.

    Returns:
        A tuple ``(0, prompt, completion)``: the index to show on the slider
        and the two text fields of row 0 in ``samples[s]``.
    """
    first_row = samples[s][0]
    source_prompt = first_row["prompt"]
    source_completion = first_row["completion"]
    return 0, source_prompt, source_completion
def identity(index, split):
    """Return the prompt and completion of the example at ``index`` in ``split``.

    The index slider's maximum is not specific to the selected split, so the
    requested index can exceed the number of rows. Instead of raising
    ``IndexError`` the index is clamped to the valid range, the same way
    ``save`` clamps the next index it moves to.

    Args:
        index: Requested row position (as delivered by the slider).
        split: Name of the dataset split to read from.

    Returns:
        A tuple ``(prompt, completion)`` for the (clamped) row.
    """
    rows = samples[split]
    # Keep the position inside [0, len(rows) - 1].
    position = max(0, min(int(index), len(rows) - 1))
    row = rows[position]
    return row["prompt"], row["completion"]
def save(index, language, split, prompt, completion):
    """Upload one translated pair to the Hub and move on to the next example.

    Args:
        index: Position of the source example inside ``samples[split]``.
        language: Target language chosen for the translation.
        split: Name of the dataset split the source example belongs to.
        prompt: Translated prompt entered by the user.
        completion: Translated completion entered by the user.

    Returns:
        A 5-tuple ``(index, source_prompt, source_completion,
        translated_prompt, translated_completion)`` in the order of the
        outputs wired to the Submit button.

        * After a successful upload the index advances by one (staying on the
          last row of the split) and both translation fields are cleared.
        * If either translation is missing or blank, nothing is uploaded and
          the user's text is returned unchanged so partial work is not lost.

    Raises:
        Any exception raised by ``upload_file`` propagates to the caller.
    """
    rows = samples[split]
    last_index = len(rows) - 1
    # The index slider's maximum is not specific to the selected split, so
    # clamp before indexing or recording the position.
    index = max(0, min(int(index), last_index))

    # None, empty and whitespace-only text all count as "not translated yet".
    has_prompt = bool(prompt and prompt.strip())
    has_completion = bool(completion and completion.strip())
    if not (has_prompt and has_completion):
        current = rows[index]
        return index, current["prompt"], current["completion"], prompt, completion

    print("Saving ...")
    # One clock reading for both the folder and the file name, so they cannot
    # disagree around an hour or date boundary.
    now = datetime.datetime.now()
    timestamp = now.strftime("%Y-%m-%dT%H:%M:%S.%f")
    file_name = f"prompts_{timestamp}.jsonl"

    data = {"prompt": prompt, "completion": completion, "language": language, "index": index}
    # With no target buffer, to_json returns the JSON-lines text directly.
    payload = pd.DataFrame([data]).to_json(orient="records", lines=True)

    # Push to Hub
    upload_file(
        path_in_repo=f"{now.date()}/{now.hour}/{file_name}",
        path_or_fileobj=payload.encode(),
        repo_id=DIALOGUES_DATASET,
        token=HF_TOKEN,
        repo_type="dataset",
    )

    # Advance to the next example, staying on the last row at the end.
    next_index = min(index + 1, last_index)
    following = rows[next_index]
    return next_index, following["prompt"], following["completion"], "", ""
# Upper bound for the index slider: the last row of the largest split, rather
# than a hard-coded constant unrelated to the loaded data.
_max_index = max(len(samples[name]) for name in splits) - 1

# First example of the default split, shown at start-up so the source fields
# agree with the slider's initial position (0).
_first_example = samples[splits[0]][0]

with gr.Blocks(analytics_enabled=False, css=custom_css) as demo:
    gr.HTML("""<h1 align="center">MT💫</h1>""")

    # Source example: split selector, index slider, prompt and completion.
    with gr.Blocks():
        with gr.Row():
            split = gr.Dropdown(choices=splits, label="Dataset split", value=splits[0])
        with gr.Row():
            index_example = gr.Slider(
                minimum=0,
                maximum=_max_index,
                step=1,
                value=0,
                interactive=True,
                info="Index of the chosen instruction-output pair.",
            )
        with gr.Row():
            with gr.Column():
                prompt = gr.Textbox(label="prompt", value=_first_example["prompt"])
            with gr.Column():
                completion = gr.Code(label="Completion", value=_first_example["completion"])

    # Translation input: target language, translated texts, submit button.
    with gr.Blocks():
        with gr.Row():
            language = gr.Dropdown(choices=languages, label="Translation language", value=languages[0])
        with gr.Row():
            with gr.Column():
                translated_prompt = gr.Textbox(label="Translated prompt")
            with gr.Column():
                translated_completion = gr.Textbox(label="Translated completion")
        with gr.Row():
            button = gr.Button(value="Submit")

    # Changing the split jumps back to its first example.
    split.change(caller_split, inputs=[split], outputs=[index_example, prompt, completion])
    # Releasing the slider shows the example at the chosen index.
    index_example.release(identity, inputs=[index_example, split], outputs=[prompt, completion])
    # Submitting uploads the translation and refreshes all five fields.
    button.click(
        save,
        inputs=[index_example, language, split, translated_prompt, translated_completion],
        outputs=[index_example, prompt, completion, translated_prompt, translated_completion],
    )

demo.launch(debug=True)