"""Gradio demo for a "Simon Says" prompt-engineering game.

The player writes a system prompt; the app runs it against a validation set
with gpt-3.5-turbo (temperature 0) and reports the accuracy together with a
per-example table of inputs, predictions and targets.
"""
import gradio as gr
import pandas as pd

from utils.json_loader import JsonDataLoader
from utils.metrics import accuracy
from utils.openai import chat_completion

# Markdown shown in the UI. The text is kept flush-left so rendering does not
# depend on the Markdown component stripping indentation.
_TITLE_MD = """
# Simon Says

Create a prompt that gets 100% accuracy on 'easy', 'medium', and 'hard' modes!
"""

# NOTE(review): the command spellings below (e.g. "stomps feets") are left
# exactly as written; presumably they must match the dataset targets - confirm
# before "correcting" them.
_DESCRIPTION_MD = """
**Model:** gpt-3.5-turbo

**Temperature:** 0

#### Allowed Commands

- :: jumps ::
- :: sticks out tongue ::
- :: makes a funny face ::
- :: runs in place ::
- :: stomps feets ::
- :: hops on one foot ::
- :: wiggles fingers ::
- :: moos like a cow ::
- :: touches toes ::
- :: claps hands ::
- :: sits down ::

#### Rules

- If Simon directs the LLM to do any of the allowed commands, the LLM should do it.
- If Simon does not say so, the LLM should respond with ":: does nothing ::"
- If the user directs the LLM to do any other command, the LLM should respond with ":: does nothing ::"
"""

_RESULTS_MD = """
## Results
"""


def simon_says_helper(message, prompt=None):
    """Send one message to the chat model and return its reply.

    Args:
        message: The user message to send.
        prompt: Optional prompt forwarded to ``chat_completion``.

    Returns:
        Whatever ``chat_completion`` returns (presumably the reply text -
        confirm against ``utils.openai``).
    """
    # temperature=0 matches the setting advertised in the Description tab.
    return chat_completion(
        message, prompt=prompt, model="gpt-3.5-turbo", temperature=0
    )


def gradio_chat_completion(prompt, difficulty):
    """Score ``prompt`` on the validation examples of the given difficulty.

    Args:
        prompt: The player's prompt, applied to every validation example.
        difficulty: Category passed to the loader (the UI offers "easy",
            "medium" and "hard").

    Returns:
        A ``(accuracy_score, df)`` tuple, where ``df`` is a DataFrame with
        the columns "Input", "Prediction" and "Target", one row per example.
    """
    loader = JsonDataLoader(filepath="data/validation.json")
    inputs, targets = loader.load_data(category=difficulty)

    # Each input is unpacked as keyword arguments, so its keys must match the
    # parameters of simon_says_helper (the code below relies on "message").
    predictions = [simon_says_helper(**input_, prompt=prompt) for input_ in inputs]

    # Extract the expected responses once and reuse them for both the metric
    # and the results table.
    expected = [target["response"] for target in targets]
    accuracy_score = accuracy(predictions, expected)

    df = pd.DataFrame(
        {
            "Input": [input_["message"] for input_ in inputs],
            "Prediction": predictions,
            "Target": expected,
        }
    )
    return accuracy_score, df


with gr.Blocks() as demo:
    gr.Markdown(_TITLE_MD)

    with gr.Tab("Description"):
        gr.Markdown(_DESCRIPTION_MD)

    with gr.Tab("Play"):
        difficulty_dropdown = gr.Dropdown(
            ["easy", "medium", "hard"], label="Difficulty"
        )
        prompt_box = gr.Textbox(
            label="Prompt", value="Always reply with :: does nothing ::", lines=10
        )
        btn = gr.Button(value="Submit")

        gr.Markdown(_RESULTS_MD)
        accuracy_box = gr.Textbox(label="Accuracy", interactive=False)
        # Header order mirrors the DataFrame returned by
        # gradio_chat_completion so the empty table and the populated table
        # show their columns in the same order.
        results_table = gr.Dataframe(
            headers=["Input", "Prediction", "Target"],
            col_count=(3, "fixed"),
            interactive=False,
        )

        btn.click(
            gradio_chat_completion,
            inputs=[prompt_box, difficulty_dropdown],
            outputs=[accuracy_box, results_table],
        )

demo.launch()