|
import gradio as gr |
|
import random |
|
import time |
|
import os |
|
import requests |
|
from dotenv import load_dotenv |
|
|
|
|
|
load_dotenv() |
|
|
|
# Maximum number of questions accepted per upload. The UI pre-builds one
# output group per slot at construction time, so this bound is fixed.
MAX_QUESTIONS = 10


# Pool of OpenRouter model identifiers. For each question, two distinct
# models are drawn at random from this list.
MODELS = [
    "anthropic/claude-3-opus-20240229",
    "anthropic/claude-3-sonnet-20240229",
    "google/gemini-pro",
    "mistralai/mistral-medium",
    "anthropic/claude-2.1",
    "openai/gpt-4-turbo-preview",
    "openai/gpt-3.5-turbo"
]


# OpenRouter credentials and endpoint, loaded from the environment
# (populated from .env by load_dotenv() above).
OPENROUTER_API_KEY = os.getenv('OPENROUTER_API_KEY')
OPENROUTER_BASE_URL = os.getenv('OPENROUTER_BASE_URL')

# Fail fast at import time if configuration is incomplete, rather than
# erroring on the first API call.
if not OPENROUTER_API_KEY or not OPENROUTER_BASE_URL:
    raise ValueError("Missing required environment variables. Please check your .env file.")
|
|
|
def get_response(question, model):
    """Query the OpenRouter chat-completions endpoint for a single answer.

    Args:
        question: User prompt, sent as a one-turn chat message.
        model: OpenRouter model identifier (e.g. "openai/gpt-3.5-turbo").

    Returns:
        The assistant's reply text on success, otherwise a human-readable
        "Error: ..." string. Callers display the return value either way,
        so no exception is allowed to escape this function.
    """
    headers = {
        "Authorization": f"Bearer {OPENROUTER_API_KEY}",
        # OpenRouter uses the referer for app attribution.
        "HTTP-Referer": "http://localhost:7860",
        "Content-Type": "application/json"
    }

    data = {
        "model": model,
        "messages": [
            {"role": "user", "content": question}
        ]
    }

    try:
        response = requests.post(
            OPENROUTER_BASE_URL,
            headers=headers,
            json=data,
            timeout=30
        )
        response.raise_for_status()

        result = response.json()
        return result['choices'][0]['message']['content']

    except requests.exceptions.RequestException as e:
        return f"Error: Failed to get response from {model}: {str(e)}"
    except (KeyError, IndexError, TypeError, ValueError) as e:
        # Unexpected payload shape (e.g. an API error object without
        # "choices", or a non-JSON body). Previously this raised and
        # aborted the entire benchmark run mid-stream.
        return f"Error: Unexpected response from {model}: {str(e)}"
|
|
|
def read_questions(file_obj):
    """Read questions from the uploaded file, one per non-blank line.

    Args:
        file_obj: Gradio file wrapper; only its ``.name`` attribute (path
            to the uploaded temp file) is used.

    Returns:
        List of stripped, non-empty question strings, in file order.

    Raises:
        gr.Error: If the file contains more than MAX_QUESTIONS questions.
    """
    # Decode explicitly as UTF-8 so uploads with non-ASCII questions are
    # read consistently regardless of the host's locale default encoding.
    with open(file_obj.name, 'r', encoding='utf-8') as file:
        # Iterate the file lazily instead of materializing via readlines().
        questions = [q.strip() for q in file if q.strip()]
    if len(questions) > MAX_QUESTIONS:
        raise gr.Error(f"Maximum {MAX_QUESTIONS} questions allowed.")
    return questions
|
|
|
# Build the UI. All MAX_QUESTIONS question groups are created up front
# (components cannot be added after launch); process_file below toggles
# their visibility to match the uploaded file.
with gr.Blocks() as demo:
    gr.Markdown("# Vibes Benchmark\nUpload a `.txt` file with **one question per line**.")

    file_input = gr.File(label="Upload your questions (.txt)")
    run_button = gr.Button("Run Benchmark", variant="primary")

    # One hidden group per question slot. Each group shows two columns:
    # a collapsed accordion revealing the (randomly chosen) model name,
    # plus a read-only textbox for that model's response.
    response_areas = []
    for i in range(MAX_QUESTIONS):
        with gr.Group(visible=False) as group_i:
            gr.Markdown(f"### Question {i+1}")
            with gr.Row():
                with gr.Column():

                    with gr.Accordion("Model 1", open=False):
                        model1_i = gr.Markdown("")  # filled in at run time
                    response1_i = gr.Textbox(label="Response 1", interactive=False, lines=4)
                with gr.Column():

                    with gr.Accordion("Model 2", open=False):
                        model2_i = gr.Markdown("")  # filled in at run time
                    response2_i = gr.Textbox(label="Response 2", interactive=False, lines=4)
            gr.Markdown("---")

        # Keep handles to this question's components for event wiring below.
        response_areas.append({
            'group': group_i,
            'model1': model1_i,
            'response1': response1_i,
            'model2': model2_i,
            'response2': response2_i
        })
|
|
|
def process_file(file): |
|
"""Show/hide question groups depending on how many questions are in the file.""" |
|
if file is None: |
|
raise gr.Error("Please upload a file first.") |
|
questions = read_questions(file) |
|
|
|
|
|
updates = [] |
|
for i in range(MAX_QUESTIONS): |
|
updates.append(gr.update(visible=(i < len(questions)))) |
|
|
|
return updates |
|
|
|
def run_benchmark(file): |
|
"""Generator function yielding partial updates in real time.""" |
|
questions = read_questions(file) |
|
|
|
|
|
|
|
|
|
updates = [gr.update(value="")] * (MAX_QUESTIONS * 4) |
|
|
|
|
|
for i, question in enumerate(questions): |
|
|
|
model_1 = random.choice(MODELS) |
|
updates[i*4] = gr.update(value=f"**{model_1}**") |
|
yield updates |
|
|
|
|
|
response_1 = get_response(question, model_1) |
|
updates[i*4 + 1] = gr.update(value=response_1) |
|
yield updates |
|
|
|
|
|
remaining_models = [m for m in MODELS if m != model_1] |
|
model_2 = random.choice(remaining_models) |
|
updates[i*4 + 2] = gr.update(value=f"**{model_2}**") |
|
yield updates |
|
|
|
|
|
response_2 = get_response(question, model_2) |
|
updates[i*4 + 3] = gr.update(value=response_2) |
|
yield updates |
|
|
|
|
|
update_targets = [] |
|
for area in response_areas: |
|
update_targets.append(area['model1']) |
|
update_targets.append(area['response1']) |
|
update_targets.append(area['model2']) |
|
update_targets.append(area['response2']) |
|
|
|
|
|
file_input.change( |
|
fn=process_file, |
|
inputs=file_input, |
|
outputs=[area['group'] for area in response_areas] |
|
) |
|
|
|
run_button.click( |
|
fn=run_benchmark, |
|
inputs=file_input, |
|
outputs=update_targets |
|
) |
|
|
|
|
|
# Queueing is required so the generator-based click handler can stream
# intermediate yields to the browser.
demo.queue()
demo.launch()
|
|