Added OpenRouter + env file; basic functionality working
Browse files- .env.example +2 -0
- .gitignore +1 -0
- README.md +56 -1
- TestQuesitons.txt +3 -0
- app.py +47 -14
- watch.py +36 -0
.env.example
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
OPENROUTER_API_KEY=your_api_key_here
|
2 |
+
OPENROUTER_BASE_URL=https://openrouter.ai/api/v1/chat/completions
|
.gitignore
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
.env
|
README.md
CHANGED
@@ -1,6 +1,61 @@
|
|
1 |
# Vibes Benchmark v0.1
|
2 |
|
3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
|
5 |
Run it with
|
6 |
`python app.py`
|
|
|
1 |
# Vibes Benchmark v0.1
|
2 |
|
3 |
+
A tool for benchmarking different AI models by comparing their responses to custom questions.
|
4 |
+
|
5 |
+
## Prerequisites
|
6 |
+
|
7 |
+
- Python 3.8 or higher
|
8 |
+
- An OpenRouter API key ([Get one here](https://openrouter.ai/))
|
9 |
+
|
10 |
+
## Setup
|
11 |
+
|
12 |
+
1. Clone the repository:
|
13 |
+
```bash
|
14 |
+
git clone [repository-url]
|
15 |
+
cd vibes-benchmark
|
16 |
+
```
|
17 |
+
|
18 |
+
2. Install dependencies:
|
19 |
+
```bash
|
20 |
+
pip install -r requirements.txt
|
21 |
+
```
|
22 |
+
|
23 |
+
3. Configure environment variables:
|
24 |
+
```bash
|
25 |
+
cp .env.example .env
|
26 |
+
```
|
27 |
+
Then edit `.env` and add your OpenRouter API key
|
28 |
+
|
29 |
+
## Usage
|
30 |
+
|
31 |
+
1. Prepare a text file with your questions (one per line)
|
32 |
+
2. Run the application:
|
33 |
+
```bash
|
34 |
+
python app.py
|
35 |
+
```
|
36 |
+
3. Upload your questions file through the web interface
|
37 |
+
4. Click "Run Benchmark" to start comparing model responses
|
38 |
+
|
39 |
+
## Features
|
40 |
+
|
41 |
+
- Compare responses from different AI models side by side
|
42 |
+
- Supports up to 10 questions per benchmark
|
43 |
+
- Randomly selects different models for comparison
|
44 |
+
- Real-time response generation
|
45 |
+
|
46 |
+
## Supported Models
|
47 |
+
|
48 |
+
- Claude 3 Opus
|
49 |
+
- Claude 3 Sonnet
|
50 |
+
- Gemini Pro
|
51 |
+
- Mistral Medium
|
52 |
+
- Claude 2.1
|
53 |
+
- GPT-4 Turbo
|
54 |
+
- GPT-3.5 Turbo
|
55 |
+
|
56 |
+
## License
|
57 |
+
|
58 |
+
[Your chosen license]
|
59 |
|
60 |
Run it with
|
61 |
`python app.py`
|
TestQuesitons.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
How many states are in america?
|
2 |
+
|
3 |
+
How much wood could a woodchuck chuck if a woodchuck could chuck wood?
|
app.py
CHANGED
@@ -1,6 +1,12 @@
|
|
1 |
import gradio as gr
|
2 |
import random
|
3 |
import time
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
|
5 |
MAX_QUESTIONS = 10 # Maximum number of questions to support
|
6 |
|
@@ -8,26 +14,53 @@ MAX_QUESTIONS = 10 # Maximum number of questions to support
|
|
8 |
# Fix the models
|
9 |
#
|
10 |
MODELS = [
|
11 |
-
"anthropic/claude-3-opus",
|
12 |
-
"anthropic/claude-3-sonnet",
|
13 |
"google/gemini-pro",
|
14 |
-
"
|
15 |
-
"
|
16 |
-
"
|
17 |
-
"
|
18 |
]
|
19 |
#
|
20 |
######
|
21 |
|
22 |
-
|
23 |
-
|
24 |
-
|
|
|
|
|
|
|
|
|
25 |
def get_response(question, model):
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
#
|
30 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
31 |
|
32 |
def read_questions(file_obj):
|
33 |
"""Read questions from uploaded file and return as list"""
|
|
|
1 |
import gradio as gr
import random
import time
import os
import requests
from dotenv import load_dotenv

# Pull OPENROUTER_* settings from a local .env file into the process env.
load_dotenv()

MAX_QUESTIONS = 10  # Maximum number of questions to support

######
# Fix the models
#
MODELS = [
    "anthropic/claude-3-opus-20240229",
    "anthropic/claude-3-sonnet-20240229",
    "google/gemini-pro",
    "mistralai/mistral-medium",  # Updated from mistral-7b-instruct
    "anthropic/claude-2.1",
    "openai/gpt-4-turbo-preview",
    "openai/gpt-3.5-turbo",
]
#
######

# Get configuration from environment variables
OPENROUTER_API_KEY = os.getenv('OPENROUTER_API_KEY')
OPENROUTER_BASE_URL = os.getenv('OPENROUTER_BASE_URL')

# Fail fast at startup rather than on the first API call.
if not OPENROUTER_API_KEY or not OPENROUTER_BASE_URL:
    raise ValueError("Missing required environment variables. Please check your .env file.")
|
34 |
+
|
35 |
def get_response(question, model):
    """Query the OpenRouter chat-completions API and return the reply text.

    Parameters:
        question (str): the user prompt to send.
        model (str): OpenRouter model identifier (e.g. "openai/gpt-3.5-turbo").

    Returns:
        str: the assistant's reply on success, or a human-readable
        "Error: ..." string when the request fails or the response body
        does not have the expected shape. Callers treat the return value
        as display text, so errors are reported in-band rather than raised.
    """
    headers = {
        "Authorization": f"Bearer {OPENROUTER_API_KEY}",
        "HTTP-Referer": "http://localhost:7860",  # Replace with your actual domain
        "Content-Type": "application/json"
    }

    data = {
        "model": model,
        "messages": [
            {"role": "user", "content": question}
        ]
    }

    try:
        response = requests.post(
            OPENROUTER_BASE_URL,
            headers=headers,
            json=data,
            timeout=30  # 30 second timeout
        )
        response.raise_for_status()

        result = response.json()
        return result['choices'][0]['message']['content']

    except requests.exceptions.RequestException as e:
        return f"Error: Failed to get response from {model}: {str(e)}"
    except (KeyError, IndexError, TypeError, ValueError) as e:
        # Bug fix: an HTTP-200 body without the expected
        # choices[0].message.content structure (or a non-JSON body)
        # previously raised an unhandled exception and crashed the run.
        return f"Error: Unexpected response format from {model}: {str(e)}"
|
64 |
|
65 |
def read_questions(file_obj):
|
66 |
"""Read questions from uploaded file and return as list"""
|
watch.py
ADDED
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler
import os
import subprocess
import time
import sys


class AppReloader(FileSystemEventHandler):
    """Watch app.py and restart it in a subprocess whenever it changes."""

    # Minimum seconds between restarts. Bug fix: editors commonly emit
    # several modification events for a single save, which previously
    # restarted the app multiple times back to back.
    DEBOUNCE_SECONDS = 1.0

    def __init__(self):
        self.process = None          # handle of the running app.py, if any
        self._last_restart = 0.0     # monotonic timestamp of last restart
        self.start_app()

    def start_app(self):
        """(Re)launch app.py, terminating any previous instance first."""
        if self.process:
            self.process.terminate()
            self.process.wait()  # reap the old process before respawning
        print("\n--- Restarting app.py ---\n")
        self.process = subprocess.Popen([sys.executable, "app.py"])
        self._last_restart = time.monotonic()

    def on_modified(self, event):
        # Bug fix: compare the basename instead of endswith('app.py'),
        # which also matched unrelated files such as "my_app.py".
        if os.path.basename(event.src_path) != 'app.py':
            return
        if time.monotonic() - self._last_restart >= self.DEBOUNCE_SECONDS:
            self.start_app()


if __name__ == "__main__":
    event_handler = AppReloader()
    observer = Observer()
    observer.schedule(event_handler, path='.', recursive=False)
    observer.start()

    try:
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        observer.stop()
        # Don't leave an orphaned app.py behind on Ctrl-C.
        if event_handler.process:
            event_handler.process.terminate()
    observer.join()