atin121 committed
Commit 67a3b3a
Parent: 06fdab0

Added OpenRouter + env file; basic functionality working
Files changed (6):
  1. .env.example +2 -0
  2. .gitignore +1 -0
  3. README.md +56 -1
  4. TestQuesitons.txt +3 -0
  5. app.py +47 -14
  6. watch.py +36 -0
.env.example ADDED
@@ -0,0 +1,2 @@
+ OPENROUTER_API_KEY=your_api_key_here
+ OPENROUTER_BASE_URL=https://openrouter.ai/api/v1/chat/completions
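For a quick sanity check that these variables are picked up before launching the app, a few lines of `python-dotenv` are enough. This is a minimal sketch, not part of the commit; the `check_env.py` filename is hypothetical, and it mirrors how `app.py` loads the same variables further down.

```python
# check_env.py (hypothetical helper, not included in this commit)
import os
from dotenv import load_dotenv  # provided by the python-dotenv package

load_dotenv()  # reads key=value pairs from .env in the current directory

for name in ("OPENROUTER_API_KEY", "OPENROUTER_BASE_URL"):
    # Report whether each variable is visible, without printing its value
    print(f"{name}: {'set' if os.getenv(name) else 'MISSING'}")
```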
.gitignore ADDED
@@ -0,0 +1 @@
+ .env
README.md CHANGED
@@ -1,6 +1,61 @@
  # Vibes Benchmark v0.1
 
- Benchmark on Vibes
+ A tool for benchmarking different AI models by comparing their responses to custom questions.
+
+ ## Prerequisites
+
+ - Python 3.8 or higher
+ - An OpenRouter API key ([Get one here](https://openrouter.ai/))
+
+ ## Setup
+
+ 1. Clone the repository:
+ ```bash
+ git clone [repository-url]
+ cd vibes-benchmark
+ ```
+
+ 2. Install dependencies:
+ ```bash
+ pip install -r requirements.txt
+ ```
+
+ 3. Configure environment variables:
+ ```bash
+ cp .env.example .env
+ ```
+ Then edit `.env` and add your OpenRouter API key
+
+ ## Usage
+
+ 1. Prepare a text file with your questions (one per line)
+ 2. Run the application:
+ ```bash
+ python app.py
+ ```
+ 3. Upload your questions file through the web interface
+ 4. Click "Run Benchmark" to start comparing model responses
+
+ ## Features
+
+ - Compare responses from different AI models side by side
+ - Supports up to 10 questions per benchmark
+ - Randomly selects different models for comparison
+ - Real-time response generation
+
+ ## Supported Models
+
+ - Claude 3 Opus
+ - Claude 3 Sonnet
+ - Gemini Pro
+ - Mistral Medium
+ - Claude 2.1
+ - GPT-4 Turbo
+ - GPT-3.5 Turbo
+
+ ## License
+
+ [Your chosen license]
 
  Run it with
  `python app.py`
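Note that the Setup section references `requirements.txt`, which is not among the files changed in this commit. Judging from the imports added in `app.py` and `watch.py` below, a plausible (unverified) dependency list would look like this:

```
gradio
requests
python-dotenv
watchdog
```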
TestQuesitons.txt ADDED
@@ -0,0 +1,3 @@
+ How many states are in america?
+
+ How much wood could a woodchuck chuck if a woodchuck could chuck wood?
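In `app.py` below, the stubbed `get_response` is replaced with a real OpenRouter request, and the reply is read out of the JSON body via `result['choices'][0]['message']['content']`. OpenRouter returns an OpenAI-style chat completion payload; a trimmed, illustrative sketch of that shape follows (the values are placeholders, and real responses also carry fields such as `id`, `model`, and `usage`):

```python
# Illustrative OpenAI-style chat completion payload (placeholder values only)
result = {
    "choices": [
        {
            "message": {
                "role": "assistant",
                "content": "There are 50 states in the United States.",
            }
        }
    ]
}

# The access pattern used by get_response in app.py
print(result["choices"][0]["message"]["content"])
```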
app.py CHANGED
@@ -1,6 +1,12 @@
  import gradio as gr
  import random
  import time
+ import os
+ import requests
+ from dotenv import load_dotenv
+
+ # Load environment variables
+ load_dotenv()
 
  MAX_QUESTIONS = 10 # Maximum number of questions to support
 
@@ -8,26 +14,53 @@ MAX_QUESTIONS = 10 # Maximum number of questions to support
  # Fix the models
  #
  MODELS = [
-     "anthropic/claude-3-opus",
-     "anthropic/claude-3-sonnet",
+     "anthropic/claude-3-opus-20240229",
+     "anthropic/claude-3-sonnet-20240229",
      "google/gemini-pro",
-     "meta-llama/llama-2-70b-chat",
-     "mistral/mistral-medium",
-     "deepseek/deepseek-coder",
-     "deepseek/deepseek-r1",
+     "mistralai/mistral-medium",  # Updated from mistral-7b-instruct
+     "anthropic/claude-2.1",
+     "openai/gpt-4-turbo-preview",
+     "openai/gpt-3.5-turbo"
  ]
  #
  ######
 
- ######
- # Add OpenRouter here
- #
+ # Get configuration from environment variables
+ OPENROUTER_API_KEY = os.getenv('OPENROUTER_API_KEY')
+ OPENROUTER_BASE_URL = os.getenv('OPENROUTER_BASE_URL')
+
+ if not OPENROUTER_API_KEY or not OPENROUTER_BASE_URL:
+     raise ValueError("Missing required environment variables. Please check your .env file.")
+
  def get_response(question, model):
-     # Simulate an API call with a random delay
-     time.sleep(random.uniform(0.5, 1.5))
-     return f"Sample response from {model} for: {question}"
-     #
-     ######
+     """Get response from OpenRouter API for the given question and model."""
+     headers = {
+         "Authorization": f"Bearer {OPENROUTER_API_KEY}",
+         "HTTP-Referer": "http://localhost:7860",  # Replace with your actual domain
+         "Content-Type": "application/json"
+     }
+
+     data = {
+         "model": model,
+         "messages": [
+             {"role": "user", "content": question}
+         ]
+     }
+
+     try:
+         response = requests.post(
+             OPENROUTER_BASE_URL,
+             headers=headers,
+             json=data,
+             timeout=30  # 30 second timeout
+         )
+         response.raise_for_status()
+
+         result = response.json()
+         return result['choices'][0]['message']['content']
+
+     except requests.exceptions.RequestException as e:
+         return f"Error: Failed to get response from {model}: {str(e)}"
 
  def read_questions(file_obj):
      """Read questions from uploaded file and return as list"""
watch.py ADDED
@@ -0,0 +1,36 @@
+ from watchdog.observers import Observer
+ from watchdog.events import FileSystemEventHandler
+ import subprocess
+ import time
+ import sys
+
+ class AppReloader(FileSystemEventHandler):
+     def __init__(self):
+         self.process = None
+         self.start_app()
+
+     def start_app(self):
+         if self.process:
+             self.process.terminate()
+             self.process.wait()
+         print("\n--- Restarting app.py ---\n")
+         self.process = subprocess.Popen([sys.executable, "app.py"])
+
+     def on_modified(self, event):
+         if event.src_path.endswith('app.py'):
+             self.start_app()
+
+ if __name__ == "__main__":
+     event_handler = AppReloader()
+     observer = Observer()
+     observer.schedule(event_handler, path='.', recursive=False)
+     observer.start()
+
+     try:
+         while True:
+             time.sleep(1)
+     except KeyboardInterrupt:
+         observer.stop()
+         if event_handler.process:
+             event_handler.process.terminate()
+     observer.join()
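To use the reloader during development, run `python watch.py` from the repository root instead of launching `app.py` directly: it starts `app.py` as a subprocess and restarts it whenever the file is saved. This assumes the `watchdog` package is installed; it is not added to any dependency file in this commit.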