atin121 committed on
Commit
fe81a63
·
1 Parent(s): be0c1cc

ready for demo

Browse files
.gradio/certificate.pem ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ -----BEGIN CERTIFICATE-----
2
+ MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
3
+ TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
4
+ cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
5
+ WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
6
+ ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
7
+ MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
8
+ h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
9
+ 0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
10
+ A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
11
+ T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
12
+ B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
13
+ B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
14
+ KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
15
+ OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
16
+ jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
17
+ qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
18
+ rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
19
+ HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
20
+ hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
21
+ ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
22
+ 3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
23
+ NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
24
+ ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
25
+ TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
26
+ jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
27
+ oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
28
+ 4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
29
+ mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
30
+ emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
31
+ -----END CERTIFICATE-----
TestQuesitons.txt DELETED
@@ -1,3 +0,0 @@
1
- How many states are in america?
2
-
3
- How much wood could a woodchuck chuck if a woodchuck could chuck wood?
 
 
 
 
app.py CHANGED
@@ -7,26 +7,37 @@ import json
7
  from dotenv import load_dotenv
8
  import threading
9
  from queue import Queue, Empty
 
10
 
11
  # Load environment variables
12
  load_dotenv()
13
 
 
 
 
 
 
 
 
14
  MAX_QUESTIONS = 10 # Maximum number of questions to support
15
 
16
  ######
17
- # Fix the models
18
  #
19
  MODELS = [
20
- "anthropic/claude-3-opus-20240229",
21
- "anthropic/claude-3-sonnet-20240229",
22
- "google/gemini-pro",
23
- "mistralai/mistral-medium", # Updated from mistral-7b-instruct
24
- "anthropic/claude-2.1",
25
- "openai/gpt-4-turbo-preview",
26
- "openai/gpt-3.5-turbo"
 
 
 
 
 
27
  ]
28
- #
29
- ######
30
 
31
  # Get configuration from environment variables
32
  OPENROUTER_API_KEY = os.getenv('OPENROUTER_API_KEY')
@@ -44,7 +55,7 @@ def get_response(question, model):
44
  }
45
 
46
  data = {
47
- "model": model,
48
  "messages": [
49
  {"role": "user", "content": question}
50
  ],
@@ -92,36 +103,158 @@ def read_questions(file_obj):
92
  return questions
93
 
94
  with gr.Blocks(title="Vibesmark Test Suite") as demo:
95
- gr.Markdown("# Vibesmark Test Suite\nUpload a `.txt` file with **one question per line**.")
96
 
97
  # Store current state
98
- state = gr.State({"questions": [], "current_index": 0})
 
 
 
 
 
 
99
 
100
- file_input = gr.File(label="Upload your questions (.txt)")
101
  with gr.Row():
102
- prev_btn = gr.Button("← Previous", interactive=False)
103
- question_counter = gr.Markdown("Question 0 / 0")
104
- next_btn = gr.Button("Next →", interactive=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
105
 
106
- with gr.Group() as question_group:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
  question_display = gr.Markdown("### Upload a file to begin")
108
  with gr.Row():
109
  with gr.Column():
110
- with gr.Accordion("Model 1", open=False):
111
- model1_display = gr.Markdown("")
112
- response1_display = gr.Textbox(label="Response 1", interactive=False, lines=4)
113
  with gr.Column():
114
- with gr.Accordion("Model 2", open=False):
115
- model2_display = gr.Markdown("")
116
- response2_display = gr.Textbox(label="Response 2", interactive=False, lines=4)
117
-
118
- run_button = gr.Button("Run Comparison", variant="primary")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
119
 
120
  def process_file(file, state):
121
  if file is None:
122
  raise gr.Error("Please upload a file first.")
123
  questions = read_questions(file)
124
- new_state = {"questions": questions, "current_index": 0}
 
 
 
 
 
 
125
 
126
  # Return outputs in order matching the outputs list in the event handler
127
  return [
@@ -129,14 +262,22 @@ with gr.Blocks(title="Vibesmark Test Suite") as demo:
129
  f"Question 1 / {len(questions)}", # question_counter
130
  gr.update(interactive=False), # prev_btn
131
  gr.update(interactive=len(questions) > 1), # next_btn
132
- gr.update(value=""), # model1_display
133
  gr.update(value=""), # response1_display
134
- gr.update(value=""), # model2_display
135
  gr.update(value=""), # response2_display
136
- new_state # state
 
 
 
 
 
 
137
  ]
138
 
139
- def navigate_question(direction, state):
 
 
 
 
140
  questions = state["questions"]
141
  current_index = state["current_index"]
142
 
@@ -144,21 +285,94 @@ with gr.Blocks(title="Vibesmark Test Suite") as demo:
144
  current_index += 1
145
  elif direction == "prev" and current_index > 0:
146
  current_index -= 1
 
 
147
 
148
  new_state = state.copy()
149
  new_state["current_index"] = current_index
150
 
151
- # Return outputs in order matching the outputs list in the event handler
152
- return [
 
 
 
 
 
 
153
  f"### Question {current_index + 1}:\n{questions[current_index]}", # question_display
154
  f"Question {current_index + 1} / {len(questions)}", # question_counter
155
  gr.update(interactive=current_index > 0), # prev_btn
156
  gr.update(interactive=current_index < len(questions) - 1), # next_btn
157
- gr.update(value=""), # model1_display
158
- gr.update(value=""), # response1_display
159
- gr.update(value=""), # model2_display
160
- gr.update(value=""), # response2_display
161
- new_state # state
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
162
  ]
163
 
164
  def get_responses_in_parallel(question, model1, model2):
@@ -213,37 +427,81 @@ with gr.Blocks(title="Vibesmark Test Suite") as demo:
213
  t1.join()
214
  t2.join()
215
 
216
- def run_comparison(state):
217
- """
218
- Run comparison for the current question, streaming both models'
219
- responses in parallel.
220
- """
221
- if not state["questions"]:
222
- raise gr.Error("Please upload a file first.")
223
-
224
- current_question = state["questions"][state["current_index"]]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
225
 
226
- # Pick two distinct models
227
- model_1 = random.choice(MODELS)
228
- remaining_models = [m for m in MODELS if m != model_1]
229
- model_2 = random.choice(remaining_models)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
230
 
231
- # Initial yield to display chosen models
232
- yield [
233
- gr.update(value=f"**{model_1}**"),
234
- gr.update(value=""),
235
- gr.update(value=f"**{model_2}**"),
236
- gr.update(value="")
 
 
 
 
237
  ]
238
 
239
- # Now stream both model responses in parallel
240
- for partial1, partial2 in get_responses_in_parallel(current_question, model_1, model_2):
241
- yield [
242
- gr.update(value=f"**{model_1}**"),
243
- gr.update(value=partial1),
244
- gr.update(value=f"**{model_2}**"),
245
- gr.update(value=partial2)
246
- ]
247
 
248
  # Connect events
249
  file_input.change(
@@ -254,55 +512,135 @@ with gr.Blocks(title="Vibesmark Test Suite") as demo:
254
  question_counter,
255
  prev_btn,
256
  next_btn,
257
- model1_display,
258
  response1_display,
259
- model2_display,
260
  response2_display,
261
- state
 
 
 
 
 
 
262
  ]
263
  )
264
 
265
  prev_btn.click(
266
- fn=lambda state: navigate_question("prev", state),
267
- inputs=[state],
 
 
 
 
 
268
  outputs=[
269
  question_display,
270
  question_counter,
271
  prev_btn,
272
  next_btn,
273
- model1_display,
274
  response1_display,
275
- model2_display,
276
  response2_display,
277
- state
 
 
 
 
278
  ]
279
  )
280
 
281
  next_btn.click(
282
- fn=lambda state: navigate_question("next", state),
283
- inputs=[state],
 
 
 
 
 
284
  outputs=[
285
  question_display,
286
  question_counter,
287
  prev_btn,
288
  next_btn,
289
- model1_display,
290
  response1_display,
291
- model2_display,
292
  response2_display,
293
- state
 
 
 
 
294
  ]
295
  )
296
 
297
- run_button.click(
298
- fn=run_comparison,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
299
  inputs=[state],
300
  outputs=[
301
- model1_display,
 
 
 
 
302
  response1_display,
303
- model2_display,
304
- response2_display
 
 
 
 
 
 
 
305
  ]
 
 
 
306
  )
307
 
308
  # Add footer with subtle styling
@@ -313,4 +651,34 @@ demo.queue()
313
 
314
  # Launch with the appropriate host setting for deployment
315
  if __name__ == "__main__":
316
- demo.launch(share=False, server_name="0.0.0.0", server_port=7860)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  from dotenv import load_dotenv
8
  import threading
9
  from queue import Queue, Empty
10
+ import shutil
11
 
12
  # Load environment variables
13
  load_dotenv()
14
 
15
+ # Create static directory if it doesn't exist
16
+ os.makedirs('static', exist_ok=True)
17
+
18
+ # Copy testquestions.txt to static directory if it exists
19
+ if os.path.exists('testquestions.txt'):
20
+ shutil.copy2('testquestions.txt', 'static/testquestions.txt')
21
+
22
  MAX_QUESTIONS = 10 # Maximum number of questions to support
23
 
24
  ######
25
+ # Models configuration
26
  #
27
  MODELS = [
28
+ # Standard Language Models
29
+ {"display_name": "Claude 3 Opus", "model_id": "anthropic/claude-3-opus-20240229"},
30
+ {"display_name": "Claude 3.5 Sonnet", "model_id": "anthropic/claude-3.5-sonnet"},
31
+ {"display_name": "Gemini Pro", "model_id": "google/gemini-pro"},
32
+ {"display_name": "Mistral Medium", "model_id": "mistralai/mistral-medium"},
33
+ {"display_name": "Claude 2.1", "model_id": "anthropic/claude-2.1"},
34
+ {"display_name": "GPT-4 Turbo", "model_id": "openai/gpt-4-turbo-preview"},
35
+ {"display_name": "GPT-3.5 Turbo", "model_id": "openai/gpt-3.5-turbo"},
36
+ # Reasoning-specialized Models
37
+ {"display_name": "Reasoner: O1-Mini", "model_id": "openai/o1-mini"},
38
+ {"display_name": "Reasoner: O1 Preview", "model_id": "openai/o1-preview"},
39
+ {"display_name": "Reasoner: DeepSeek R1", "model_id": "deepseek/deepseek-r1"}
40
  ]
 
 
41
 
42
  # Get configuration from environment variables
43
  OPENROUTER_API_KEY = os.getenv('OPENROUTER_API_KEY')
 
55
  }
56
 
57
  data = {
58
+ "model": model, # model is now the direct model_id
59
  "messages": [
60
  {"role": "user", "content": question}
61
  ],
 
103
  return questions
104
 
105
  with gr.Blocks(title="Vibesmark Test Suite") as demo:
106
+ gr.Markdown("# Vibesmark Test Suite")
107
 
108
  # Store current state
109
+ state = gr.State({
110
+ "questions": [],
111
+ "current_index": 0,
112
+ "preferences": {}, # Store preferences for each question
113
+ "current_model_order": {}, # Track which model is shown on which side
114
+ "test_started": False # Track if test has started
115
+ })
116
 
117
+ # Move model selection to the top
118
  with gr.Row():
119
+ with gr.Column():
120
+ model1_selector = gr.Dropdown(
121
+ choices={model["model_id"]: model["display_name"] for model in MODELS},
122
+ label="Select First Model",
123
+ value="anthropic/claude-3.5-sonnet",
124
+ type="value",
125
+ allow_custom_value=False
126
+ )
127
+ with gr.Column():
128
+ model2_selector = gr.Dropdown(
129
+ choices={model["model_id"]: model["display_name"] for model in MODELS},
130
+ label="Select Second Model",
131
+ value="google/gemini-pro",
132
+ type="value",
133
+ allow_custom_value=False
134
+ )
135
 
136
+ with gr.Row():
137
+ with gr.Column():
138
+ gr.Markdown("Upload a `.txt` file with **one question per line**.")
139
+ file_input = gr.File(label="Upload your questions (.txt)")
140
+ with gr.Column():
141
+ gr.Markdown("Download example questions:")
142
+ gr.HTML('<a href="testquestions.txt" download>Download testquestions.txt</a>')
143
+
144
+ with gr.Row():
145
+ start_btn = gr.Button("Start Test", variant="primary")
146
+ finish_btn = gr.Button("Finish & Show Results", variant="secondary", interactive=False)
147
+ results_display = gr.Markdown("Click 'Finish & Show Results' when you're done to see the summary", visible=True)
148
+
149
+ # Add confirmation dialog
150
+ with gr.Row(visible=False) as confirm_dialog:
151
+ gr.Markdown("Are you sure you want to finish the test? This will reset all progress.")
152
+ with gr.Row():
153
+ confirm_btn = gr.Button("Yes, Finish Test", variant="primary")
154
+ cancel_btn = gr.Button("Cancel", variant="secondary")
155
+
156
+ with gr.Group(visible=False) as question_group:
157
  question_display = gr.Markdown("### Upload a file to begin")
158
  with gr.Row():
159
  with gr.Column():
160
+ response1_display = gr.Textbox(label="Response A", interactive=False, lines=8)
 
 
161
  with gr.Column():
162
+ response2_display = gr.Textbox(label="Response B", interactive=False, lines=8)
163
+
164
+ # Add preference selection buttons
165
+ with gr.Row():
166
+ prefer_a_btn = gr.Button("Prefer Response A", interactive=False, variant="secondary")
167
+ preference_display = gr.Markdown("Make your selection", container=True)
168
+ prefer_b_btn = gr.Button("Prefer Response B", interactive=False, variant="secondary")
169
+
170
+ # Add vertical spacing
171
+ gr.Row(height=30)
172
+
173
+ # Move navigation to bottom of question group
174
+ with gr.Row():
175
+ prev_btn = gr.Button("← Previous", interactive=False)
176
+ question_counter = gr.Markdown("Question 0 / 0")
177
+ next_btn = gr.Button("Next →", interactive=False)
178
+
179
def start_test(state, model_1, model_2):
    """Start the test, lock model selection and stream the first comparison.

    This is a generator handler: it yields one list of updates per streaming
    step, ordered as state, model1_selector, model2_selector, start_btn,
    finish_btn, response1_display, response2_display, prefer_a_btn,
    prefer_b_btn, preference_display, question_group.

    Args:
        state: session state dict (reads "questions", "current_index",
            "preferences", "current_model_order").
        model_1: model id chosen in the first selector.
        model_2: model id chosen in the second selector.

    Raises:
        gr.Error: if no questions are loaded or both selectors hold the
            same model.
    """
    if not state["questions"]:
        raise gr.Error("Please upload a file first.")

    if model_1 == model_2:
        raise gr.Error("Please select different models for comparison.")

    # dict.copy() is shallow, so the nested "preferences" and
    # "current_model_order" dicts are shared with the incoming state.
    new_state = state.copy()
    new_state["test_started"] = True
    current_index = state["current_index"]
    current_question = state["questions"][current_index]

    def _outputs(response_a, response_b):
        """Build one update list in the order documented above."""
        current_pref = new_state["preferences"].get(current_index)
        if current_pref is None:
            pref_display = "Make your selection"
        else:
            pref_display = f"You preferred Response {current_pref}"
        return [
            new_state,
            gr.update(interactive=False),  # model1_selector
            gr.update(interactive=False),  # model2_selector
            gr.update(interactive=False),  # start_btn
            gr.update(interactive=True),   # finish_btn
            response_a,                    # response1_display
            response_b,                    # response2_display
            gr.update(interactive=True),   # prefer_a_btn
            gr.update(interactive=True),   # prefer_b_btn
            pref_display,                  # preference_display
            gr.update(visible=True),       # question_group
        ]

    # Randomly decide which model goes on which side, and record it before
    # the first yield so the first state handed back already knows the order.
    if random.choice([True, False]):
        model_a, model_b = model_1, model_2
    else:
        model_a, model_b = model_2, model_1
    new_state["current_model_order"][current_index] = {
        "A": model_a,
        "B": model_b,
    }

    # First yield: lock the controls and clear both response boxes.
    yield _outputs("", "")

    # Then stream both model responses in parallel.
    for partial1, partial2 in get_responses_in_parallel(current_question, model_a, model_b):
        yield _outputs(partial1, partial2)
246
 
247
  def process_file(file, state):
248
  if file is None:
249
  raise gr.Error("Please upload a file first.")
250
  questions = read_questions(file)
251
+ new_state = {
252
+ "questions": questions,
253
+ "current_index": 0,
254
+ "preferences": {},
255
+ "current_model_order": {},
256
+ "test_started": False
257
+ }
258
 
259
  # Return outputs in order matching the outputs list in the event handler
260
  return [
 
262
  f"Question 1 / {len(questions)}", # question_counter
263
  gr.update(interactive=False), # prev_btn
264
  gr.update(interactive=len(questions) > 1), # next_btn
 
265
  gr.update(value=""), # response1_display
 
266
  gr.update(value=""), # response2_display
267
+ gr.update(interactive=False), # prefer_a_btn
268
+ gr.update(interactive=False), # prefer_b_btn
269
+ "Make your selection", # preference_display
270
+ new_state, # state
271
+ gr.update(interactive=True), # start_btn
272
+ gr.update(interactive=False), # finish_btn
273
+ gr.update(visible=False) # question_group
274
  ]
275
 
276
+ def navigate_question(direction, state, model_1, model_2):
277
+ """Navigate to next/prev question and start fetching responses"""
278
+ if not state["test_started"]:
279
+ raise gr.Error("Please start the test first")
280
+
281
  questions = state["questions"]
282
  current_index = state["current_index"]
283
 
 
285
  current_index += 1
286
  elif direction == "prev" and current_index > 0:
287
  current_index -= 1
288
+ else:
289
+ raise gr.Error("No more questions in that direction")
290
 
291
  new_state = state.copy()
292
  new_state["current_index"] = current_index
293
 
294
+ # Get existing preference for this question if any
295
+ current_pref = state["preferences"].get(current_index, None)
296
+ pref_display = "Make your selection"
297
+ if current_pref is not None:
298
+ pref_display = f"You preferred Response {current_pref}"
299
+
300
+ # First yield to update the question display and clear responses
301
+ yield [
302
  f"### Question {current_index + 1}:\n{questions[current_index]}", # question_display
303
  f"Question {current_index + 1} / {len(questions)}", # question_counter
304
  gr.update(interactive=current_index > 0), # prev_btn
305
  gr.update(interactive=current_index < len(questions) - 1), # next_btn
306
+ "", # response1_display
307
+ "", # response2_display
308
+ gr.update(interactive=True), # prefer_a_btn - Enable immediately
309
+ gr.update(interactive=True), # prefer_b_btn - Enable immediately
310
+ pref_display, # preference_display
311
+ new_state, # state
312
+ gr.update(visible=True) # question_group
313
+ ]
314
+
315
+ # Now start fetching responses
316
+ current_question = questions[current_index]
317
+
318
+ # Randomly decide which model goes on which side
319
+ if random.choice([True, False]):
320
+ model_a, model_b = model_1, model_2
321
+ else:
322
+ model_a, model_b = model_2, model_1
323
+
324
+ # Store the model order in state
325
+ new_state["current_model_order"][current_index] = {
326
+ "A": model_a,
327
+ "B": model_b
328
+ }
329
+
330
+ # Stream both model responses in parallel
331
+ for partial1, partial2 in get_responses_in_parallel(current_question, model_a, model_b):
332
+ # Check current preference again in case it changed during streaming
333
+ current_pref = new_state["preferences"].get(current_index, None)
334
+ pref_display = "Make your selection"
335
+ if current_pref is not None:
336
+ pref_display = f"You preferred Response {current_pref}"
337
+
338
+ yield [
339
+ f"### Question {current_index + 1}:\n{questions[current_index]}", # question_display
340
+ f"Question {current_index + 1} / {len(questions)}", # question_counter
341
+ gr.update(interactive=current_index > 0), # prev_btn
342
+ gr.update(interactive=current_index < len(questions) - 1), # next_btn
343
+ partial1, # response1_display
344
+ partial2, # response2_display
345
+ gr.update(interactive=True), # prefer_a_btn - Keep enabled during streaming
346
+ gr.update(interactive=True), # prefer_b_btn - Keep enabled during streaming
347
+ pref_display, # preference_display - Maintain current preference
348
+ new_state, # state
349
+ gr.update(visible=True) # question_group
350
+ ]
351
+
352
def record_preference(choice, state):
    """Record the user's preference for the current question.

    Args:
        choice: "A" or "B", the side the user preferred.
        state: session state dict; reads "current_index", "preferences"
            and "current_model_order".

    Returns:
        A two-element list ``[new_state, message]`` where ``message`` names
        the preferred model and the other model ("Unknown" when no model
        order has been stored for the current question).

    Raises:
        ValueError: if ``choice`` is neither "A" nor "B".
    """
    if choice not in ("A", "B"):
        # Previously any other value was silently stored and reported as B.
        raise ValueError(f"choice must be 'A' or 'B', got {choice!r}")

    current_index = state["current_index"]

    # dict.copy() is shallow: the nested "preferences" dict is the same
    # object as in the incoming state, so this write is visible through both.
    # NOTE(review): the streaming handlers appear to rely on this sharing to
    # see preferences recorded mid-stream; confirm before deep-copying.
    new_state = state.copy()
    new_state["preferences"][current_index] = choice

    # Look up which model was shown on which side for this question.
    model_order = state["current_model_order"].get(current_index, {})
    model_a = model_order.get("A", "Unknown")
    model_b = model_order.get("B", "Unknown")

    if choice == "A":
        preferred_model, other_model = model_a, model_b
    else:
        preferred_model, other_model = model_b, model_a

    message = f"You preferred {preferred_model} over {other_model}"

    return [
        new_state,
        message,
    ]
377
 
378
  def get_responses_in_parallel(question, model1, model2):
 
427
  t1.join()
428
  t2.join()
429
 
430
def reset_interface():
    """Build the update list that returns every widget to its pre-test state.

    The list is ordered as: model1_selector, model2_selector, start_btn,
    finish_btn, response1_display, response2_display, prefer_a_btn,
    prefer_b_btn, preference_display, question_display, question_counter,
    prev_btn, next_btn, state, question_group.
    """
    def _interactive(flag):
        # A fresh update object per widget, never a shared one.
        return gr.update(interactive=flag)

    def _value(text):
        return gr.update(value=text)

    fresh_state = {
        "questions": [],
        "current_index": 0,
        "preferences": {},
        "current_model_order": {},
        "test_started": False,
    }

    updates = []
    # model1_selector, model2_selector, start_btn become usable again.
    updates.extend(_interactive(True) for _ in range(3))
    updates.append(_interactive(False))            # finish_btn
    updates.append(_value(""))                     # response1_display
    updates.append(_value(""))                     # response2_display
    updates.append(_interactive(False))            # prefer_a_btn
    updates.append(_interactive(False))            # prefer_b_btn
    updates.append("Make your selection")          # preference_display
    updates.append(_value("### Upload a file to begin"))  # question_display
    updates.append(_value("Question 0 / 0"))       # question_counter
    updates.append(_interactive(False))            # prev_btn
    updates.append(_interactive(False))            # next_btn
    updates.append(fresh_state)                    # state
    updates.append(gr.update(visible=False))       # question_group
    return updates
455
 
456
def generate_results_summary(state):
    """Summarise which model won which questions, then reset the interface.

    Args:
        state: session state dict; reads "preferences" (question index ->
            "A"/"B") and "current_model_order" (question index ->
            {"A": model_id, "B": model_id}).

    Returns:
        A list whose first element is the Markdown summary (or a "no
        preferences" notice), followed by everything ``reset_interface()``
        returns. Both paths return the same number of elements.
    """
    # NOTE(review): the last element from reset_interface() targets
    # question_group; the click handler's outputs list must end with
    # question_group for it to take effect. Confirm against the wiring.
    if not state["preferences"]:
        return ["No preferences recorded yet."] + reset_interface()

    # Map each model's display name to the 1-based question numbers it won.
    model_preferences = {}

    for q_idx, choice in state["preferences"].items():
        model_order = state["current_model_order"].get(q_idx, {})
        if not model_order:
            # No side assignment was stored for this question; skip it.
            continue

        preferred_model = model_order["A"] if choice == "A" else model_order["B"]

        # Fall back to the raw model id when it is not listed in MODELS.
        display_name = next(
            (m["display_name"] for m in MODELS if m["model_id"] == preferred_model),
            preferred_model,
        )

        model_preferences.setdefault(display_name, []).append(q_idx + 1)

    # List question numbers in ascending order, not in click order.
    summary_parts = [
        f"**{model}** won questions {', '.join(str(n) for n in sorted(numbers))}"
        for model, numbers in model_preferences.items()
    ]

    summary = "### Results Summary\n" + "\n\n".join(summary_parts)

    # reset_interface() already ends with the update that hides
    # question_group, so no extra trailing update is appended.
    return [summary] + reset_interface()
489
 
490
def show_confirm_dialog(state):
    """Show the finish-confirmation row when a test is in progress.

    Args:
        state: session state dict; reads "test_started" and "questions".

    Returns:
        A two-element list ``[confirm_dialog_update, results_display_value]``.
    """
    if not state["test_started"] or not state["questions"]:
        return [
            gr.update(visible=False),           # confirm_dialog
            # One plain message for the single results output; the old code
            # passed a nested list containing the whole reset payload.
            "No test in progress to finish.",   # results_display
        ]
    return [
        gr.update(visible=True),                # confirm_dialog
        gr.update(),                            # results_display: unchanged
    ]
501
 
502
def hide_confirm_dialog():
    """Return the update that hides the confirmation dialog."""
    hidden = gr.update(visible=False)
    return hidden
 
 
 
 
 
505
 
506
  # Connect events
507
  file_input.change(
 
512
  question_counter,
513
  prev_btn,
514
  next_btn,
 
515
  response1_display,
 
516
  response2_display,
517
+ prefer_a_btn,
518
+ prefer_b_btn,
519
+ preference_display,
520
+ state,
521
+ start_btn,
522
+ finish_btn,
523
+ question_group
524
  ]
525
  )
526
 
527
  prev_btn.click(
528
+ fn=navigate_question,
529
+ inputs=[
530
+ gr.State("prev"),
531
+ state,
532
+ model1_selector,
533
+ model2_selector
534
+ ],
535
  outputs=[
536
  question_display,
537
  question_counter,
538
  prev_btn,
539
  next_btn,
 
540
  response1_display,
 
541
  response2_display,
542
+ prefer_a_btn,
543
+ prefer_b_btn,
544
+ preference_display,
545
+ state,
546
+ question_group
547
  ]
548
  )
549
 
550
  next_btn.click(
551
+ fn=navigate_question,
552
+ inputs=[
553
+ gr.State("next"),
554
+ state,
555
+ model1_selector,
556
+ model2_selector
557
+ ],
558
  outputs=[
559
  question_display,
560
  question_counter,
561
  prev_btn,
562
  next_btn,
 
563
  response1_display,
 
564
  response2_display,
565
+ prefer_a_btn,
566
+ prefer_b_btn,
567
+ preference_display,
568
+ state,
569
+ question_group
570
  ]
571
  )
572
 
573
+ start_btn.click(
574
+ fn=start_test,
575
+ inputs=[state, model1_selector, model2_selector],
576
+ outputs=[
577
+ state,
578
+ model1_selector,
579
+ model2_selector,
580
+ start_btn,
581
+ finish_btn,
582
+ response1_display,
583
+ response2_display,
584
+ prefer_a_btn,
585
+ prefer_b_btn,
586
+ preference_display,
587
+ question_group
588
+ ]
589
+ )
590
+
591
+ # Connect preference buttons
592
+ prefer_a_btn.click(
593
+ fn=lambda state: record_preference("A", state),
594
+ inputs=[state],
595
+ outputs=[state, preference_display]
596
+ )
597
+
598
+ prefer_b_btn.click(
599
+ fn=lambda state: record_preference("B", state),
600
+ inputs=[state],
601
+ outputs=[state, preference_display]
602
+ )
603
+
604
+ # Connect results button to show confirmation first
605
+ finish_btn.click(
606
+ fn=show_confirm_dialog,
607
+ inputs=[state],
608
+ outputs=[
609
+ confirm_dialog,
610
+ results_display
611
+ ]
612
+ )
613
+
614
+ # Connect cancel button
615
+ cancel_btn.click(
616
+ fn=hide_confirm_dialog,
617
+ outputs=[confirm_dialog]
618
+ )
619
+
620
+ # Connect confirm button to actual finish action
621
+ confirm_btn.click(
622
+ fn=generate_results_summary,
623
  inputs=[state],
624
  outputs=[
625
+ results_display,
626
+ model1_selector,
627
+ model2_selector,
628
+ start_btn,
629
+ finish_btn,
630
  response1_display,
631
+ response2_display,
632
+ prefer_a_btn,
633
+ prefer_b_btn,
634
+ preference_display,
635
+ question_display,
636
+ question_counter,
637
+ prev_btn,
638
+ next_btn,
639
+ state
640
  ]
641
+ ).then(
642
+ fn=hide_confirm_dialog,
643
+ outputs=[confirm_dialog]
644
  )
645
 
646
  # Add footer with subtle styling
 
651
 
652
  # Launch with the appropriate host setting for deployment
653
  if __name__ == "__main__":
654
+ print("\nStarting Vibesmark Test Suite...")
655
+ print("You can access the app at: http://localhost:7860")
656
+
657
+ # Create a FastAPI app to serve the example file
658
+ from fastapi import FastAPI
659
+ from fastapi.responses import FileResponse
660
+ from fastapi.middleware.cors import CORSMiddleware
661
+
662
+ app = FastAPI()
663
+
664
+ # Add CORS middleware
665
+ app.add_middleware(
666
+ CORSMiddleware,
667
+ allow_origins=["*"],
668
+ allow_credentials=True,
669
+ allow_methods=["*"],
670
+ allow_headers=["*"],
671
+ )
672
+
673
+ @app.get("/testquestions.txt")
674
+ async def get_example_file():
675
+ return FileResponse("testquestions.txt")
676
+
677
+ # Mount FastAPI app to Gradio
678
+ demo.app.mount("/", app)
679
+
680
+ demo.launch(
681
+ server_name="0.0.0.0", # Allows external connections
682
+ server_port=7860,
683
+ share=False
684
+ )
static/testquestions.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ What is the capital of France?
2
+ Explain quantum entanglement in simple terms.
3
+ How does photosynthesis work?
4
+ What causes the seasons on Earth?
5
+ Write a haiku about artificial intelligence.
6
+ What are the key differences between classical and quantum computers?
7
+ Explain the concept of recursion in programming.
8
+ What is the significance of the number pi?
9
+ How do vaccines work to protect against diseases?
10
+ What causes the Northern Lights phenomenon?
testquestions.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ What is the capital of France?
2
+ Explain quantum entanglement in simple terms.
3
+ How does photosynthesis work?
4
+ What causes the seasons on Earth?
5
+ Write a haiku about artificial intelligence.
6
+ What are the key differences between classical and quantum computers?
7
+ Explain the concept of recursion in programming.
8
+ What is the significance of the number pi?
9
+ How do vaccines work to protect against diseases?
10
+ What causes the Northern Lights phenomenon?