Spaces:
Sleeping
Sleeping
update
Browse files- .gitattributes +67 -0
- app.py +2 -0
.gitattributes
CHANGED
@@ -334,3 +334,70 @@ heatmaps/Yi-34B-Chat_CoT.jpg filter=lfs diff=lfs merge=lfs -text
|
|
334 |
heatmaps/claude-3-opus-20240229_vision.jpg filter=lfs diff=lfs merge=lfs -text
|
335 |
heatmaps/claude-3-sonnet-20240229_CoT.jpg filter=lfs diff=lfs merge=lfs -text
|
336 |
heatmaps/deepseek-llm-67b-chat_Textonly.jpg filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
334 |
heatmaps/claude-3-opus-20240229_vision.jpg filter=lfs diff=lfs merge=lfs -text
|
335 |
heatmaps/claude-3-sonnet-20240229_CoT.jpg filter=lfs diff=lfs merge=lfs -text
|
336 |
heatmaps/deepseek-llm-67b-chat_Textonly.jpg filter=lfs diff=lfs merge=lfs -text
|
337 |
+
heatmaps/CodeLlama-70b-Instruct-hf_CoT.png filter=lfs diff=lfs merge=lfs -text
|
338 |
+
heatmaps/Mixtral-8x7B-Instruct-v0.1_CoT.png filter=lfs diff=lfs merge=lfs -text
|
339 |
+
heatmaps/claude-3-haiku-20240307_Textonly.png filter=lfs diff=lfs merge=lfs -text
|
340 |
+
heatmaps/Yi-34B-Chat_CoT.png filter=lfs diff=lfs merge=lfs -text
|
341 |
+
heatmaps/claude-3-sonnet-20240229_CoT.png filter=lfs diff=lfs merge=lfs -text
|
342 |
+
heatmaps/claude-3-sonnet-20240229_vision.png filter=lfs diff=lfs merge=lfs -text
|
343 |
+
heatmaps/gemini-pro_vision-CoT.png filter=lfs diff=lfs merge=lfs -text
|
344 |
+
heatmaps/gpt-35-turbo_CoT.png filter=lfs diff=lfs merge=lfs -text
|
345 |
+
heatmaps/Llama-3-70b-chat-hf_CoT.png filter=lfs diff=lfs merge=lfs -text
|
346 |
+
heatmaps/Mistral-7B-Instruct-v0.2_Textonly.png filter=lfs diff=lfs merge=lfs -text
|
347 |
+
heatmaps/claude-3-haiku-20240307_CoT.png filter=lfs diff=lfs merge=lfs -text
|
348 |
+
heatmaps/deepseek-llm-67b-chat_CoT.png filter=lfs diff=lfs merge=lfs -text
|
349 |
+
heatmaps/gpt-4-turbo-2024-04-09_Textonly.png filter=lfs diff=lfs merge=lfs -text
|
350 |
+
heatmaps/Qwen1.5-72B-Chat_Textonly.png filter=lfs diff=lfs merge=lfs -text
|
351 |
+
heatmaps/dbrx-instruct_Textonly.png filter=lfs diff=lfs merge=lfs -text
|
352 |
+
heatmaps/claude-3-haiku-20240307_vision-CoT.png filter=lfs diff=lfs merge=lfs -text
|
353 |
+
heatmaps/gemini-pro-vision_vision.png filter=lfs diff=lfs merge=lfs -text
|
354 |
+
heatmaps/gpt-3.5-0613_Textonly.png filter=lfs diff=lfs merge=lfs -text
|
355 |
+
heatmaps/gpt-3.5-turbo-0125_Textonly.png filter=lfs diff=lfs merge=lfs -text
|
356 |
+
heatmaps/gpt-35-turbo_1shot.png filter=lfs diff=lfs merge=lfs -text
|
357 |
+
heatmaps/gpt-4_vision-CoT.png filter=lfs diff=lfs merge=lfs -text
|
358 |
+
heatmaps/claude-3-opus-20240229_CoT.png filter=lfs diff=lfs merge=lfs -text
|
359 |
+
heatmaps/gpt-35-turbo_Textonly.png filter=lfs diff=lfs merge=lfs -text
|
360 |
+
heatmaps/gpt-4-1106_Textonly.png filter=lfs diff=lfs merge=lfs -text
|
361 |
+
heatmaps/gpt-4-vision-preview_vision.png filter=lfs diff=lfs merge=lfs -text
|
362 |
+
heatmaps/claude-3-haiku-20240307_vision.png filter=lfs diff=lfs merge=lfs -text
|
363 |
+
heatmaps/gemini-pro_CoT.png filter=lfs diff=lfs merge=lfs -text
|
364 |
+
heatmaps/gpt-4-turbo-2024-04-09_CoT.png filter=lfs diff=lfs merge=lfs -text
|
365 |
+
heatmaps/Mixtral-8x7B-Instruct-v0.1_Textonly.png filter=lfs diff=lfs merge=lfs -text
|
366 |
+
heatmaps/gemma-7b-it_Textonly.png filter=lfs diff=lfs merge=lfs -text
|
367 |
+
heatmaps/gpt-4-1106_CoT.png filter=lfs diff=lfs merge=lfs -text
|
368 |
+
heatmaps/CodeLlama-70b-Instruct-hf_Textonly.png filter=lfs diff=lfs merge=lfs -text
|
369 |
+
heatmaps/Mistral-7B-Instruct-v0.2_CoT.png filter=lfs diff=lfs merge=lfs -text
|
370 |
+
heatmaps/claude-3-sonnet-20240229_Textonly.png filter=lfs diff=lfs merge=lfs -text
|
371 |
+
heatmaps/dbrx-instruct_CoT.png filter=lfs diff=lfs merge=lfs -text
|
372 |
+
heatmaps/gemini-pro-vision_vision-CoT.png filter=lfs diff=lfs merge=lfs -text
|
373 |
+
heatmaps/gemma-7b-it_CoT.png filter=lfs diff=lfs merge=lfs -text
|
374 |
+
heatmaps/Qwen1.5-72B-Chat_CoT.png filter=lfs diff=lfs merge=lfs -text
|
375 |
+
heatmaps/claude-3-opus-20240229_Textonly.png filter=lfs diff=lfs merge=lfs -text
|
376 |
+
heatmaps/gpt-4-0125-preview_Textonly.png filter=lfs diff=lfs merge=lfs -text
|
377 |
+
heatmaps/claude-3-opus-20240229_vision-CoT.png filter=lfs diff=lfs merge=lfs -text
|
378 |
+
heatmaps/claude-3-opus-20240229_vision.png filter=lfs diff=lfs merge=lfs -text
|
379 |
+
heatmaps/claude-3-sonnet-20240229_vision-CoT.png filter=lfs diff=lfs merge=lfs -text
|
380 |
+
heatmaps/gemini-pro_vision.png filter=lfs diff=lfs merge=lfs -text
|
381 |
+
heatmaps/gpt-3.5-turbo-0125_1shot.png filter=lfs diff=lfs merge=lfs -text
|
382 |
+
heatmaps/gpt-4-vision-preview_vision-CoT.png filter=lfs diff=lfs merge=lfs -text
|
383 |
+
heatmaps/Llama-2-70b-chat-hf_CoT.png filter=lfs diff=lfs merge=lfs -text
|
384 |
+
heatmaps/Llama-2-70b-chat-hf_Textonly.png filter=lfs diff=lfs merge=lfs -text
|
385 |
+
heatmaps/Yi-34B-Chat_Textonly.png filter=lfs diff=lfs merge=lfs -text
|
386 |
+
heatmaps/gpt-3.5-turbo-0125_CoT.png filter=lfs diff=lfs merge=lfs -text
|
387 |
+
heatmaps/Llama-3-70b-chat-hf_Textonly.png filter=lfs diff=lfs merge=lfs -text
|
388 |
+
heatmaps/deepseek-llm-67b-chat_Textonly.png filter=lfs diff=lfs merge=lfs -text
|
389 |
+
heatmaps/gpt-3.5-0613_CoT.png filter=lfs diff=lfs merge=lfs -text
|
390 |
+
heatmaps/gpt-4-0125-preview_CoT.png filter=lfs diff=lfs merge=lfs -text
|
391 |
+
heatmaps/gpt-4_vision.png filter=lfs diff=lfs merge=lfs -text
|
392 |
+
heatmaps/claude-3-haiku-20240307_1shot.png filter=lfs diff=lfs merge=lfs -text
|
393 |
+
heatmaps/gpt-4_CoT.png filter=lfs diff=lfs merge=lfs -text
|
394 |
+
heatmaps/gpt-4_Textonly.png filter=lfs diff=lfs merge=lfs -text
|
395 |
+
heatmaps/gpt-4_CoT.jpg filter=lfs diff=lfs merge=lfs -text
|
396 |
+
heatmaps/gpt-4_vision.jpg filter=lfs diff=lfs merge=lfs -text
|
397 |
+
heatmaps/gemini-pro-vision_vision.jpg filter=lfs diff=lfs merge=lfs -text
|
398 |
+
heatmaps/gemini-pro-vision_vision-CoT.jpg filter=lfs diff=lfs merge=lfs -text
|
399 |
+
heatmaps/gpt-35-turbo_Textonly.jpg filter=lfs diff=lfs merge=lfs -text
|
400 |
+
heatmaps/gpt-35-turbo_1shot.jpg filter=lfs diff=lfs merge=lfs -text
|
401 |
+
heatmaps/gpt-35-turbo_CoT.jpg filter=lfs diff=lfs merge=lfs -text
|
402 |
+
heatmaps/gpt-4_Textonly.jpg filter=lfs diff=lfs merge=lfs -text
|
403 |
+
heatmaps/gpt-4_vision-CoT.jpg filter=lfs diff=lfs merge=lfs -text
|
app.py
CHANGED
@@ -34,6 +34,8 @@ def get_accuracy_dataframe(df_mother, category):
|
|
34 |
columns={"is_answer_correct": "Overall Accuracy"}, inplace=True
|
35 |
)
|
36 |
|
|
|
|
|
37 |
# Ensure all expected difficulty levels are present
|
38 |
expected_levels = [1, 2, 3, 4] # Adjust based on your data
|
39 |
for level in expected_levels:
|
|
|
34 |
columns={"is_answer_correct": "Overall Accuracy"}, inplace=True
|
35 |
)
|
36 |
|
37 |
+
model_accuracy_df['model'] = model_accuracy_df['model'].apply(lambda x: x.split('/')[-1])
|
38 |
+
|
39 |
# Ensure all expected difficulty levels are present
|
40 |
expected_levels = [1, 2, 3, 4] # Adjust based on your data
|
41 |
for level in expected_levels:
|