taesiri committed on
Commit
1dca33f
1 Parent(s): 3b006a2
Files changed (2) hide show
  1. .gitattributes +67 -0
  2. app.py +2 -0
.gitattributes CHANGED
@@ -334,3 +334,70 @@ heatmaps/Yi-34B-Chat_CoT.jpg filter=lfs diff=lfs merge=lfs -text
334
  heatmaps/claude-3-opus-20240229_vision.jpg filter=lfs diff=lfs merge=lfs -text
335
  heatmaps/claude-3-sonnet-20240229_CoT.jpg filter=lfs diff=lfs merge=lfs -text
336
  heatmaps/deepseek-llm-67b-chat_Textonly.jpg filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
334
  heatmaps/claude-3-opus-20240229_vision.jpg filter=lfs diff=lfs merge=lfs -text
335
  heatmaps/claude-3-sonnet-20240229_CoT.jpg filter=lfs diff=lfs merge=lfs -text
336
  heatmaps/deepseek-llm-67b-chat_Textonly.jpg filter=lfs diff=lfs merge=lfs -text
337
+ heatmaps/CodeLlama-70b-Instruct-hf_CoT.png filter=lfs diff=lfs merge=lfs -text
338
+ heatmaps/Mixtral-8x7B-Instruct-v0.1_CoT.png filter=lfs diff=lfs merge=lfs -text
339
+ heatmaps/claude-3-haiku-20240307_Textonly.png filter=lfs diff=lfs merge=lfs -text
340
+ heatmaps/Yi-34B-Chat_CoT.png filter=lfs diff=lfs merge=lfs -text
341
+ heatmaps/claude-3-sonnet-20240229_CoT.png filter=lfs diff=lfs merge=lfs -text
342
+ heatmaps/claude-3-sonnet-20240229_vision.png filter=lfs diff=lfs merge=lfs -text
343
+ heatmaps/gemini-pro_vision-CoT.png filter=lfs diff=lfs merge=lfs -text
344
+ heatmaps/gpt-35-turbo_CoT.png filter=lfs diff=lfs merge=lfs -text
345
+ heatmaps/Llama-3-70b-chat-hf_CoT.png filter=lfs diff=lfs merge=lfs -text
346
+ heatmaps/Mistral-7B-Instruct-v0.2_Textonly.png filter=lfs diff=lfs merge=lfs -text
347
+ heatmaps/claude-3-haiku-20240307_CoT.png filter=lfs diff=lfs merge=lfs -text
348
+ heatmaps/deepseek-llm-67b-chat_CoT.png filter=lfs diff=lfs merge=lfs -text
349
+ heatmaps/gpt-4-turbo-2024-04-09_Textonly.png filter=lfs diff=lfs merge=lfs -text
350
+ heatmaps/Qwen1.5-72B-Chat_Textonly.png filter=lfs diff=lfs merge=lfs -text
351
+ heatmaps/dbrx-instruct_Textonly.png filter=lfs diff=lfs merge=lfs -text
352
+ heatmaps/claude-3-haiku-20240307_vision-CoT.png filter=lfs diff=lfs merge=lfs -text
353
+ heatmaps/gemini-pro-vision_vision.png filter=lfs diff=lfs merge=lfs -text
354
+ heatmaps/gpt-3.5-0613_Textonly.png filter=lfs diff=lfs merge=lfs -text
355
+ heatmaps/gpt-3.5-turbo-0125_Textonly.png filter=lfs diff=lfs merge=lfs -text
356
+ heatmaps/gpt-35-turbo_1shot.png filter=lfs diff=lfs merge=lfs -text
357
+ heatmaps/gpt-4_vision-CoT.png filter=lfs diff=lfs merge=lfs -text
358
+ heatmaps/claude-3-opus-20240229_CoT.png filter=lfs diff=lfs merge=lfs -text
359
+ heatmaps/gpt-35-turbo_Textonly.png filter=lfs diff=lfs merge=lfs -text
360
+ heatmaps/gpt-4-1106_Textonly.png filter=lfs diff=lfs merge=lfs -text
361
+ heatmaps/gpt-4-vision-preview_vision.png filter=lfs diff=lfs merge=lfs -text
362
+ heatmaps/claude-3-haiku-20240307_vision.png filter=lfs diff=lfs merge=lfs -text
363
+ heatmaps/gemini-pro_CoT.png filter=lfs diff=lfs merge=lfs -text
364
+ heatmaps/gpt-4-turbo-2024-04-09_CoT.png filter=lfs diff=lfs merge=lfs -text
365
+ heatmaps/Mixtral-8x7B-Instruct-v0.1_Textonly.png filter=lfs diff=lfs merge=lfs -text
366
+ heatmaps/gemma-7b-it_Textonly.png filter=lfs diff=lfs merge=lfs -text
367
+ heatmaps/gpt-4-1106_CoT.png filter=lfs diff=lfs merge=lfs -text
368
+ heatmaps/CodeLlama-70b-Instruct-hf_Textonly.png filter=lfs diff=lfs merge=lfs -text
369
+ heatmaps/Mistral-7B-Instruct-v0.2_CoT.png filter=lfs diff=lfs merge=lfs -text
370
+ heatmaps/claude-3-sonnet-20240229_Textonly.png filter=lfs diff=lfs merge=lfs -text
371
+ heatmaps/dbrx-instruct_CoT.png filter=lfs diff=lfs merge=lfs -text
372
+ heatmaps/gemini-pro-vision_vision-CoT.png filter=lfs diff=lfs merge=lfs -text
373
+ heatmaps/gemma-7b-it_CoT.png filter=lfs diff=lfs merge=lfs -text
374
+ heatmaps/Qwen1.5-72B-Chat_CoT.png filter=lfs diff=lfs merge=lfs -text
375
+ heatmaps/claude-3-opus-20240229_Textonly.png filter=lfs diff=lfs merge=lfs -text
376
+ heatmaps/gpt-4-0125-preview_Textonly.png filter=lfs diff=lfs merge=lfs -text
377
+ heatmaps/claude-3-opus-20240229_vision-CoT.png filter=lfs diff=lfs merge=lfs -text
378
+ heatmaps/claude-3-opus-20240229_vision.png filter=lfs diff=lfs merge=lfs -text
379
+ heatmaps/claude-3-sonnet-20240229_vision-CoT.png filter=lfs diff=lfs merge=lfs -text
380
+ heatmaps/gemini-pro_vision.png filter=lfs diff=lfs merge=lfs -text
381
+ heatmaps/gpt-3.5-turbo-0125_1shot.png filter=lfs diff=lfs merge=lfs -text
382
+ heatmaps/gpt-4-vision-preview_vision-CoT.png filter=lfs diff=lfs merge=lfs -text
383
+ heatmaps/Llama-2-70b-chat-hf_CoT.png filter=lfs diff=lfs merge=lfs -text
384
+ heatmaps/Llama-2-70b-chat-hf_Textonly.png filter=lfs diff=lfs merge=lfs -text
385
+ heatmaps/Yi-34B-Chat_Textonly.png filter=lfs diff=lfs merge=lfs -text
386
+ heatmaps/gpt-3.5-turbo-0125_CoT.png filter=lfs diff=lfs merge=lfs -text
387
+ heatmaps/Llama-3-70b-chat-hf_Textonly.png filter=lfs diff=lfs merge=lfs -text
388
+ heatmaps/deepseek-llm-67b-chat_Textonly.png filter=lfs diff=lfs merge=lfs -text
389
+ heatmaps/gpt-3.5-0613_CoT.png filter=lfs diff=lfs merge=lfs -text
390
+ heatmaps/gpt-4-0125-preview_CoT.png filter=lfs diff=lfs merge=lfs -text
391
+ heatmaps/gpt-4_vision.png filter=lfs diff=lfs merge=lfs -text
392
+ heatmaps/claude-3-haiku-20240307_1shot.png filter=lfs diff=lfs merge=lfs -text
393
+ heatmaps/gpt-4_CoT.png filter=lfs diff=lfs merge=lfs -text
394
+ heatmaps/gpt-4_Textonly.png filter=lfs diff=lfs merge=lfs -text
395
+ heatmaps/gpt-4_CoT.jpg filter=lfs diff=lfs merge=lfs -text
396
+ heatmaps/gpt-4_vision.jpg filter=lfs diff=lfs merge=lfs -text
397
+ heatmaps/gemini-pro-vision_vision.jpg filter=lfs diff=lfs merge=lfs -text
398
+ heatmaps/gemini-pro-vision_vision-CoT.jpg filter=lfs diff=lfs merge=lfs -text
399
+ heatmaps/gpt-35-turbo_Textonly.jpg filter=lfs diff=lfs merge=lfs -text
400
+ heatmaps/gpt-35-turbo_1shot.jpg filter=lfs diff=lfs merge=lfs -text
401
+ heatmaps/gpt-35-turbo_CoT.jpg filter=lfs diff=lfs merge=lfs -text
402
+ heatmaps/gpt-4_Textonly.jpg filter=lfs diff=lfs merge=lfs -text
403
+ heatmaps/gpt-4_vision-CoT.jpg filter=lfs diff=lfs merge=lfs -text
app.py CHANGED
@@ -34,6 +34,8 @@ def get_accuracy_dataframe(df_mother, category):
34
  columns={"is_answer_correct": "Overall Accuracy"}, inplace=True
35
  )
36
 
 
 
37
  # Ensure all expected difficulty levels are present
38
  expected_levels = [1, 2, 3, 4] # Adjust based on your data
39
  for level in expected_levels:
 
34
  columns={"is_answer_correct": "Overall Accuracy"}, inplace=True
35
  )
36
 
37
+ model_accuracy_df['model'] = model_accuracy_df['model'].apply(lambda x: x.split('/')[-1])
38
+
39
  # Ensure all expected difficulty levels are present
40
  expected_levels = [1, 2, 3, 4] # Adjust based on your data
41
  for level in expected_levels: