huangshiyu committed on
Commit
6169a19
·
1 Parent(s): 3c598b1
Files changed (4) hide show
  1. app.py +22 -6
  2. compute_accuracy.py +47 -0
  3. constants.py +13 -0
  4. eval_final_results.py +11 -0
app.py CHANGED
@@ -3,9 +3,12 @@ __all__ = ['block', 'make_clickable_model', 'make_clickable_user', 'get_submissi
3
  import gradio as gr
4
  import pandas as pd
5
  import json
 
6
 
7
  from constants import *
8
  from huggingface_hub import Repository
 
 
9
 
10
  HF_TOKEN = os.environ.get("HF_TOKEN")
11
 
@@ -37,11 +40,19 @@ def add_new_eval(
37
  if input_file is None:
38
  return "Error! Empty file!"
39
 
40
- upload_data = json.loads(input_file)
 
41
  submission_repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL, use_auth_token=HF_TOKEN,
42
  repo_type="dataset",git_user="auto-uploader",git_email="[email protected]")
43
  submission_repo.git_pull()
44
  csv_data = pd.read_csv(CSV_DIR)
 
 
 
 
 
 
 
45
 
46
  if LLM_type == 'Other':
47
  LLM_name = LLM_name_textbox
@@ -72,11 +83,16 @@ def add_new_eval(
72
  model_date,
73
  model_link
74
  ]
75
- for key in TASK_INFO:
76
- if key in upload_data:
77
- new_data.append(round(100*upload_data[key],1))
78
- else:
79
- new_data.append(0)
 
 
 
 
 
80
  # print(new_data)
81
  # print(csv_data.loc[col-1])
82
  csv_data.loc[col] = new_data
 
3
  import gradio as gr
4
  import pandas as pd
5
  import json
6
+ import traceback
7
 
8
  from constants import *
9
  from huggingface_hub import Repository
10
+ from eval_final_results import eval_final
11
+
12
 
13
  HF_TOKEN = os.environ.get("HF_TOKEN")
14
 
 
40
  if input_file is None:
41
  return "Error! Empty file!"
42
 
43
+ # upload_data = json.loads(input_file)
44
+
45
  submission_repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL, use_auth_token=HF_TOKEN,
46
  repo_type="dataset",git_user="auto-uploader",git_email="[email protected]")
47
  submission_repo.git_pull()
48
  csv_data = pd.read_csv(CSV_DIR)
49
+ try:
50
+ upload_data = eval_final(test_answer_file,dev_answer_file, input_file)
51
+ except:
52
+ error_message = traceback.format_exc()
53
+ print("Error:", error_message)
54
+ return
55
+
56
 
57
  if LLM_type == 'Other':
58
  LLM_name = LLM_name_textbox
 
83
  model_date,
84
  model_link
85
  ]
86
+ try:
87
+ for key in TASK_INFO:
88
+ if key in upload_data:
89
+ new_data.append(round(100*upload_data[key_map[key]],1))
90
+ else:
91
+ new_data.append(0)
92
+ except:
93
+ error_message = traceback.format_exc()
94
+ print("Error:", error_message)
95
+ return
96
  # print(new_data)
97
  # print(csv_data.loc[col-1])
98
  csv_data.loc[col] = new_data
compute_accuracy.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import jsonlines
3
+ from collections import defaultdict
4
+
5
+
6
def compute_accuracy(answer_file: str, video_meta_file: str):
    """Score a model's answers against the ground truth in a video-meta file.

    Args:
        answer_file: Path to a JSON file holding a mapping from question uid
            (as a string) to the model's answer.
        video_meta_file: Path to a JSON Lines file. Each record is read for
            its ``qa`` list (items with ``uid`` and ``answer``) and its
            ``question_type``.

    Returns:
        A ``defaultdict(float)`` mapping each question type that had at least
        one scored question to its accuracy, plus the summary keys ``acc``
        (right / total_qa_num), ``answered_acc`` (right / total_answered_num),
        ``total_qa_num``, ``total_answered_num`` and ``right_num``.
        Both ratios are 0.0 when their denominator is zero.

    NOTE(review): the loop nesting was reconstructed from a diff view that had
    lost its indentation; ``total_qa_num`` is assumed to count every question
    except answered ones whose ground truth is "NA" -- confirm against the
    repository copy.
    """
    total_qa_num = 0
    total_answered_num = 0
    right_num = 0

    category_right = defaultdict(float)
    category_total = defaultdict(float)
    category_acc = defaultdict(float)

    with open(answer_file, encoding="utf-8") as f:
        model_answers = json.load(f)

    # One JSON document per line; blank lines are ignored.
    with open(video_meta_file, encoding="utf-8") as f:
        video_meta = [json.loads(line) for line in f if line.strip()]

    for meta_data in video_meta:
        for qa in meta_data['qa']:
            uid = str(qa["uid"])
            if uid in model_answers:
                total_answered_num += 1
                model_answer = model_answers[uid]

                # Normalise to a list of categories WITHOUT mutating the
                # record: re-wrapping meta_data['question_type'] per question
                # nested the list and crashed on a record's second question.
                question_type = meta_data['question_type']
                if isinstance(question_type, list):
                    categories = question_type
                else:
                    categories = [question_type]

                # "NA" ground truth cannot be scored.
                if qa["answer"] == "NA":
                    continue

                is_right = model_answer == qa["answer"]
                for category in categories:
                    category_total[category] += 1
                    if is_right:
                        category_right[category] += 1

                if is_right:
                    right_num += 1
            total_qa_num += 1

    for key in category_total:
        category_acc[key] = category_right[key] / category_total[key]

    # Guard the ratios: a submission may contain no answers for this split.
    acc = float(right_num) / total_qa_num if total_qa_num else 0.0
    answered_acc = float(right_num) / total_answered_num if total_answered_num else 0.0
    category_acc.update({"acc": acc, "answered_acc": answered_acc, "total_qa_num": total_qa_num,
                         "total_answered_num": total_answered_num, "right_num": right_num})
    return category_acc
47
+
constants.py CHANGED
@@ -5,12 +5,25 @@ MODEL_INFO = ["Model", "Language Model", "Date"]
5
  TASK_INFO = ["Dev Avg", "Test Avg", "MR", "LM", "CM", "MO", "AO", "RC"]
6
  AVG_INFO = ["Dev Avg", "Test Avg", "MR", "LM", "CM", "MO", "AO", "RC"]
7
 
 
 
 
 
 
 
 
 
 
 
 
8
  DATA_TITILE_TYPE = ['markdown', 'markdown', 'markdown', 'number', 'number', 'number', 'number', 'number', 'number',
9
  'number', 'number']
10
 
11
  SUBMISSION_NAME = "MotionBench_submission"
12
  SUBMISSION_URL = os.path.join("https://huggingface.co/datasets/THUDM/", SUBMISSION_NAME)
13
  CSV_DIR = "./MotionBench_submission/result.csv"
 
 
14
 
15
  COLUMN_NAMES = MODEL_INFO + TASK_INFO
16
 
 
5
  TASK_INFO = ["Dev Avg", "Test Avg", "MR", "LM", "CM", "MO", "AO", "RC"]
6
  AVG_INFO = ["Dev Avg", "Test Avg", "MR", "LM", "CM", "MO", "AO", "RC"]
7
 
8
+ key_map = {
9
+ "Dev Avg": "dev avg",
10
+ "Test Avg": "test avg",
11
+ "MR": "Motion Recognition",
12
+ "LM": "Location-related Motion",
13
+ "CM": "Camera Motion",
14
+ "MO": "Motion-related Objects",
15
+ "AO": "Action Order",
16
+ "RC": "Repetition Count"
17
+ }
18
+
19
  DATA_TITILE_TYPE = ['markdown', 'markdown', 'markdown', 'number', 'number', 'number', 'number', 'number', 'number',
20
  'number', 'number']
21
 
22
  SUBMISSION_NAME = "MotionBench_submission"
23
  SUBMISSION_URL = os.path.join("https://huggingface.co/datasets/THUDM/", SUBMISSION_NAME)
24
  CSV_DIR = "./MotionBench_submission/result.csv"
25
+ test_answer_file = "./MotionBench_submission/test_ans_video_info.meta.jsonl"
26
+ dev_answer_file = "./MotionBench_submission/dev_ans_video_info.meta.jsonl"
27
 
28
  COLUMN_NAMES = MODEL_INFO + TASK_INFO
29
 
eval_final_results.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from compute_accuracy import compute_accuracy
2
+
3
def eval_final(test_metafile,dev_metafile,to_eval):
    """Evaluate one submission file against both the test and dev meta files.

    Args:
        test_metafile: Path to the test-split meta file passed to
            ``compute_accuracy``.
        dev_metafile: Path to the dev-split meta file passed to
            ``compute_accuracy``.
        to_eval: Path to the submitted answer file.

    Returns:
        A dict with ``"dev avg"`` and ``"test avg"`` (the ``answered_acc`` of
        the respective split) merged with every entry of the test-split
        result.
    """
    print("Computing accuracy...")
    result_test = compute_accuracy(to_eval, test_metafile)
    result_dev = compute_accuracy(to_eval, dev_metafile)

    output = {"dev avg": result_dev['answered_acc'],
              "test avg": result_test['answered_acc'],
              **result_test}
    # The result was previously built but never returned, so callers got None.
    return output
11
+