Spaces:
Running
Running
huangshiyu
committed on
Commit
·
6169a19
1
Parent(s):
3c598b1
update
Browse files- app.py +22 -6
- compute_accuracy.py +47 -0
- constants.py +13 -0
- eval_final_results.py +11 -0
app.py
CHANGED
@@ -3,9 +3,12 @@ __all__ = ['block', 'make_clickable_model', 'make_clickable_user', 'get_submissi
|
|
3 |
import gradio as gr
|
4 |
import pandas as pd
|
5 |
import json
|
|
|
6 |
|
7 |
from constants import *
|
8 |
from huggingface_hub import Repository
|
|
|
|
|
9 |
|
10 |
HF_TOKEN = os.environ.get("HF_TOKEN")
|
11 |
|
@@ -37,11 +40,19 @@ def add_new_eval(
|
|
37 |
if input_file is None:
|
38 |
return "Error! Empty file!"
|
39 |
|
40 |
-
upload_data = json.loads(input_file)
|
|
|
41 |
submission_repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL, use_auth_token=HF_TOKEN,
|
42 |
repo_type="dataset",git_user="auto-uploader",git_email="[email protected]")
|
43 |
submission_repo.git_pull()
|
44 |
csv_data = pd.read_csv(CSV_DIR)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
45 |
|
46 |
if LLM_type == 'Other':
|
47 |
LLM_name = LLM_name_textbox
|
@@ -72,11 +83,16 @@ def add_new_eval(
|
|
72 |
model_date,
|
73 |
model_link
|
74 |
]
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
|
|
|
|
|
|
|
|
|
|
80 |
# print(new_data)
|
81 |
# print(csv_data.loc[col-1])
|
82 |
csv_data.loc[col] = new_data
|
|
|
3 |
import gradio as gr
|
4 |
import pandas as pd
|
5 |
import json
|
6 |
+
import traceback
|
7 |
|
8 |
from constants import *
|
9 |
from huggingface_hub import Repository
|
10 |
+
from eval_final_results import eval_final
|
11 |
+
|
12 |
|
13 |
HF_TOKEN = os.environ.get("HF_TOKEN")
|
14 |
|
|
|
40 |
if input_file is None:
|
41 |
return "Error! Empty file!"
|
42 |
|
43 |
+
# upload_data = json.loads(input_file)
|
44 |
+
|
45 |
submission_repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL, use_auth_token=HF_TOKEN,
|
46 |
repo_type="dataset",git_user="auto-uploader",git_email="[email protected]")
|
47 |
submission_repo.git_pull()
|
48 |
csv_data = pd.read_csv(CSV_DIR)
|
49 |
+
try:
|
50 |
+
upload_data = eval_final(test_answer_file,dev_answer_file, input_file)
|
51 |
+
except:
|
52 |
+
error_message = traceback.format_exc()
|
53 |
+
print("Error:", error_message)
|
54 |
+
return
|
55 |
+
|
56 |
|
57 |
if LLM_type == 'Other':
|
58 |
LLM_name = LLM_name_textbox
|
|
|
83 |
model_date,
|
84 |
model_link
|
85 |
]
|
86 |
+
try:
|
87 |
+
for key in TASK_INFO:
|
88 |
+
if key in upload_data:
|
89 |
+
new_data.append(round(100*upload_data[key_map[key]],1))
|
90 |
+
else:
|
91 |
+
new_data.append(0)
|
92 |
+
except:
|
93 |
+
error_message = traceback.format_exc()
|
94 |
+
print("Error:", error_message)
|
95 |
+
return
|
96 |
# print(new_data)
|
97 |
# print(csv_data.loc[col-1])
|
98 |
csv_data.loc[col] = new_data
|
compute_accuracy.py
ADDED
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
|
2 |
+
import jsonlines
|
3 |
+
from collections import defaultdict
|
4 |
+
|
5 |
+
|
6 |
+
def compute_accuracy(answer_file: str, video_meta_file: str):
    """Score a model's answers against a JSON Lines file of QA metadata.

    Args:
        answer_file: Path to a JSON file containing one object that maps a
            question uid (as a string) to the model's answer.
        video_meta_file: Path to a JSON Lines file. Each record is read for
            ``qa`` (a list of entries with ``uid`` and ``answer``) and
            ``question_type`` (a single label, or a list of labels).

    Returns:
        A ``defaultdict(float)`` with one accuracy entry per question type
        that had at least one scored question, plus these summary keys:

        * ``"acc"``: ``right_num / total_qa_num`` (0.0 if nothing counted).
        * ``"answered_acc"``: ``right_num / total_answered_num`` (0.0 if the
          submission answered none of the questions in this file).
        * ``"total_qa_num"``: questions counted, i.e. every QA entry except
          answered ones whose reference answer is ``"NA"``.
        * ``"total_answered_num"``: QA entries whose uid appears in the
          answer file (including those with an ``"NA"`` reference).
        * ``"right_num"``: answers equal to the reference answer.
    """
    total_qa_num = 0
    total_answered_num = 0
    right_num = 0

    category_right = defaultdict(float)
    category_total = defaultdict(float)
    category_acc = defaultdict(float)

    with open(answer_file) as f:
        model_answers = json.load(f)

    # One JSON document per line; blank lines are ignored.
    with open(video_meta_file, encoding="utf-8") as f:
        video_meta = [json.loads(line) for line in f if line.strip()]

    for meta_data in video_meta:
        for qa in meta_data['qa']:
            uid = str(qa["uid"])
            if uid in model_answers:
                total_answered_num += 1

                # No reference answer available: cannot be scored, and it is
                # deliberately left out of total_qa_num as well.
                if qa["answer"] == "NA":
                    continue

                # Normalise to a list locally instead of rewriting the record,
                # so a record with several QA entries is not wrapped again and
                # again into an unhashable nested list.
                question_type = meta_data['question_type']
                if isinstance(question_type, (list, tuple)):
                    categories = question_type
                else:
                    categories = [question_type]

                is_right = model_answers[uid] == qa["answer"]
                for category in categories:
                    category_total[category] += 1
                    if is_right:
                        category_right[category] += 1

                if is_right:
                    right_num += 1
            total_qa_num += 1

    # Every key in category_total was incremented at least once, so the
    # denominator here is never zero.
    for key in category_total:
        category_acc[key] = category_right[key] / category_total[key]

    # A submission may share no uids with this metadata file (for example a
    # test-only submission scored against the dev file); report 0.0 rather
    # than raising ZeroDivisionError.
    acc = float(right_num) / total_qa_num if total_qa_num else 0.0
    answered_acc = float(right_num) / total_answered_num if total_answered_num else 0.0
    category_acc.update({"acc": acc, "answered_acc": answered_acc, "total_qa_num": total_qa_num,
                         "total_answered_num": total_answered_num, "right_num": right_num})
    return category_acc
|
47 |
+
|
constants.py
CHANGED
@@ -5,12 +5,25 @@ MODEL_INFO = ["Model", "Language Model", "Date"]
|
|
5 |
TASK_INFO = ["Dev Avg", "Test Avg", "MR", "LM", "CM", "MO", "AO", "RC"]
|
6 |
AVG_INFO = ["Dev Avg", "Test Avg", "MR", "LM", "CM", "MO", "AO", "RC"]
|
7 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
DATA_TITILE_TYPE = ['markdown', 'markdown', 'markdown', 'number', 'number', 'number', 'number', 'number', 'number',
|
9 |
'number', 'number']
|
10 |
|
11 |
SUBMISSION_NAME = "MotionBench_submission"
|
12 |
SUBMISSION_URL = os.path.join("https://huggingface.co/datasets/THUDM/", SUBMISSION_NAME)
|
13 |
CSV_DIR = "./MotionBench_submission/result.csv"
|
|
|
|
|
14 |
|
15 |
COLUMN_NAMES = MODEL_INFO + TASK_INFO
|
16 |
|
|
|
5 |
TASK_INFO = ["Dev Avg", "Test Avg", "MR", "LM", "CM", "MO", "AO", "RC"]
|
6 |
AVG_INFO = ["Dev Avg", "Test Avg", "MR", "LM", "CM", "MO", "AO", "RC"]
|
7 |
|
8 |
+
key_map = {
|
9 |
+
"Dev Avg": "dev avg",
|
10 |
+
"Test Avg": "test avg",
|
11 |
+
"MR": "Motion Recognition",
|
12 |
+
"LM": "Location-related Motion",
|
13 |
+
"CM": "Camera Motion",
|
14 |
+
"MO": "Motion-related Objects",
|
15 |
+
"AO": "Action Order",
|
16 |
+
"RC": "Repetition Count"
|
17 |
+
}
|
18 |
+
|
19 |
DATA_TITILE_TYPE = ['markdown', 'markdown', 'markdown', 'number', 'number', 'number', 'number', 'number', 'number',
|
20 |
'number', 'number']
|
21 |
|
22 |
SUBMISSION_NAME = "MotionBench_submission"
|
23 |
SUBMISSION_URL = os.path.join("https://huggingface.co/datasets/THUDM/", SUBMISSION_NAME)
|
24 |
CSV_DIR = "./MotionBench_submission/result.csv"
|
25 |
+
test_answer_file = "./MotionBench_submission/test_ans_video_info.meta.jsonl"
|
26 |
+
dev_answer_file = "./MotionBench_submission/dev_ans_video_info.meta.jsonl"
|
27 |
|
28 |
COLUMN_NAMES = MODEL_INFO + TASK_INFO
|
29 |
|
eval_final_results.py
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from compute_accuracy import compute_accuracy
|
2 |
+
|
3 |
+
def eval_final(test_metafile, dev_metafile, to_eval):
    """Score one submission file against the test and dev metadata files.

    Args:
        test_metafile: Path passed to ``compute_accuracy`` as the metadata
            file for the test split.
        dev_metafile: Path passed to ``compute_accuracy`` as the metadata
            file for the dev split.
        to_eval: Path of the submitted answer file; the same file is scored
            against both metadata files.

    Returns:
        A dict holding ``"dev avg"`` and ``"test avg"`` (the
        ``'answered_acc'`` value of the dev and test results respectively)
        followed by every entry of the test-split result.
    """
    print("Computing accuracy...")
    result_test = compute_accuracy(to_eval, test_metafile)
    result_dev = compute_accuracy(to_eval, dev_metafile)

    output = {"dev avg": result_dev['answered_acc'],
              "test avg": result_test['answered_acc'],
              **result_test}
    # The dict was previously built and then dropped, so callers got None.
    return output
|
11 |
+
|