import argparse
import ast
import csv
import glob
import json
import os

# Default location of the ground-truth answer key (relative to the working
# directory of the process).
CORRECT_ANSWER_FILE = 'file/ANSWER.json'

# Per-dataset weights for each of the three score categories. The insertion
# order of the datasets is also the column order of the table produced by
# ``compute_scores``.

# Video-exclusive Understanding
_EXCLUSIVE_UNDERSTANDING_WEIGHTS = {
    "ActivityNet": 1,
    "MSVD": 1,
    "MSRVTT": 1,
    "TGIF": 1,
    "Youcook2": 1,
    "Ucfcrime": 1,
    "MOT": 0.5,
}

# Prior Knowledge-based Question-answer
_PRIOR_KNOWLEDGE_WEIGHTS = {
    "TVQA": 1,
    "MV": 1,
    "NBA": 1,
}

# Comprehension and Decision-making
_DECISION_MAKING_WEIGHTS = {
    "Driving-exam": 0.5,
    "Driving-decision-making": 1,
    "SQA3D": 1,
}


def _parse_merge_payload(merge_file):
    """Decode and parse the merged-results payload without executing it."""
    if isinstance(merge_file, (bytes, bytearray)):
        text = merge_file.decode("utf-8")
    else:
        text = merge_file
    # SECURITY: this payload comes from outside the program. It used to be
    # passed to eval(), which runs arbitrary code embedded in the payload.
    # Parse it as data instead: JSON first, then fall back to a Python
    # literal for payloads that were written with repr()/str().
    try:
        return json.loads(text)
    except ValueError:
        return ast.literal_eval(text)


def _weighted_average(scores, weights):
    """Return the weighted mean of ``scores`` over the datasets in ``weights``."""
    weights_sum = sum(weights.values())
    average = 0
    for dataset_name, weight in weights.items():
        average += weight * scores[dataset_name] / weights_sum
    return average


def chatgpt_json(merge_file, correct_answer_file=CORRECT_ANSWER_FILE) -> dict:
    """Score every dataset in a merged-results payload against the answer key.

    Args:
        merge_file: UTF-8 encoded bytes (a ``str`` is accepted too) holding a
            mapping ``{dataset_name: {question_id: {"output_chatgpt_choice":
            ...}}}``, serialized as JSON or as a Python literal.
        correct_answer_file: Path of the JSON answer key, shaped
            ``{dataset_name: {question_id: {"answer": ...}}}``.

    Returns:
        A dict mapping each dataset name found in the payload to its accuracy
        in percent, rounded to two decimals. A dataset with no entries scores
        ``0.0``.

    Raises:
        ValueError, SyntaxError: The payload is neither JSON nor a Python
            literal.
        KeyError: A dataset or question id is absent from the answer key, or
            an entry has no ``output_chatgpt_choice``.
    """
    merge_data = _parse_merge_payload(merge_file)

    with open(correct_answer_file, 'r', encoding='utf-8') as f:
        correct_answer_data = json.load(f)

    dataset_scores_dict = {}
    for dataset_name, item in merge_data.items():
        # NOTE: accuracy is computed over the entries present in the payload,
        # not over the size of the answer key; completeness is not checked.
        total_nums = len(item)
        if total_nums == 0:
            # Nothing was answered: report zero instead of dividing by zero.
            dataset_scores_dict[dataset_name] = 0.0
            continue

        answers = correct_answer_data[dataset_name]
        correct = sum(
            1
            for question_id, sub_item in item.items()
            if sub_item['output_chatgpt_choice'] == answers[question_id]['answer']
        )
        dataset_scores_dict[dataset_name] = round(correct / total_nums * 100, 2)
    return dataset_scores_dict


def compute_scores(merge_file, correct_answer_file=CORRECT_ANSWER_FILE) -> list:
    """Build the two-row score table for a merged-results payload.

    Args:
        merge_file: Payload forwarded to ``chatgpt_json``.
        correct_answer_file: Path of the JSON answer key, forwarded to
            ``chatgpt_json``.

    Returns:
        ``[header, row]``. ``header`` holds the column titles; ``row`` holds
        the overall average, the three category averages and then the
        per-dataset accuracies, in the same order as ``header``.

    Raises:
        KeyError: The payload lacks one of the weighted datasets.
    """
    dataset_score_dict = chatgpt_json(merge_file, correct_answer_file)

    weight_groups = (
        _EXCLUSIVE_UNDERSTANDING_WEIGHTS,
        _PRIOR_KNOWLEDGE_WEIGHTS,
        _DECISION_MAKING_WEIGHTS,
    )
    category_scores = [
        _weighted_average(dataset_score_dict, weights)
        for weights in weight_groups
    ]

    # The final score is the unweighted mean of the three category scores.
    final_score = sum(category_scores) / 3

    # Derive the dataset columns from the weight tables so the header and the
    # value row cannot drift apart.
    dataset_names = [name for weights in weight_groups for name in weights]

    header = [
        "Avg. All",
        "Avg. Video-Exclusive",
        "Avg. Prior-Knowledge QA",
        "Avg. Decision-Making",
    ] + dataset_names
    row = [final_score, *category_scores] + [
        dataset_score_dict[name] for name in dataset_names
    ]
    return [header, row]