Rohan Wadhawan committed on
Commit
e61d9ba
·
0 Parent(s):

ConTextual Leaderboard setup

Browse files
Files changed (8) hide show
  1. .DS_Store +0 -0
  2. .gitattributes +35 -0
  3. .gitignore +1 -0
  4. README.md +13 -0
  5. app.py +282 -0
  6. content.py +100 -0
  7. requirements.txt +5 -0
  8. scorer.py +50 -0
.DS_Store ADDED
Binary file (6.15 kB). View file
 
.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ .DS_Store
README.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Contextual Leaderboard
3
+ emoji: 🐨
4
+ colorFrom: purple
5
+ colorTo: blue
6
+ sdk: gradio
7
+ sdk_version: 4.16.0
8
+ app_file: app.py
9
+ pinned: false
10
+ license: mit
11
+ ---
12
+
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,282 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import csv
4
+ import datetime
5
+ from email.utils import parseaddr
6
+
7
+ import gradio as gr
8
+ import pandas as pd
9
+ import numpy as np
10
+
11
+ from datasets import load_dataset
12
+ from apscheduler.schedulers.background import BackgroundScheduler
13
+ from huggingface_hub import HfApi
14
+
15
+ from scorer import instruction_scorer
16
+ from content import format_error, format_warning, format_log, TITLE, INTRODUCTION_TEXT, SUBMISSION_TEXT, CITATION_BUTTON_LABEL, CITATION_BUTTON_TEXT, model_hyperlink
17
+
18
# Hub access token taken from the environment; None when the variable is unset.
TOKEN = os.environ.get("TOKEN")

# Every Hub repository used by the leaderboard lives under this owner.
OWNER = "ucla-contextual"
TEST_DATASET = OWNER + "/contextual_test"
VAL_DATASET = OWNER + "/contextual_val"
SUBMISSION_DATASET = OWNER + "/submissions_internal"
CONTACT_DATASET = OWNER + "/contact_info"
RESULTS_DATASET = OWNER + "/results"
LEADERBOARD_PATH = OWNER + "/leaderboard"

# Shared Hub client used for uploads and for restarting the Space.
api = HfApi()

# Prefix embedded in the names of the files stored for each submission.
YEAR_VERSION = "2024"
29
+
30
def read_json_file(filepath):
    """Load and return the JSON document stored at ``filepath``.

    The file is decoded as UTF-8 explicitly rather than with the platform's
    locale default, so submissions containing non-ASCII text parse the same
    way on every host.

    Args:
        filepath: path of the JSON file to read.

    Returns:
        The decoded JSON value (a dict for well-formed submissions).

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the content is not valid JSON.
    """
    with open(filepath, encoding="utf-8") as infile:
        return json.load(infile)
34
+
35
def save_json_file(filepath, data_dict):
    """Write ``data_dict`` to ``filepath`` as JSON, replacing any existing file."""
    with open(filepath, "w") as sink:
        # Stream the encoder's chunks straight into the file.
        for chunk in json.JSONEncoder().iterencode(data_dict):
            sink.write(chunk)
38
+
39
# Local directory that receives the scored submission files.
os.makedirs("scored", exist_ok=True)

# Keyword arguments shared by every Hub dataset load below.
_HUB_LOAD_OPTIONS = {
    "token": TOKEN,
    "download_mode": "force_redownload",
    "ignore_verifications": True,
}

test_data_files = {"test": "contextual_test.csv"}
test_dataset = load_dataset(TEST_DATASET, data_files=test_data_files, **_HUB_LOAD_OPTIONS)

val_data_files = {"val": "contextual_val.csv"}
val_dataset = load_dataset(VAL_DATASET, data_files=val_data_files, **_HUB_LOAD_OPTIONS)

results_data_files = {
    "test": "contextual_test_results.csv",
    "val": "contextual_val_results.csv",
}
results = load_dataset(RESULTS_DATASET, data_files=results_data_files, **_HUB_LOAD_OPTIONS)

contacts_data_files = {"contacts": "contacts.csv"}
contact_infos = load_dataset(CONTACT_DATASET, data_files=contacts_data_files, **_HUB_LOAD_OPTIONS)
53
+
54
def get_dataframe_from_results(results, split):
    """Return the leaderboard table for ``split``, best overall score first.

    Args:
        results: mapping from split name to an object exposing ``to_pandas()``.
        split: key of the split to render.

    Returns:
        A DataFrame without the ``URL`` column, ordered by ``All`` descending.
    """
    table = results[split].to_pandas()
    visible = table.drop(columns=['URL'])
    return visible.sort_values(by=["All"], ascending=False)
59
+
60
# Convert each loaded split to a pandas DataFrame once, at import time.
contacts_dataframe = contact_infos["contacts"].to_pandas()
val_dataset_dataframe = val_dataset["val"].to_pandas()
test_dataset_dataframe = test_dataset["test"].to_pandas()

# Leaderboard tables shown when the UI is first built.
val_results_dataframe, test_results_dataframe = (
    get_dataframe_from_results(results=results, split=split_name)
    for split_name in ("val", "test")
)
67
+
68
def restart_space():
    """Ask the Hub to restart the leaderboard Space identified by LEADERBOARD_PATH."""
    restart_arguments = {"repo_id": LEADERBOARD_PATH, "token": TOKEN}
    api.restart_space(**restart_arguments)
70
+
71
# Datatypes passed to the leaderboard tables: three markdown columns
# followed by nine numeric columns.
TYPES = ["markdown"] * 3 + ["number"] * 9
72
+
73
def add_new_eval(
    model: str,
    method: str,
    url: str,
    path_to_file: str,
    organisation: str,
    mail: str,
):
    """Validate, score and record one validation-split submission.

    The submission is checked, its raw and scored files are uploaded to
    SUBMISSION_DATASET, a row is appended to the val results CSV in
    RESULTS_DATASET, and the submitter's contact details are appended to
    the CSV in CONTACT_DATASET.

    Args:
        model: model name typed into the form.
        method: method description typed into the form.
        url: link to information about the model.
        path_to_file: uploaded predictions file as delivered by ``gr.File``;
            either a path string or an object exposing the path as ``.name``.
        organisation: submitting organisation.
        mail: contact e-mail address.

    Returns:
        The HTML success message produced by ``format_log``.

    Raises:
        gr.Error: if a required field is empty, the e-mail has no "@", the
            (model, organisation) pair is already on the val leaderboard,
            no file is attached, or the file is not a one-key JSON object
            holding exactly 100 predictions.
    """
    print("printing all inputs:", model, method, url, path_to_file, organisation, mail)

    if not model:
        print("model none")
        raise gr.Error("Please provide a model name. Field empty!")

    if not method:
        print("method none")
        raise gr.Error("Please provide a method. Field empty!")

    if not organisation:
        print("org none")
        raise gr.Error("Please provide organisation information. Field empty!")

    # Very basic email parsing
    _, parsed_mail = parseaddr(mail)
    if "@" not in parsed_mail:
        print("email here")
        raise gr.Error("Please provide a valid email address.")

    hub_options = {
        "token": TOKEN,
        "download_mode": "force_redownload",
        "ignore_verifications": True,
    }

    # Work from the results currently on the Hub rather than the snapshot
    # taken at import time; appending to the stale snapshot would overwrite
    # any submission made since the process started.
    latest_results = load_dataset(
        RESULTS_DATASET,
        data_files={"val": "contextual_val_results.csv"},
        **hub_options,
    )
    # Keep every stored column (including URL) so the re-uploaded CSV does
    # not lose data from earlier rows.
    val_results_dataframe = latest_results["val"].to_pandas()

    # Reject only an exact (model, organisation) pair. Testing the two names
    # independently would also reject new pairs whose parts both appear
    # somewhere on the leaderboard.
    submitted_pairs = {
        (str(known_model).lower(), str(known_org).lower())
        for known_model, known_org in zip(
            val_results_dataframe["Model"], val_results_dataframe["Organisation"]
        )
    }
    if (model.lower(), organisation.lower()) in submitted_pairs:
        print("model org combo here")
        raise gr.Error("This model has been already submitted.")

    if path_to_file is None:
        print("file missing here")
        raise gr.Error("Please attach a file.")

    # Accept both a plain path string and an object carrying the path in `.name`.
    submission_path = getattr(path_to_file, "name", path_to_file)

    try:
        tmp_file_output = read_json_file(submission_path)
    except (OSError, ValueError) as err:
        # ValueError covers json.JSONDecodeError and decoding failures.
        print("file format wrong here")
        raise gr.Error("Submission file format incorrect. Please refer to the format description!") from err

    if not isinstance(tmp_file_output, dict) or len(tmp_file_output) != 1:
        print("file format wrong here")
        raise gr.Error("Submission file format incorrect. Please refer to the format description!")

    tmp_output_key = next(iter(tmp_file_output))
    predictions = tmp_file_output[tmp_output_key]
    if not isinstance(predictions, dict):
        print("file format wrong here")
        raise gr.Error("Submission file format incorrect. Please refer to the format description!")
    if len(predictions) != 100:
        print("file not 100 here")
        raise gr.Error("File must contain exactly 100 predictions.")

    # Save submitted file
    time_atm = datetime.datetime.today()
    api.upload_file(
        repo_id=SUBMISSION_DATASET,
        path_or_fileobj=submission_path,
        path_in_repo=f"{organisation}/{model}/{YEAR_VERSION}_raw_{time_atm}.json",
        repo_type="dataset",
        token=TOKEN,
    )

    # Compute score. Look the predictions up under the key actually present
    # in the file; it may differ from the name typed into the form.
    scores = instruction_scorer(val_dataset_dataframe, submission_path, tmp_output_key)

    # The local file name is built from user-supplied text, so neutralise
    # path separators and other special characters ("org/model" is common).
    safe_stem = "".join(
        ch if ch.isalnum() or ch in "-_." else "_"
        for ch in f"{organisation}_{model}"
    )
    scored_path = os.path.join("scored", f"{safe_stem}.json")
    save_json_file(scored_path, scores)

    # Save scored file
    api.upload_file(
        repo_id=SUBMISSION_DATASET,
        path_or_fileobj=scored_path,
        path_in_repo=f"{organisation}/{model}/{YEAR_VERSION}_scored_{time_atm}.json",
        repo_type="dataset",
        token=TOKEN,
    )

    # Actual submission
    eval_entry = {
        "Model": model,
        "Method": method,
        "Organisation": organisation,
        "URL": url,
        "All": scores["average"],
        "Time": scores["time"],
        "Shopping": scores["shopping"],
        "Navigation": scores["navigation-transportation"],
        "Abstract": scores["abstract"],
        "Application Usage": scores["app"],
        "Web Usage": scores["web"],
        "Infographic": scores["infographics"],
        "Miscellaneous Natural Scenes": scores["misc"],
    }

    val_results_dataframe = pd.concat(
        [val_results_dataframe, pd.DataFrame([eval_entry])], ignore_index=True
    )
    val_results_dataframe.to_csv('contextual_val_results.csv', index=False)

    api.upload_file(
        repo_id=RESULTS_DATASET,
        path_or_fileobj="contextual_val_results.csv",
        path_in_repo="contextual_val_results.csv",
        repo_type="dataset",
        token=TOKEN,
    )

    contact_info = {
        "Model": model,
        "URL": url,
        "Organisation": organisation,
        "Mail": mail,
    }

    # Same reasoning as for the results: append to the contacts currently
    # stored on the Hub, not to the import-time snapshot.
    latest_contacts = load_dataset(
        CONTACT_DATASET,
        data_files={"contacts": "contacts.csv"},
        **hub_options,
    )
    contacts_dataframe = latest_contacts["contacts"].to_pandas()
    contacts_dataframe = pd.concat(
        [contacts_dataframe, pd.DataFrame([contact_info])], ignore_index=True
    )
    contacts_dataframe.to_csv('contacts.csv', index=False)

    api.upload_file(
        repo_id=CONTACT_DATASET,
        path_or_fileobj="contacts.csv",
        path_in_repo="contacts.csv",
        repo_type="dataset",
        token=TOKEN,
    )

    return format_log(f"Model {model} submitted by {organisation} successfully! \nPlease refresh the val leaderboard, and wait a bit to see the score displayed")
197
+
198
+
199
def refresh():
    """Reload both result splits from the Hub and rebuild the leaderboard tables.

    Returns:
        A ``(val, test)`` tuple of DataFrames, in the order the refresh
        button's outputs expect them.
    """
    fresh_results = load_dataset(
        RESULTS_DATASET,
        data_files={
            "test": "contextual_test_results.csv",
            "val": "contextual_val_results.csv",
        },
        token=TOKEN,
        download_mode="force_redownload",
        ignore_verifications=True,
    )
    return tuple(
        get_dataframe_from_results(results=fresh_results, split=split_name)
        for split_name in ("val", "test")
    )
206
+
207
def upload_file(files):
    """Return the ``name`` attribute of every entry in ``files``, in order."""
    collected = []
    for entry in files:
        collected.append(entry.name)
    return collected
210
+
211
+
212
# Assemble the Gradio interface: collapsible information sections, the two
# leaderboard tables, a refresh button and the submission form.
# NOTE(review): the nesting of the `with` blocks below was reconstructed from
# a flattened diff view — confirm the indentation against the original layout.
demo = gr.Blocks()
with demo:
    gr.HTML(TITLE)
    # gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    # Static information sections, each collapsed by default.
    with gr.Row():
        with gr.Accordion("🧐 Introduction", open=False):
            gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    with gr.Row():
        with gr.Accordion("🎯 Submission Guidelines", open=False):
            gr.Markdown(SUBMISSION_TEXT, elem_classes="markdown-text")

    with gr.Row():
        with gr.Accordion("📙 Citation", open=False):
            citation_button = gr.TextArea(
                value=CITATION_BUTTON_TEXT,
                label=CITATION_BUTTON_LABEL,
                elem_id="citation-button",
            )
    # Read-only leaderboard tables, seeded with the data loaded at import time.
    with gr.Tab("Results: Test"):
        leaderboard_table_test = gr.components.Dataframe(
            value=test_results_dataframe, datatype=TYPES, interactive=False,
            column_widths=["20%"]
        )
    with gr.Tab("Results: Val"):
        leaderboard_table_val = gr.components.Dataframe(
            value=val_results_dataframe, datatype=TYPES, interactive=False,
            column_widths=["20%"]
        )

    # refresh() returns (val, test), matching the order of `outputs`.
    refresh_button = gr.Button("Refresh")
    refresh_button.click(
        refresh,
        inputs=[],
        outputs=[
            leaderboard_table_val,
            leaderboard_table_test,
        ],
    )
    # Submission form; the inputs are passed to add_new_eval positionally.
    with gr.Accordion("Submit a new model for evaluation"):
        with gr.Row():
            with gr.Column():
                model_name_textbox = gr.Textbox(label="Model name", type='text')
                method_textbox = gr.Textbox(label="Method (LMM or Aug LLM or any other)", type='text')
                url_textbox = gr.Textbox(label="URL to model information", type='text')
            with gr.Column():
                organisation = gr.Textbox(label="Organisation", type='text')
                mail = gr.Textbox(label="Contact email (will be stored privately, & used if there is an issue with your submission)", type='email')
                file_output = gr.File()


        submit_button = gr.Button("Submit Eval")
        submission_result = gr.Markdown()
        # Input order must match add_new_eval's parameters:
        # model, method, url, path_to_file, organisation, mail.
        submit_button.click(
            add_new_eval,
            [
                model_name_textbox,
                method_textbox,
                url_textbox,
                file_output,
                organisation,
                mail
            ],
            submission_result,
        )

# Call restart_space every 3600 seconds from a background scheduler.
scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, "interval", seconds=3600)
scheduler.start()
demo.launch(debug=True)
content.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ TITLE = """<h1 align="center" id="space-title">ConTextual Leaderboard</h1>"""
2
+
3
+ INTRODUCTION_TEXT = """
4
+ Models are becoming quite good at understanding text on its own, but what about text in images, which gives important contextual information? For example, navigating a map, or understanding a meme? The ability to reason about the interactions between the text and visual context in images can power many real-world applications, such as AI assistants, or tools to assist the visually impaired. We refer to these tasks as context-sensitive text-rich visual reasoning tasks.
5
+
6
+ At the moment, most evaluations of instruction-tuned large multimodal models (LMMs) focus on testing how well models can respond to human instructions posed as questions or imperative tasks over images… but not how well they understand context-sensitive text-rich scenes! That’s why we created ConTextual, a Context-sensitive Text-rich visuaL reasoning dataset for evaluating LMMs. We also released a leaderboard, so that the community can see for themselves which models are the best at this task. (See our [paper](https://arxiv.org/abs/2401.13311) for more details.)
7
+
8
+ ## Data
9
+ ConTextual comprises **506 examples covering 8 real-world visual scenarios** - *Time Reading, Shopping, Navigation, Abstract Scenes, Mobile Application, Webpages, Infographics and Miscellaneous Natural Scenes*. Each sample consists of:
10
+ - A text-rich image
11
+ - A human-written instruction (question or imperative task)
12
+ - A human-written reference response
13
+
14
+
15
+ ### Data Access
16
+ ConTextual data can be found on HuggingFace and GitHub.
17
+ - HuggingFace
18
+ - [Test](https://huggingface.co/datasets/ucla-contextual/contextual_test)
19
+ - [Val](https://huggingface.co/datasets/ucla-contextual/contextual_val)
20
+ - Github
21
+ - [Test](https://github.com/rohan598/ConTextual/blob/main/data/contextual_test.csv)
22
+ - [Val](https://github.com/rohan598/ConTextual/blob/main/data/contextual_val.csv)
23
+
24
+ ### Data Format
25
+ ```
26
+ {
27
+ "image_url": [string] url to the hosted image,
28
+ "instruction": [string] instruction text,
29
+ "response": [string] response text (only provided for samples in the val subset),
30
+ "category": [string] visual scenario this example belongs to, such as 'time' or 'shopping', out of the 8 possible scenarios in ConTextual
31
+ }
32
+ ```
33
+
34
+ """
35
+
36
+ SUBMISSION_TEXT = """
37
+ ## Submissions
38
+ Only validation results can be submitted here. Scores are expressed as the percentage of correct answers for a given split.
39
+
40
+ Submissions made by our team are labelled "ConTextual authors".
41
+
42
+ ### Validation Results
43
+ To submit your validation results to the leaderboard, you can run our auto-evaluation code (Evaluation Pipeline with GPT4), following the instructions [here](https://github.com/rohan598/ConTextual?tab=readme-ov-file#-evaluation-pipeline-gpt-4).
44
+
45
+ We expect submissions to be in JSON format, as shown below:
46
+ ```
47
+ {"model_name": {"img_url": "1 or 0 as integer"}}
48
+ Replace model name with your model name (string)
49
+ Replace img_url with img_url of the instance (string)
50
+ Value for an img url is either 0 or 1 (int)
51
+ There should be 100 predictions, corresponding to the 100 urls of the val set.
52
+ ```
53
+
54
+ **Please do not use the public validation (val) set as part of the training data for your models.**
55
+
56
+ ### Test Results
57
+ Once you are happy with your val results, you can send your model predictions to [rohan](mailto:[email protected]) and [hritik](mailto:[email protected]).
58
+
59
+ Please include in your email
60
+ 1) A name for your model.
61
+ 2) Organization (affiliation).
62
+ 3) (Optionally) GitHub repo or paper link.
63
+
64
+ We expect submissions to be in JSON format, similar to the val set, as shown below:
65
+ ```
66
+ {"model_name": {"img_url": "predicted response"}}
67
+ Replace model name with your model name (string)
68
+ Replace img_url with img_url of the instance (string)
69
+ Value for an img url is the predicted response for that instance (string)
70
+ There should be 506 predictions, corresponding to the 506 urls of the test set.
71
+ ```
72
+
73
+ **Please revisit the test leaderboard within 1 to 2 days after sharing your prediction file to view your model scores and ranking on the leaderboard.**
74
+
75
+ """
76
+
77
+
78
+ CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
79
+ CITATION_BUTTON_TEXT = r"""@misc{wadhawan2024contextual,
80
+ title={ConTextual: Evaluating Context-Sensitive Text-Rich Visual Reasoning in Large Multimodal Models},
81
+ author={Rohan Wadhawan and Hritik Bansal and Kai-Wei Chang and Nanyun Peng},
82
+ year={2024},
83
+ eprint={2401.13311},
84
+ archivePrefix={arXiv},
85
+ primaryClass={cs.CV}
86
+ }"""
87
+
88
+
89
def _styled_paragraph(color, msg):
    """Wrap ``msg`` in a centred, 20px HTML paragraph drawn in ``color``."""
    return "<p style='color: {}; font-size: 20px; text-align: center;'>{}</p>".format(color, msg)


def format_error(msg):
    """Render ``msg`` as a red status paragraph."""
    return _styled_paragraph("red", msg)


def format_warning(msg):
    """Render ``msg`` as an orange status paragraph."""
    return _styled_paragraph("orange", msg)


def format_log(msg):
    """Render ``msg`` as a green status paragraph."""
    return _styled_paragraph("green", msg)
97
+
98
def model_hyperlink(link, model_name):
    """Return an HTML anchor labelled ``model_name`` that opens ``link`` in a new tab."""
    anchor_style = "color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"
    template = '<a target="_blank" href="{href}" style="{style}">{label}</a>'
    return template.format(href=link, style=anchor_style, label=model_name)
100
+
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ datasets==2.14.5
2
+ gradio==4.19.2
3
+ huggingface-hub==0.19.3
4
+ numpy==1.24.2
5
+ APScheduler==3.10.1
scorer.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import re
3
+ import string
4
+ import warnings
5
+ import pandas as pd
6
+ import numpy as np
7
+ import os
8
+
9
def _percentage(correct, total):
    """Return ``correct / total`` as a percentage, or 0.0 when ``total`` is zero."""
    return 100 * correct / total if total else 0.0


def instruction_scorer(data, judgment_file, model_name):
    """Compute per-category and overall accuracy for one model's judgments.

    Args:
        data: DataFrame with an ``image_url`` and a ``category`` column.
        judgment_file: path to a JSON file shaped
            ``{model_name: {image_url: rating}}``.
        model_name: top-level key of ``judgment_file`` to score.

    Returns:
        Dict of percentages keyed by each category name, plus ``"misc"``
        (``stvqa`` and ``estvqa`` pooled) and ``"average"`` (mean of the
        seven named scenarios and ``"misc"``). A category with no scored
        sample reports 0.0 instead of raising ZeroDivisionError.

    Raises:
        KeyError: if ``model_name`` is not a key of the judgment file, or a
            scored image belongs to a category not listed below.
    """
    category_by_url = dict(zip(data['image_url'], data['category']))

    with open(judgment_file, 'r', encoding='utf-8') as f:
        judgements = json.load(f)

    model_data = judgements[model_name]

    # category -> [sum of ratings, number of scored samples]
    cat = {'time': [0,0], 'shopping': [0,0], 'navigation-transportation': [0,0], 'abstract': [0,0], 'app': [0,0], 'web': [0,0], 'infographics': [0,0], 'stvqa': [0,0], 'estvqa': [0,0]}

    for key, rating in model_data.items():
        # Predictions for URLs that are not part of the dataset are ignored.
        if key not in category_by_url:
            continue
        tally = cat[category_by_url[key]]
        tally[0] += rating
        tally[1] += 1

    output_dict = {name: _percentage(hits, seen) for name, (hits, seen) in cat.items()}

    # "misc" pools both scene-text categories; the denominator must count
    # the samples of BOTH of them (the stvqa count used to be added twice).
    output_dict["misc"] = _percentage(
        cat['stvqa'][0] + cat['estvqa'][0],
        cat['stvqa'][1] + cat['estvqa'][1],
    )

    output_dict["average"] = (output_dict["time"]+output_dict["shopping"]+output_dict["navigation-transportation"]+output_dict["abstract"]+output_dict["app"]+output_dict["web"]+output_dict["infographics"]+output_dict["misc"])/8

    return output_dict