cccjc committed on
Commit
8b2c873
·
1 Parent(s): f3f40fb

add some uncommitted code of f3f40fb

Browse files
Files changed (2) hide show
  1. app.py +3 -3
  2. utils.py +32 -97
app.py CHANGED
@@ -1,5 +1,5 @@
1
  import gradio as gr
2
- from utils import DefaultDataLoader, SingleImageDataLoader
3
  import os
4
  from constants import *
5
 
@@ -20,8 +20,8 @@ with open(si_css_file, "r") as f:
20
  si_css = f.read()
21
 
22
  # Initialize data loaders
23
- default_loader = DefaultDataLoader()
24
- si_loader = SingleImageDataLoader()
25
 
26
  with gr.Blocks() as block:
27
  # Add a style element that we'll update
 
1
  import gradio as gr
2
+ from utils import MEGABenchEvalDataLoader
3
  import os
4
  from constants import *
5
 
 
20
  si_css = f.read()
21
 
22
  # Initialize data loaders
23
+ default_loader = MEGABenchEvalDataLoader("./static/eval_results/Default")
24
+ si_loader = MEGABenchEvalDataLoader("./static/eval_results/SI")
25
 
26
  with gr.Blocks() as block:
27
  # Add a style element that we'll update
utils.py CHANGED
@@ -10,29 +10,48 @@ from constants import (
10
  BASE_MODEL_GROUPS
11
  )
12
 
13
- class BaseDataLoader:
14
- def __init__(self):
15
- self.MODEL_DATA = self._load_model_data()
16
- self.SUMMARY_DATA = self._load_summary_data()
 
17
  self.SUPER_GROUPS = self._initialize_super_groups()
18
  self.MODEL_GROUPS = self._initialize_model_groups()
19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  def _initialize_super_groups(self):
21
  # Get a sample model to access the structure
22
- sample_model = next(iter(self.MODEL_DATA))
23
 
24
  # Create groups with task counts
25
  groups = {}
26
  self.keyword_display_map = {} # Add this map to store display-to-original mapping
27
 
28
- for dim in self.MODEL_DATA[sample_model]:
29
  dim_name = DIMENSION_NAME_MAP[dim]
30
  # Create a list of tuples (display_name, count, keyword) for sorting
31
  keyword_info = []
32
 
33
- for keyword in self.MODEL_DATA[sample_model][dim]:
34
  # Get the task count for this keyword
35
- task_count = self.MODEL_DATA[sample_model][dim][keyword]["count"]
36
  original_name = KEYWORD_NAME_MAP.get(keyword, keyword)
37
  display_name = f"{original_name}({task_count})"
38
  keyword_info.append((display_name, task_count, keyword))
@@ -50,7 +69,7 @@ class BaseDataLoader:
50
  return {k: groups[k] for k in order if k in groups}
51
 
52
  def _initialize_model_groups(self) -> Dict[str, list]:
53
- available_models = set(self.MODEL_DATA.keys())
54
 
55
  filtered_groups = {}
56
  for group_name, models in BASE_MODEL_GROUPS.items():
@@ -63,21 +82,15 @@ class BaseDataLoader:
63
 
64
  return filtered_groups
65
 
66
- def _load_model_data(self) -> Dict[str, Any]:
67
- raise NotImplementedError("Subclasses must implement _load_model_data")
68
-
69
- def _load_summary_data(self) -> Dict[str, Any]:
70
- raise NotImplementedError("Subclasses must implement _load_summary_data")
71
-
72
  def get_df(self, selected_super_group: str, selected_model_group: str) -> pd.DataFrame:
73
  original_dimension = get_original_dimension(selected_super_group)
74
  data = []
75
 
76
  for model in self.MODEL_GROUPS[selected_model_group]:
77
- if model not in self.MODEL_DATA or model not in self.SUMMARY_DATA:
78
  continue
79
 
80
- model_data = self.MODEL_DATA[model]
81
  summary = self.SUMMARY_DATA[model]
82
 
83
  # Basic model information
@@ -110,11 +123,11 @@ class BaseDataLoader:
110
  df = self.get_df(selected_super_group, selected_model_group)
111
 
112
  # Get total task counts from the first model's data
113
- sample_model = next(iter(self.SUMMARY_DATA))
114
  total_core_tasks = self.SUMMARY_DATA[sample_model]["core"]["num_eval_tasks"]
115
  total_open_tasks = self.SUMMARY_DATA[sample_model]["open"]["num_eval_tasks"]
116
  total_tasks = total_core_tasks + total_open_tasks
117
-
118
  # Define headers with task counts
119
  column_headers = {
120
  "Models": "Models",
@@ -143,84 +156,6 @@ class BaseDataLoader:
143
  return headers, data
144
 
145
 
146
- class DefaultDataLoader(BaseDataLoader):
147
- def __init__(self):
148
- super().__init__()
149
-
150
- def _load_model_data(self) -> Dict[str, Any]:
151
- model_data = {}
152
- base_path = "./static/eval_results/Default"
153
-
154
- try:
155
- model_folders = [f for f in os.listdir(base_path) if os.path.isdir(os.path.join(base_path, f))]
156
- for model_name in model_folders:
157
- model_path = f"{base_path}/{model_name}/summary_results.json"
158
- with open(model_path, "r") as f:
159
- data = json.load(f)
160
- if "keyword_stats" in data:
161
- model_data[model_name] = data["keyword_stats"]
162
- except FileNotFoundError:
163
- pass
164
-
165
- return model_data
166
-
167
- def _load_summary_data(self) -> Dict[str, Any]:
168
- summary_data = {}
169
- base_path = "./static/eval_results/Default"
170
-
171
- try:
172
- model_folders = [f for f in os.listdir(base_path) if os.path.isdir(os.path.join(base_path, f))]
173
- for model_name in model_folders:
174
- model_path = f"{base_path}/{model_name}/summary_results.json"
175
- with open(model_path, "r") as f:
176
- data = json.load(f)
177
- if "model_summary" in data:
178
- summary_data[model_name] = data["model_summary"]
179
- except FileNotFoundError:
180
- pass
181
-
182
- return summary_data
183
-
184
-
185
- class SingleImageDataLoader(BaseDataLoader):
186
- def __init__(self):
187
- super().__init__()
188
-
189
- def _load_model_data(self) -> Dict[str, Any]:
190
- model_data = {}
191
- base_path = "./static/eval_results/SI"
192
-
193
- try:
194
- model_folders = [f for f in os.listdir(base_path) if os.path.isdir(os.path.join(base_path, f))]
195
- for model_name in model_folders:
196
- model_path = f"{base_path}/{model_name}/summary_results.json"
197
- with open(model_path, "r") as f:
198
- data = json.load(f)
199
- if "keyword_stats" in data:
200
- model_data[model_name] = data["keyword_stats"]
201
- except FileNotFoundError:
202
- pass
203
-
204
- return model_data
205
-
206
- def _load_summary_data(self) -> Dict[str, Any]:
207
- summary_data = {}
208
- base_path = "./static/eval_results/SI"
209
-
210
- try:
211
- model_folders = [f for f in os.listdir(base_path) if os.path.isdir(os.path.join(base_path, f))]
212
- for model_name in model_folders:
213
- model_path = f"{base_path}/{model_name}/summary_results.json"
214
- with open(model_path, "r") as f:
215
- data = json.load(f)
216
- if "model_summary" in data:
217
- summary_data[model_name] = data["model_summary"]
218
- except FileNotFoundError:
219
- pass
220
-
221
- return summary_data
222
-
223
-
224
  # Keep your helper functions
225
  def get_original_dimension(mapped_dimension):
226
  return next(k for k, v in DIMENSION_NAME_MAP.items() if v == mapped_dimension)
 
10
  BASE_MODEL_GROUPS
11
  )
12
 
13
+ class MEGABenchEvalDataLoader:
14
+ def __init__(self, base_path):
15
+ self.base_path = base_path
16
+ # Load both model and summary data at once
17
+ self.KEYWORD_DATA, self.SUMMARY_DATA = self._load_data()
18
  self.SUPER_GROUPS = self._initialize_super_groups()
19
  self.MODEL_GROUPS = self._initialize_model_groups()
20
 
21
+ def _get_base_path(self) -> str:
22
+ raise NotImplementedError("Subclasses must implement _get_base_path")
23
+
24
+ def _load_data(self) -> Tuple[Dict[str, Any], Dict[str, Any]]:
25
+ summary_data = {}
26
+ keyword_data = {}
27
+ model_folders = [f for f in os.listdir(self.base_path) if os.path.isdir(os.path.join(self.base_path, f))]
28
+ for model_name in model_folders:
29
+ model_path = f"{self.base_path}/{model_name}/summary_and_keyword_stats.json"
30
+ with open(model_path, "r") as f:
31
+ data = json.load(f)
32
+ if "keyword_stats" in data:
33
+ keyword_data[model_name] = data["keyword_stats"]
34
+ if "model_summary" in data:
35
+ summary_data[model_name] = data["model_summary"]
36
+
37
+ return keyword_data, summary_data
38
+
39
  def _initialize_super_groups(self):
40
  # Get a sample model to access the structure
41
+ sample_model = next(iter(self.KEYWORD_DATA))
42
 
43
  # Create groups with task counts
44
  groups = {}
45
  self.keyword_display_map = {} # Add this map to store display-to-original mapping
46
 
47
+ for dim in self.KEYWORD_DATA[sample_model]:
48
  dim_name = DIMENSION_NAME_MAP[dim]
49
  # Create a list of tuples (display_name, count, keyword) for sorting
50
  keyword_info = []
51
 
52
+ for keyword in self.KEYWORD_DATA[sample_model][dim]:
53
  # Get the task count for this keyword
54
+ task_count = self.KEYWORD_DATA[sample_model][dim][keyword]["count"]
55
  original_name = KEYWORD_NAME_MAP.get(keyword, keyword)
56
  display_name = f"{original_name}({task_count})"
57
  keyword_info.append((display_name, task_count, keyword))
 
69
  return {k: groups[k] for k in order if k in groups}
70
 
71
  def _initialize_model_groups(self) -> Dict[str, list]:
72
+ available_models = set(self.KEYWORD_DATA.keys())
73
 
74
  filtered_groups = {}
75
  for group_name, models in BASE_MODEL_GROUPS.items():
 
82
 
83
  return filtered_groups
84
 
 
 
 
 
 
 
85
  def get_df(self, selected_super_group: str, selected_model_group: str) -> pd.DataFrame:
86
  original_dimension = get_original_dimension(selected_super_group)
87
  data = []
88
 
89
  for model in self.MODEL_GROUPS[selected_model_group]:
90
+ if model not in self.KEYWORD_DATA or model not in self.SUMMARY_DATA:
91
  continue
92
 
93
+ model_data = self.KEYWORD_DATA[model]
94
  summary = self.SUMMARY_DATA[model]
95
 
96
  # Basic model information
 
123
  df = self.get_df(selected_super_group, selected_model_group)
124
 
125
  # Get total task counts from the first model's data
126
+ sample_model = "GPT_4o"
127
  total_core_tasks = self.SUMMARY_DATA[sample_model]["core"]["num_eval_tasks"]
128
  total_open_tasks = self.SUMMARY_DATA[sample_model]["open"]["num_eval_tasks"]
129
  total_tasks = total_core_tasks + total_open_tasks
130
+
131
  # Define headers with task counts
132
  column_headers = {
133
  "Models": "Models",
 
156
  return headers, data
157
 
158
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
159
  # Keep your helper functions
160
  def get_original_dimension(mapped_dimension):
161
  return next(k for k, v in DIMENSION_NAME_MAP.items() if v == mapped_dimension)