cccjc committed on
Commit
0d5512e
·
1 Parent(s): aedc60d

update leaderboard layout

Browse files
Files changed (3) hide show
  1. app.py +5 -2
  2. static/css/style.css +39 -33
  3. utils.py +5 -3
app.py CHANGED
@@ -52,6 +52,11 @@ with gr.Blocks(css=css) as block:
52
  )
53
 
54
  initial_headers, initial_data = get_leaderboard_data(list(SUPER_GROUPS.keys())[0], "All")
 
 
 
 
 
55
  data_component = gr.Dataframe(
56
  value=initial_data,
57
  headers=initial_headers,
@@ -76,5 +81,3 @@ with gr.Blocks(css=css) as block:
76
 
77
  if __name__ == "__main__":
78
  block.launch(share=True)
79
- #block.launch(server_name="127.0.0.1", server_port=7860)
80
-
 
52
  )
53
 
54
  initial_headers, initial_data = get_leaderboard_data(list(SUPER_GROUPS.keys())[0], "All")
55
+ gr.Markdown(
56
+ "**Table 1: MEGA-Bench full results.** <br> The Core set contains $N_{\\text{core}} = 440$ tasks evaluated by rule-based metrics, and the Open-ended set contains $N_{\\text{open}} = 65$ tasks evaluated by a VLM judge (we use GPT-4o-0806). <br> $\\text{Overall} \\ = \\ \\frac{\\max(\\text{Core w/o CoT}, \\ \\text{Core w/ CoT}) \\ \\cdot \\ N_{\\text{core}} \\ + \\ \\text{Open-ended} \\ \\cdot \\ N_{\\text{open}}}{N_{\\text{core}} \\ + \\ N_{\\text{open}}}$",
57
+ elem_classes="table-caption",
58
+ latex_delimiters=[ {"left": "$", "right": "$", "display": False }],
59
+ )
60
  data_component = gr.Dataframe(
61
  value=initial_data,
62
  headers=initial_headers,
 
81
 
82
  if __name__ == "__main__":
83
  block.launch(share=True)
 
 
static/css/style.css CHANGED
@@ -26,74 +26,80 @@
26
 
27
  /* Light mode styles */
28
  .custom-dataframe {
29
- color: #000000;
30
- background-color: #ffffff;
31
  }
32
 
33
  .custom-dataframe thead th {
34
- background-color: #f0f0f0 !important;
35
- color: #000000 !important;
36
  }
37
 
38
  .custom-dataframe tbody td {
39
- background-color: #ffffff !important;
40
- color: #000000 !important;
41
  }
42
 
43
- .custom-dataframe thead th:nth-child(-n+4),
44
- .custom-dataframe tbody td:nth-child(-n+4) {
45
- background-color: #f0f8ff !important;
46
  }
47
 
48
- .custom-dataframe thead th:nth-child(n+5),
49
- .custom-dataframe tbody td:nth-child(n+5) {
50
- background-color: #f0fff0 !important;
51
  }
52
 
53
- .custom-dataframe tbody tr:nth-child(even) td:nth-child(-n+4) {
54
- background-color: #e6f3ff !important;
55
  }
56
 
57
- .custom-dataframe tbody tr:nth-child(even) td:nth-child(n+5) {
58
- background-color: #e6ffe6 !important;
59
  }
60
 
61
  /* Dark mode styles */
62
  @media (prefers-color-scheme: dark) {
63
  .custom-dataframe {
64
- color: #e0e0e0 !important;
65
- background-color: #1a1a1a !important;
66
  }
67
 
68
  .custom-dataframe thead th {
69
- background-color: #333333 !important;
70
- color: #ffffff !important;
71
  }
72
 
73
  .custom-dataframe tbody td {
74
- background-color: #1a1a1a !important;
75
- color: #e0e0e0 !important;
76
  }
77
 
78
- .custom-dataframe thead th:nth-child(-n+4),
79
- .custom-dataframe tbody td:nth-child(-n+4) {
80
- background-color: rgba(0, 50, 100, 0.3) !important;
81
  }
82
 
83
- .custom-dataframe thead th:nth-child(n+5),
84
- .custom-dataframe tbody td:nth-child(n+5) {
85
- background-color: rgba(0, 75, 0, 0.3) !important;
86
  }
87
 
88
- .custom-dataframe tbody tr:nth-child(even) td:nth-child(-n+4) {
89
- background-color: rgba(0, 60, 120, 0.3) !important;
90
  }
91
 
92
- .custom-dataframe tbody tr:nth-child(even) td:nth-child(n+5) {
93
- background-color: rgba(0, 90, 0, 0.3) !important;
94
  }
95
 
96
  .custom-dataframe tbody tr:hover td {
97
- background-color: rgba(255, 255, 255, 0.1) !important;
98
  }
99
  }
 
 
 
 
 
 
 
26
 
27
  /* Light mode styles */
28
  .custom-dataframe {
29
+ color: var(--text-color);
30
+ background-color: var(--background-color);
31
  }
32
 
33
  .custom-dataframe thead th {
34
+ background-color: var(--header-background) !important;
35
+ color: var(--text-color) !important;
36
  }
37
 
38
  .custom-dataframe tbody td {
39
+ background-color: var(--background-color) !important;
40
+ color: var(--text-color) !important;
41
  }
42
 
43
+ .custom-dataframe thead th:nth-child(-n+5),
44
+ .custom-dataframe tbody td:nth-child(-n+5) {
45
+ background-color: var(--global-column-background) !important;
46
  }
47
 
48
+ .custom-dataframe thead th:nth-child(n+6),
49
+ .custom-dataframe tbody td:nth-child(n+6) {
50
+ background-color: var(--dimension-column-background) !important;
51
  }
52
 
53
+ .custom-dataframe tbody tr:nth-child(even) td:nth-child(-n+5) {
54
+ background-color: var(--row-even-global) !important;
55
  }
56
 
57
+ .custom-dataframe tbody tr:nth-child(even) td:nth-child(n+6) {
58
+ background-color: var(--row-even-dimension) !important;
59
  }
60
 
61
  /* Dark mode styles */
62
  @media (prefers-color-scheme: dark) {
63
  .custom-dataframe {
64
+ color: var(--text-color) !important;
65
+ background-color: var(--background-color) !important;
66
  }
67
 
68
  .custom-dataframe thead th {
69
+ background-color: var(--header-background) !important;
70
+ color: var(--text-color) !important;
71
  }
72
 
73
  .custom-dataframe tbody td {
74
+ background-color: var(--background-color) !important;
75
+ color: var(--text-color) !important;
76
  }
77
 
78
+ .custom-dataframe thead th:nth-child(-n+5),
79
+ .custom-dataframe tbody td:nth-child(-n+5) {
80
+ background-color: var(--global-column-background) !important;
81
  }
82
 
83
+ .custom-dataframe thead th:nth-child(n+6),
84
+ .custom-dataframe tbody td:nth-child(n+6) {
85
+ background-color: var(--dimension-column-background) !important;
86
  }
87
 
88
+ .custom-dataframe tbody tr:nth-child(even) td:nth-child(-n+5) {
89
+ background-color: var(--row-even-global) !important;
90
  }
91
 
92
+ .custom-dataframe tbody tr:nth-child(even) td:nth-child(n+6) {
93
+ background-color: var(--row-even-dimension) !important;
94
  }
95
 
96
  .custom-dataframe tbody tr:hover td {
97
+ background-color: var(--hover-background) !important;
98
  }
99
  }
100
+
101
+ .table-caption {
102
+ text-align: center;
103
+ margin-top: 10px;
104
+ color: var(--text-color);
105
+ }
utils.py CHANGED
@@ -121,11 +121,13 @@ def get_df(selected_super_group, selected_model_group):
121
  for model in MODEL_GROUPS[selected_model_group]:
122
  model_data = MODEL_DATA[model]
123
  summary = SUMMARY_DATA[model]
124
- core_score = max(summary["core_noncot"]["macro_mean_score"], summary["core_cot"]["macro_mean_score"])
 
125
  row = {
126
  "Models": get_display_model_name(model), # Use the mapped name
127
  "Overall": round(summary["overall_score"] * 100, 2),
128
- "Core": round(core_score * 100, 2),
 
129
  "Open-ended": round(summary["open"]["macro_mean_score"] * 100, 2)
130
  }
131
  for keyword in SUPER_GROUPS[selected_super_group]:
@@ -142,6 +144,6 @@ def get_df(selected_super_group, selected_model_group):
142
 
143
  def get_leaderboard_data(selected_super_group, selected_model_group):
144
  df = get_df(selected_super_group, selected_model_group)
145
- headers = ["Models", "Overall", "Core", "Open-ended"] + SUPER_GROUPS[selected_super_group]
146
  data = df[headers].values.tolist()
147
  return headers, data
 
121
  for model in MODEL_GROUPS[selected_model_group]:
122
  model_data = MODEL_DATA[model]
123
  summary = SUMMARY_DATA[model]
124
+ core_noncot_score = summary["core_noncot"]["macro_mean_score"]
125
+ core_cot_score = summary["core_cot"]["macro_mean_score"]
126
  row = {
127
  "Models": get_display_model_name(model), # Use the mapped name
128
  "Overall": round(summary["overall_score"] * 100, 2),
129
+ "Core(w/o CoT)": round(core_noncot_score * 100, 2),
130
+ "Core(w/ CoT)": round(core_cot_score * 100, 2),
131
  "Open-ended": round(summary["open"]["macro_mean_score"] * 100, 2)
132
  }
133
  for keyword in SUPER_GROUPS[selected_super_group]:
 
144
 
145
  def get_leaderboard_data(selected_super_group, selected_model_group):
146
  df = get_df(selected_super_group, selected_model_group)
147
+ headers = ["Models", "Overall", "Core(w/o CoT)", "Core(w/ CoT)", "Open-ended"] + SUPER_GROUPS[selected_super_group]
148
  data = df[headers].values.tolist()
149
  return headers, data