Spaces:
Running
Running
update leaderboard layout
Browse files
- app.py +5 -2
- static/css/style.css +39 -33
- utils.py +5 -3
app.py
CHANGED
@@ -52,6 +52,11 @@ with gr.Blocks(css=css) as block:
|
|
52 |
)
|
53 |
|
54 |
initial_headers, initial_data = get_leaderboard_data(list(SUPER_GROUPS.keys())[0], "All")
|
|
|
|
|
|
|
|
|
|
|
55 |
data_component = gr.Dataframe(
|
56 |
value=initial_data,
|
57 |
headers=initial_headers,
|
@@ -76,5 +81,3 @@ with gr.Blocks(css=css) as block:
|
|
76 |
|
77 |
if __name__ == "__main__":
|
78 |
block.launch(share=True)
|
79 |
-
#block.launch(server_name="127.0.0.1", server_port=7860)
|
80 |
-
|
|
|
52 |
)
|
53 |
|
54 |
initial_headers, initial_data = get_leaderboard_data(list(SUPER_GROUPS.keys())[0], "All")
|
55 |
+
gr.Markdown(
|
56 |
+
"**Table 1: MEGA-Bench full results.** <br> The Core set contains $N_{\\text{core}} = 440$ tasks evaluated by rule-based metrics, and the Open-ended set contains $N_{\\text{open}} = 65$ tasks evaluated by a VLM judge (we use GPT-4o-0806). <br> $\\text{Overall} \\ = \\ \\frac{\\max(\\text{Core w/o CoT}, \\ \\text{Core w/ CoT}) \\ \\cdot \\ N_{\\text{core}} \\ + \\ \\text{Open-ended} \\ \\cdot \\ N_{\\text{open}}}{N_{\\text{core}} \\ + \\ N_{\\text{open}}}$",
|
57 |
+
elem_classes="table-caption",
|
58 |
+
latex_delimiters=[ {"left": "$", "right": "$", "display": False }],
|
59 |
+
)
|
60 |
data_component = gr.Dataframe(
|
61 |
value=initial_data,
|
62 |
headers=initial_headers,
|
|
|
81 |
|
82 |
if __name__ == "__main__":
|
83 |
block.launch(share=True)
|
|
|
|
static/css/style.css
CHANGED
@@ -26,74 +26,80 @@
|
|
26 |
|
27 |
/* Light mode styles */
|
28 |
.custom-dataframe {
|
29 |
-
color:
|
30 |
-
background-color:
|
31 |
}
|
32 |
|
33 |
.custom-dataframe thead th {
|
34 |
-
background-color:
|
35 |
-
color:
|
36 |
}
|
37 |
|
38 |
.custom-dataframe tbody td {
|
39 |
-
background-color:
|
40 |
-
color:
|
41 |
}
|
42 |
|
43 |
-
.custom-dataframe thead th:nth-child(-n+
|
44 |
-
.custom-dataframe tbody td:nth-child(-n+
|
45 |
-
background-color:
|
46 |
}
|
47 |
|
48 |
-
.custom-dataframe thead th:nth-child(n+
|
49 |
-
.custom-dataframe tbody td:nth-child(n+
|
50 |
-
background-color:
|
51 |
}
|
52 |
|
53 |
-
.custom-dataframe tbody tr:nth-child(even) td:nth-child(-n+
|
54 |
-
background-color:
|
55 |
}
|
56 |
|
57 |
-
.custom-dataframe tbody tr:nth-child(even) td:nth-child(n+
|
58 |
-
background-color:
|
59 |
}
|
60 |
|
61 |
/* Dark mode styles */
|
62 |
@media (prefers-color-scheme: dark) {
|
63 |
.custom-dataframe {
|
64 |
-
color:
|
65 |
-
background-color:
|
66 |
}
|
67 |
|
68 |
.custom-dataframe thead th {
|
69 |
-
background-color:
|
70 |
-
color:
|
71 |
}
|
72 |
|
73 |
.custom-dataframe tbody td {
|
74 |
-
background-color:
|
75 |
-
color:
|
76 |
}
|
77 |
|
78 |
-
.custom-dataframe thead th:nth-child(-n+
|
79 |
-
.custom-dataframe tbody td:nth-child(-n+
|
80 |
-
background-color:
|
81 |
}
|
82 |
|
83 |
-
.custom-dataframe thead th:nth-child(n+
|
84 |
-
.custom-dataframe tbody td:nth-child(n+
|
85 |
-
background-color:
|
86 |
}
|
87 |
|
88 |
-
.custom-dataframe tbody tr:nth-child(even) td:nth-child(-n+
|
89 |
-
background-color:
|
90 |
}
|
91 |
|
92 |
-
.custom-dataframe tbody tr:nth-child(even) td:nth-child(n+
|
93 |
-
background-color:
|
94 |
}
|
95 |
|
96 |
.custom-dataframe tbody tr:hover td {
|
97 |
-
background-color:
|
98 |
}
|
99 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
|
27 |
/* Light mode styles */
|
28 |
.custom-dataframe {
|
29 |
+
color: var(--text-color);
|
30 |
+
background-color: var(--background-color);
|
31 |
}
|
32 |
|
33 |
.custom-dataframe thead th {
|
34 |
+
background-color: var(--header-background) !important;
|
35 |
+
color: var(--text-color) !important;
|
36 |
}
|
37 |
|
38 |
.custom-dataframe tbody td {
|
39 |
+
background-color: var(--background-color) !important;
|
40 |
+
color: var(--text-color) !important;
|
41 |
}
|
42 |
|
43 |
+
.custom-dataframe thead th:nth-child(-n+5),
|
44 |
+
.custom-dataframe tbody td:nth-child(-n+5) {
|
45 |
+
background-color: var(--global-column-background) !important;
|
46 |
}
|
47 |
|
48 |
+
.custom-dataframe thead th:nth-child(n+6),
|
49 |
+
.custom-dataframe tbody td:nth-child(n+6) {
|
50 |
+
background-color: var(--dimension-column-background) !important;
|
51 |
}
|
52 |
|
53 |
+
.custom-dataframe tbody tr:nth-child(even) td:nth-child(-n+5) {
|
54 |
+
background-color: var(--row-even-global) !important;
|
55 |
}
|
56 |
|
57 |
+
.custom-dataframe tbody tr:nth-child(even) td:nth-child(n+6) {
|
58 |
+
background-color: var(--row-even-dimension) !important;
|
59 |
}
|
60 |
|
61 |
/* Dark mode styles */
|
62 |
@media (prefers-color-scheme: dark) {
|
63 |
.custom-dataframe {
|
64 |
+
color: var(--text-color) !important;
|
65 |
+
background-color: var(--background-color) !important;
|
66 |
}
|
67 |
|
68 |
.custom-dataframe thead th {
|
69 |
+
background-color: var(--header-background) !important;
|
70 |
+
color: var(--text-color) !important;
|
71 |
}
|
72 |
|
73 |
.custom-dataframe tbody td {
|
74 |
+
background-color: var(--background-color) !important;
|
75 |
+
color: var(--text-color) !important;
|
76 |
}
|
77 |
|
78 |
+
.custom-dataframe thead th:nth-child(-n+5),
|
79 |
+
.custom-dataframe tbody td:nth-child(-n+5) {
|
80 |
+
background-color: var(--global-column-background) !important;
|
81 |
}
|
82 |
|
83 |
+
.custom-dataframe thead th:nth-child(n+6),
|
84 |
+
.custom-dataframe tbody td:nth-child(n+6) {
|
85 |
+
background-color: var(--dimension-column-background) !important;
|
86 |
}
|
87 |
|
88 |
+
.custom-dataframe tbody tr:nth-child(even) td:nth-child(-n+5) {
|
89 |
+
background-color: var(--row-even-global) !important;
|
90 |
}
|
91 |
|
92 |
+
.custom-dataframe tbody tr:nth-child(even) td:nth-child(n+6) {
|
93 |
+
background-color: var(--row-even-dimension) !important;
|
94 |
}
|
95 |
|
96 |
.custom-dataframe tbody tr:hover td {
|
97 |
+
background-color: var(--hover-background) !important;
|
98 |
}
|
99 |
}
|
100 |
+
|
101 |
+
.table-caption {
|
102 |
+
text-align: center;
|
103 |
+
margin-top: 10px;
|
104 |
+
color: var(--text-color);
|
105 |
+
}
|
utils.py
CHANGED
@@ -121,11 +121,13 @@ def get_df(selected_super_group, selected_model_group):
|
|
121 |
for model in MODEL_GROUPS[selected_model_group]:
|
122 |
model_data = MODEL_DATA[model]
|
123 |
summary = SUMMARY_DATA[model]
|
124 |
-
|
|
|
125 |
row = {
|
126 |
"Models": get_display_model_name(model), # Use the mapped name
|
127 |
"Overall": round(summary["overall_score"] * 100, 2),
|
128 |
-
"Core": round(
|
|
|
129 |
"Open-ended": round(summary["open"]["macro_mean_score"] * 100, 2)
|
130 |
}
|
131 |
for keyword in SUPER_GROUPS[selected_super_group]:
|
@@ -142,6 +144,6 @@ def get_df(selected_super_group, selected_model_group):
|
|
142 |
|
143 |
def get_leaderboard_data(selected_super_group, selected_model_group):
|
144 |
df = get_df(selected_super_group, selected_model_group)
|
145 |
-
headers = ["Models", "Overall", "Core", "Open-ended"] + SUPER_GROUPS[selected_super_group]
|
146 |
data = df[headers].values.tolist()
|
147 |
return headers, data
|
|
|
121 |
for model in MODEL_GROUPS[selected_model_group]:
|
122 |
model_data = MODEL_DATA[model]
|
123 |
summary = SUMMARY_DATA[model]
|
124 |
+
core_noncot_score = summary["core_noncot"]["macro_mean_score"]
|
125 |
+
core_cot_score = summary["core_cot"]["macro_mean_score"]
|
126 |
row = {
|
127 |
"Models": get_display_model_name(model), # Use the mapped name
|
128 |
"Overall": round(summary["overall_score"] * 100, 2),
|
129 |
+
"Core(w/o CoT)": round(core_noncot_score * 100, 2),
|
130 |
+
"Core(w/ CoT)": round(core_cot_score * 100, 2),
|
131 |
"Open-ended": round(summary["open"]["macro_mean_score"] * 100, 2)
|
132 |
}
|
133 |
for keyword in SUPER_GROUPS[selected_super_group]:
|
|
|
144 |
|
145 |
def get_leaderboard_data(selected_super_group, selected_model_group):
|
146 |
df = get_df(selected_super_group, selected_model_group)
|
147 |
+
headers = ["Models", "Overall", "Core(w/o CoT)", "Core(w/ CoT)", "Open-ended"] + SUPER_GROUPS[selected_super_group]
|
148 |
data = df[headers].values.tolist()
|
149 |
return headers, data
|