Spaces:

TIGER-Lab
/

MEGA-Bench

Running

App Files Community

cccjc committed on Oct 30, 2024

Commit

0d5512e

1 Parent(s): aedc60d

update leaderboard layout

Browse files

Files changed (3) hide show

app.py +5 -2
static/css/style.css +39 -33
utils.py +5 -3

app.py CHANGED Viewed

@@ -52,6 +52,11 @@ with gr.Blocks(css=css) as block:
                 )
             initial_headers, initial_data = get_leaderboard_data(list(SUPER_GROUPS.keys())[0], "All")
             data_component = gr.Dataframe(
                 value=initial_data,
                 headers=initial_headers,
@@ -76,5 +81,3 @@ with gr.Blocks(css=css) as block:
 if __name__ == "__main__":
     block.launch(share=True)
-    #block.launch(server_name="127.0.0.1", server_port=7860)

                 )
             initial_headers, initial_data = get_leaderboard_data(list(SUPER_GROUPS.keys())[0], "All")
+            gr.Markdown(
+                "**Table 1: MEGA-Bench full results.** <br> The Core set contains $N_{\\text{core}} = 440$ tasks evaluated by rule-based metrics, and the Open-ended set contains $N_{\\text{open}} = 65$ tasks evaluated by a VLM judge (we use GPT-4o-0806). <br> $\\text{Overall} \\ = \\ \\frac{\\max(\\text{Core w/o CoT}, \\ \\text{Core w/ CoT}) \\ \\cdot \\ N_{\\text{core}} \\ + \\ \\text{Open-ended} \\ \\cdot \\ N_{\\text{open}}}{N_{\\text{core}} \\ + \\ N_{\\text{open}}}$",
+                elem_classes="table-caption",
+                latex_delimiters=[ {"left": "$", "right": "$", "display": False }],
+            )
             data_component = gr.Dataframe(
                 value=initial_data,
                 headers=initial_headers,
 if __name__ == "__main__":
     block.launch(share=True)

static/css/style.css CHANGED Viewed

@@ -26,74 +26,80 @@
 /* Light mode styles */
 .custom-dataframe {
-    color: #000000;
-    background-color: #ffffff;
 }
 .custom-dataframe thead th {
-    background-color: #f0f0f0 !important;
-    color: #000000 !important;
 }
 .custom-dataframe tbody td {
-    background-color: #ffffff !important;
-    color: #000000 !important;
 }
-.custom-dataframe thead th:nth-child(-n+4),
-.custom-dataframe tbody td:nth-child(-n+4) {
-    background-color: #f0f8ff !important;
 }
-.custom-dataframe thead th:nth-child(n+5),
-.custom-dataframe tbody td:nth-child(n+5) {
-    background-color: #f0fff0 !important;
 }
-.custom-dataframe tbody tr:nth-child(even) td:nth-child(-n+4) {
-    background-color: #e6f3ff !important;
 }
-.custom-dataframe tbody tr:nth-child(even) td:nth-child(n+5) {
-    background-color: #e6ffe6 !important;
 }
 /* Dark mode styles */
 @media (prefers-color-scheme: dark) {
     .custom-dataframe {
-        color: #e0e0e0 !important;
-        background-color: #1a1a1a !important;
     }
     .custom-dataframe thead th {
-        background-color: #333333 !important;
-        color: #ffffff !important;
     }
     .custom-dataframe tbody td {
-        background-color: #1a1a1a !important;
-        color: #e0e0e0 !important;
     }
-    .custom-dataframe thead th:nth-child(-n+4),
-    .custom-dataframe tbody td:nth-child(-n+4) {
-        background-color: rgba(0, 50, 100, 0.3) !important;
     }
-    .custom-dataframe thead th:nth-child(n+5),
-    .custom-dataframe tbody td:nth-child(n+5) {
-        background-color: rgba(0, 75, 0, 0.3) !important;
     }
-    .custom-dataframe tbody tr:nth-child(even) td:nth-child(-n+4) {
-        background-color: rgba(0, 60, 120, 0.3) !important;
     }
-    .custom-dataframe tbody tr:nth-child(even) td:nth-child(n+5) {
-        background-color: rgba(0, 90, 0, 0.3) !important;
     }
     .custom-dataframe tbody tr:hover td {
-        background-color: rgba(255, 255, 255, 0.1) !important;
     }
 }

 /* Light mode styles */
 .custom-dataframe {
+    color: var(--text-color);
+    background-color: var(--background-color);
 }
 .custom-dataframe thead th {
+    background-color: var(--header-background) !important;
+    color: var(--text-color) !important;
 }
 .custom-dataframe tbody td {
+    background-color: var(--background-color) !important;
+    color: var(--text-color) !important;
 }
+.custom-dataframe thead th:nth-child(-n+5),
+.custom-dataframe tbody td:nth-child(-n+5) {
+    background-color: var(--global-column-background) !important;
 }
+.custom-dataframe thead th:nth-child(n+6),
+.custom-dataframe tbody td:nth-child(n+6) {
+    background-color: var(--dimension-column-background) !important;
 }
+.custom-dataframe tbody tr:nth-child(even) td:nth-child(-n+5) {
+    background-color: var(--row-even-global) !important;
 }
+.custom-dataframe tbody tr:nth-child(even) td:nth-child(n+6) {
+    background-color: var(--row-even-dimension) !important;
 }
 /* Dark mode styles */
 @media (prefers-color-scheme: dark) {
     .custom-dataframe {
+        color: var(--text-color) !important;
+        background-color: var(--background-color) !important;
     }
     .custom-dataframe thead th {
+        background-color: var(--header-background) !important;
+        color: var(--text-color) !important;
     }
     .custom-dataframe tbody td {
+        background-color: var(--background-color) !important;
+        color: var(--text-color) !important;
     }
+    .custom-dataframe thead th:nth-child(-n+5),
+    .custom-dataframe tbody td:nth-child(-n+5) {
+        background-color: var(--global-column-background) !important;
     }
+    .custom-dataframe thead th:nth-child(n+6),
+    .custom-dataframe tbody td:nth-child(n+6) {
+        background-color: var(--dimension-column-background) !important;
     }
+    .custom-dataframe tbody tr:nth-child(even) td:nth-child(-n+5) {
+        background-color: var(--row-even-global) !important;
     }
+    .custom-dataframe tbody tr:nth-child(even) td:nth-child(n+6) {
+        background-color: var(--row-even-dimension) !important;
     }
     .custom-dataframe tbody tr:hover td {
+        background-color: var(--hover-background) !important;
     }
 }
+.table-caption {
+    text-align: center;
+    margin-top: 10px;
+    color: var(--text-color);
+}

utils.py CHANGED Viewed

@@ -121,11 +121,13 @@ def get_df(selected_super_group, selected_model_group):
     for model in MODEL_GROUPS[selected_model_group]:
         model_data = MODEL_DATA[model]
         summary = SUMMARY_DATA[model]
-        core_score = max(summary["core_noncot"]["macro_mean_score"], summary["core_cot"]["macro_mean_score"])
         row = {
             "Models": get_display_model_name(model),  # Use the mapped name
             "Overall": round(summary["overall_score"] * 100, 2),
-            "Core": round(core_score * 100, 2),
             "Open-ended": round(summary["open"]["macro_mean_score"] * 100, 2)
         }
         for keyword in SUPER_GROUPS[selected_super_group]:
@@ -142,6 +144,6 @@ def get_df(selected_super_group, selected_model_group):
 def get_leaderboard_data(selected_super_group, selected_model_group):
     df = get_df(selected_super_group, selected_model_group)
-    headers = ["Models", "Overall", "Core", "Open-ended"] + SUPER_GROUPS[selected_super_group]
     data = df[headers].values.tolist()
     return headers, data

     for model in MODEL_GROUPS[selected_model_group]:
         model_data = MODEL_DATA[model]
         summary = SUMMARY_DATA[model]
+        core_noncot_score = summary["core_noncot"]["macro_mean_score"]
+        core_cot_score = summary["core_cot"]["macro_mean_score"]
         row = {
             "Models": get_display_model_name(model),  # Use the mapped name
             "Overall": round(summary["overall_score"] * 100, 2),
+            "Core(w/o CoT)": round(core_noncot_score * 100, 2),
+            "Core(w/ CoT)": round(core_cot_score * 100, 2),
             "Open-ended": round(summary["open"]["macro_mean_score"] * 100, 2)
         }
         for keyword in SUPER_GROUPS[selected_super_group]:
 def get_leaderboard_data(selected_super_group, selected_model_group):
     df = get_df(selected_super_group, selected_model_group)
+    headers = ["Models", "Overall", "Core(w/o CoT)", "Core(w/ CoT)", "Open-ended"] + SUPER_GROUPS[selected_super_group]
     data = df[headers].values.tolist()
     return headers, data