Spaces:
Running
Running
File size: 3,842 Bytes
b2c8d29 8553d06 b2c8d29 8553d06 b2c8d29 8553d06 b2c8d29 8553d06 f724d2e 8553d06 0d5512e 8553d06 aedc60d 8553d06 b2c8d29 8553d06 be407a0 b2c8d29 8553d06 b2c8d29 8553d06 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 |
import gradio as gr
from utils import get_leaderboard_data, SUPER_GROUPS, MODEL_GROUPS
import os
from constants import *
# Locate the stylesheet relative to this script rather than the process's
# working directory, so the app finds it regardless of where it is launched.
current_dir = os.path.dirname(os.path.abspath(__file__))
css_file = os.path.join(current_dir, "static", "css", "style.css")

# Read the stylesheet with an explicit encoding: without one, open() falls
# back to the locale's preferred encoding, which can mis-decode or reject
# non-ASCII characters in the CSS on some hosts.
with open(css_file, "r", encoding="utf-8") as f:
    css = f.read()
def update_leaderboard(selected_super_group, selected_model_group):
    """Build the leaderboard table for the given selector values.

    Args:
        selected_super_group: Value of the super-group selector, forwarded
            unchanged to ``get_leaderboard_data``.
        selected_model_group: Value of the model-group selector, forwarded
            unchanged to ``get_leaderboard_data``.

    Returns:
        A ``gr.Dataframe`` carrying the fetched rows and headers, with the
        first column typed as ``"str"`` and every other column as
        ``"number"``.
    """
    column_names, rows = get_leaderboard_data(selected_super_group, selected_model_group)

    # One "str" column up front, then one "number" entry per remaining header.
    column_types = ["str"]
    column_types.extend(["number"] * (len(column_names) - 1))

    return gr.Dataframe(value=rows, headers=column_names, datatype=column_types)
# Assemble the Gradio UI: an introduction followed by three tabs
# (leaderboard, data information, submission instructions).
# NOTE(review): the leading "π" in the tab labels looks like a mis-encoded
# emoji; the labels are kept exactly as found — confirm against the original.
with gr.Blocks(css=css) as block:
    gr.Markdown(LEADERBOARD_INTRODUCTION)

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        # Tab 1: citation box, breakdown selectors, results table, refresh.
        with gr.TabItem("π MEGA-Bench", elem_id="qa-tab-table1", id=1):
            with gr.Row():
                with gr.Accordion("Citation", open=False):
                    citation_button = gr.Textbox(
                        value=CITATION_BUTTON_TEXT,
                        label=CITATION_BUTTON_LABEL,
                        elem_id="citation-button",
                        lines=10,
                    )

            gr.Markdown(TABLE_INTRODUCTION)

            # The first super group is the default selection and also drives
            # the initial table contents below.
            super_group_names = list(SUPER_GROUPS.keys())
            default_super_group = super_group_names[0]

            with gr.Row():
                super_group_selector = gr.Radio(
                    choices=super_group_names,
                    label="Select a dimension to display breakdown results. We use different column colors to distinguish the overall benchmark scores and breakdown results.",
                    value=default_super_group,
                )
                model_group_selector = gr.Radio(
                    choices=list(MODEL_GROUPS.keys()),
                    label="Select a model group",
                    value="All",
                )

            # Data shown before the user touches any selector.
            initial_headers, initial_data = get_leaderboard_data(default_super_group, "All")

            # Table caption; "$...$" spans are declared as LaTeX delimiters.
            gr.Markdown(
                "**Table 1: MEGA-Bench full results.** <br> The Core set contains $N_{\\text{core}} = 440$ tasks evaluated by rule-based metrics, and the Open-ended set contains $N_{\\text{open}} = 65$ tasks evaluated by a VLM judge (we use GPT-4o-0806). <br> $\\text{Overall} \\ = \\ \\frac{\\max(\\text{Core w/o CoT}, \\ \\text{Core w/ CoT}) \\ \\cdot \\ N_{\\text{core}} \\ + \\ \\text{Open-ended} \\ \\cdot \\ N_{\\text{open}}}{N_{\\text{core}} \\ + \\ N_{\\text{open}}}$",
                elem_classes="table-caption",
                latex_delimiters=[{"left": "$", "right": "$", "display": False}],
            )

            # One "str" column followed by "number" columns, mirroring the
            # typing used by update_leaderboard.
            initial_column_types = ["str"] + ["number"] * (len(initial_headers) - 1)
            data_component = gr.Dataframe(
                value=initial_data,
                headers=initial_headers,
                datatype=initial_column_types,
                interactive=False,
                elem_classes="custom-dataframe",
                max_height=1200,
            )

            refresh_button = gr.Button("Refresh")

            # The button click and both selector changes all re-render the
            # table from the current selector values.
            for register_event in (
                refresh_button.click,
                super_group_selector.change,
                model_group_selector.change,
            ):
                register_event(
                    fn=update_leaderboard,
                    inputs=[super_group_selector, model_group_selector],
                    outputs=[data_component],
                )

        # Tab 2: static description of the data.
        with gr.TabItem("π Data Information", elem_id="qa-tab-table2", id=2):
            gr.Markdown(DATA_INFO, elem_classes="markdown-text")

        # Tab 3: submission instructions.
        with gr.TabItem("π Submit", elem_id="submit-tab", id=3):
            with gr.Row():
                gr.Markdown(SUBMIT_INTRODUCTION, elem_classes="markdown-text")

# Launch the app only when executed as a script, not when imported.
if __name__ == "__main__":
    block.launch(share=True)
|