MEGA-Bench / app.py
cccjc's picture
update results & separate results organization
2a2ba62
raw
history blame
7.42 kB
import gradio as gr
from utils import DefaultDataLoader, SingleImageDataLoader
import os
from constants import *
# Resolve asset paths relative to this script rather than the working
# directory, so the app can be started from anywhere.
current_dir = os.path.dirname(os.path.abspath(__file__))

# Stylesheets: one shared base sheet plus one sheet per table variant.
base_css_file = os.path.join(current_dir, "static", "css", "style.css")
default_css_file = os.path.join(current_dir, "static", "css", "default.css")
si_css_file = os.path.join(current_dir, "static", "css", "single_image.css")

# Load each stylesheet as text. The encoding is pinned to UTF-8 so the
# decoded CSS does not vary with the platform's default locale encoding.
with open(base_css_file, "r", encoding="utf-8") as f:
    base_css = f.read()
with open(default_css_file, "r", encoding="utf-8") as f:
    default_css = f.read()
with open(si_css_file, "r", encoding="utf-8") as f:
    si_css = f.read()

# One data loader per leaderboard variant ("Default" and "Single Image").
default_loader = DefaultDataLoader()
si_loader = SingleImageDataLoader()
# NOTE(review): the nesting of the layout containers below was reconstructed
# from the statement order; confirm it against the rendered page.
with gr.Blocks() as block:
    # Hidden <style> holder. The table-update callback rewrites its content so
    # each table variant gets its own stylesheet on top of the shared base CSS.
    css_style = gr.HTML(
        f"<style>{base_css}\n{default_css}</style>",
        visible=False,
    )
    gr.Markdown(LEADERBOARD_INTRODUCTION)

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.TabItem("📊 MEGA-Bench", elem_id="qa-tab-table1", id=1):
            with gr.Row():
                with gr.Accordion("Citation", open=False):
                    citation_button = gr.Textbox(
                        value=CITATION_BUTTON_TEXT,
                        label=CITATION_BUTTON_LABEL,
                        elem_id="citation-button",
                        lines=10,
                    )
            gr.Markdown(TABLE_INTRODUCTION)

            with gr.Row():
                table_selector = gr.Radio(
                    choices=["Default", "Single Image"],
                    label="Select table to display. Default: all MEGA-Bench tasks; Single Image: single-image tasks only.",
                    value="Default",
                )

            # One caption per table variant; the active one is shown in
            # caption_component and swapped by update_table_and_caption.
            default_caption = "**Table 1: MEGA-Bench full results.** The number in the parentheses is the number of tasks of each keyword. <br> The Core set contains $N_{\\text{core}} = 440$ tasks evaluated by rule-based metrics, and the Open-ended set contains $N_{\\text{open}} = 65$ tasks evaluated by a VLM judge (we use GPT-4o-0806). <br> Different from the results in our paper, we only use the Core results with CoT prompting here for clarity and compatibility with the released data. <br> $\\text{Overall} \\ = \\ \\frac{\\text{Core} \\ \\cdot \\ N_{\\text{core}} \\ + \\ \\text{Open-ended} \\ \\cdot \\ N_{\\text{open}}}{N_{\\text{core}} \\ + \\ N_{\\text{open}}}$ "
            single_image_caption = "**Table 2: MEGA-Bench Single-image setting results.** The number in the parentheses is the number of tasks in each keyword. <br> This subset contains 273 single-image tasks from the Core set and 42 single-image tasks from the Open-ended set. For open-source models, we drop the image input in the 1-shot demonstration example so that the entire query contains a single image only. <br> Compared to the default table, some models with only single-image support are added."

            caption_component = gr.Markdown(
                value=default_caption,
                elem_classes="table-caption",
                # Inline "$...$" spans in the captions are rendered as LaTeX.
                latex_delimiters=[{"left": "$", "right": "$", "display": False}],
            )

            # The first super group of the default loader is the initial
            # breakdown dimension for both the selector and the table.
            super_group_names = list(default_loader.SUPER_GROUPS.keys())
            initial_super_group = super_group_names[0]

            with gr.Row():
                super_group_selector = gr.Radio(
                    choices=super_group_names,
                    label="Select a dimension to display breakdown results. We use different column colors to distinguish the overall benchmark scores and breakdown results.",
                    value=initial_super_group,
                )
                # NOTE(review): the initial choices come from BASE_MODEL_GROUPS
                # while update_selectors reads MODEL_GROUPS; confirm both
                # attributes exist on the loaders and that "All" is a key.
                model_group_selector = gr.Radio(
                    choices=list(default_loader.BASE_MODEL_GROUPS.keys()),
                    label="Select a model group",
                    value="All",
                )

            initial_headers, initial_data = default_loader.get_leaderboard_data(
                initial_super_group, "All"
            )
            data_component = gr.Dataframe(
                value=initial_data,
                headers=initial_headers,
                # First column is rendered as HTML; every other column is numeric.
                datatype=["html"] + ["number"] * (len(initial_headers) - 1),
                interactive=False,
                elem_classes="custom-dataframe",
                max_height=2400,
            )

            def update_table_and_caption(table_type, super_group, model_group):
                """Build the table, caption and stylesheet for the current selection.

                Args:
                    table_type: Value of the table selector; "Default" picks the
                        default loader, anything else the single-image loader.
                    super_group: Breakdown dimension passed to the loader.
                    model_group: Model group passed to the loader.

                Returns:
                    A three-item list matching the outputs
                    [data_component, caption_component, css_style]: a Dataframe
                    update, the caption text, and a <style> element string.
                """
                if table_type == "Default":
                    loader, caption, table_css = default_loader, default_caption, default_css
                else:  # Single-image
                    loader, caption, table_css = si_loader, single_image_caption, si_css

                headers, data = loader.get_leaderboard_data(super_group, model_group)
                return [
                    gr.Dataframe(
                        value=data,
                        headers=headers,
                        datatype=["html"] + ["number"] * (len(headers) - 1),
                        interactive=False,
                    ),
                    caption,
                    f"<style>{base_css}\n{table_css}</style>",
                ]

            def update_selectors(table_type):
                """Refresh both selectors' choices for the chosen table variant.

                Args:
                    table_type: Value of the table selector.

                Returns:
                    A two-item list of Radio updates matching the outputs
                    [super_group_selector, model_group_selector].
                """
                # NOTE(review): only the choices are replaced; the selected
                # values are kept. Confirm every selectable value is valid for
                # both loaders.
                loader = default_loader if table_type == "Default" else si_loader
                return [
                    gr.Radio(choices=list(loader.SUPER_GROUPS.keys())),
                    gr.Radio(choices=list(loader.MODEL_GROUPS.keys())),
                ]

            refresh_button = gr.Button("Refresh")

            # Every table refresh reads the same three selectors and writes the
            # same three components, so the wiring lists are defined once.
            table_inputs = [table_selector, super_group_selector, model_group_selector]
            table_outputs = [data_component, caption_component, css_style]

            refresh_button.click(
                fn=update_table_and_caption,
                inputs=table_inputs,
                outputs=table_outputs,
            )
            super_group_selector.change(
                fn=update_table_and_caption,
                inputs=table_inputs,
                outputs=table_outputs,
            )
            model_group_selector.change(
                fn=update_table_and_caption,
                inputs=table_inputs,
                outputs=table_outputs,
            )
            # Switching tables first refreshes the selector choices, then
            # redraws the table, caption and stylesheet.
            table_selector.change(
                fn=update_selectors,
                inputs=[table_selector],
                outputs=[super_group_selector, model_group_selector],
            ).then(
                fn=update_table_and_caption,
                inputs=table_inputs,
                outputs=table_outputs,
            )

        with gr.TabItem("📝 Data Information", elem_id="qa-tab-table2", id=2):
            gr.Markdown(DATA_INFO, elem_classes="markdown-text")

        with gr.TabItem("🚀 Submit", elem_id="submit-tab", id=3):
            with gr.Row():
                gr.Markdown(SUBMIT_INTRODUCTION, elem_classes="markdown-text")
# Start the Gradio app only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    launch_options = {"share": True}
    block.launch(**launch_options)