"""Gradio app that displays the MEGA-Bench leaderboard tables."""

import os

import gradio as gr

from utils import MEGABenchEvalDataLoader
from constants import *


def _read_css(path):
    """Return the text content of the CSS file at *path*."""
    with open(path, "r", encoding="utf-8") as f:
        return f.read()


# Get the directory of the current script
current_dir = os.path.dirname(os.path.abspath(__file__))

# Construct paths to CSS files
base_css_file = os.path.join(current_dir, "static", "css", "style.css")
default_css_file = os.path.join(current_dir, "static", "css", "default.css")
si_css_file = os.path.join(current_dir, "static", "css", "single_image.css")

# Read CSS files
base_css = _read_css(base_css_file)
default_css = _read_css(default_css_file)
si_css = _read_css(si_css_file)

# Initialize data loaders
default_loader = MEGABenchEvalDataLoader("./static/eval_results/Default")
si_loader = MEGABenchEvalDataLoader("./static/eval_results/SI")

# Captions shown above each table. Line breaks are written as explicit <br>
# tags because Markdown collapses single newlines inside a paragraph.
default_caption = (
    "**Table 1: MEGA-Bench full results.** The number in the parentheses is the number of tasks of each keyword."
    "<br>"
    "The Core set contains $N_{\\text{core}} = 440$ tasks evaluated by rule-based metrics, and the Open-ended set contains $N_{\\text{open}} = 65$ tasks evaluated by a VLM judge (we use GPT-4o-0806)."
    "<br>"
    "Different from the results in our paper, we only use the Core results with CoT prompting here for clarity and compatibility with the released data."
    "<br>"
    "$\\text{Overall} \\ = \\ \\frac{\\text{Core} \\ \\cdot \\ N_{\\text{core}} \\ + \\ \\text{Open-ended} \\ \\cdot \\ N_{\\text{open}}}{N_{\\text{core}} \\ + \\ N_{\\text{open}}}$ "
)
single_image_caption = (
    "**Table 2: MEGA-Bench Single-image setting results.** The number in the parentheses is the number of tasks in each keyword."
    "<br>"
    "This subset contains 273 single-image tasks from the Core set and 42 single-image tasks from the Open-ended set. For open-source models, we drop the image input in the 1-shot demonstration example so that the entire query contains a single image only."
    "<br>"
    "Compared to the default table, some models with only single-image support are added."
)

with gr.Blocks() as block:
    # Hidden element carrying the active stylesheet; the callbacks below
    # replace its content whenever the selected table changes.
    css_style = gr.HTML(
        f"<style>{base_css}\n{default_css}</style>",
        visible=False,
    )

    gr.Markdown(LEADERBOARD_INTRODUCTION)

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.TabItem("📊 MEGA-Bench", elem_id="qa-tab-table1", id=1):
            with gr.Row():
                with gr.Accordion("Citation", open=False):
                    citation_button = gr.Textbox(
                        value=CITATION_BUTTON_TEXT,
                        label=CITATION_BUTTON_LABEL,
                        elem_id="citation-button",
                        lines=10,
                    )

            gr.Markdown(TABLE_INTRODUCTION)

            with gr.Row():
                table_selector = gr.Radio(
                    choices=["Default", "Single Image"],
                    label="Select table to display. Default: all MEGA-Bench tasks; Single Image: single-image tasks only.",
                    value="Default",
                )

            caption_component = gr.Markdown(
                value=default_caption,
                elem_classes="table-caption",
                latex_delimiters=[{"left": "$", "right": "$", "display": False}],
            )

            # The default table is shown first, so its super groups seed the
            # selector and the initial data.
            initial_super_groups = list(default_loader.SUPER_GROUPS.keys())

            with gr.Row():
                super_group_selector = gr.Radio(
                    choices=initial_super_groups,
                    label="Select a dimension to display breakdown results. We use different column colors to distinguish the overall benchmark scores and breakdown results.",
                    value=initial_super_groups[0],
                )
                # NOTE(review): the initial choices come from BASE_MODEL_GROUPS
                # while update_selectors() uses loader.MODEL_GROUPS -- confirm
                # the two agree for the default table.
                model_group_selector = gr.Radio(
                    choices=list(BASE_MODEL_GROUPS.keys()),
                    label="Select a model group",
                    value="All",
                )

            initial_headers, initial_data = default_loader.get_leaderboard_data(
                initial_super_groups[0], "All"
            )
            data_component = gr.Dataframe(
                value=initial_data,
                headers=initial_headers,
                datatype=["html"] + ["number"] * (len(initial_headers) - 1),
                interactive=False,
                elem_classes="custom-dataframe",
                max_height=2400,
            )

            def update_table_and_caption(table_type, super_group, model_group):
                """Build the table, caption and stylesheet for the current selection.

                Args:
                    table_type: "Default" selects the default loader; any other
                        value selects the single-image loader.
                    super_group: Breakdown dimension passed to the loader.
                    model_group: Model group passed to the loader.

                Returns:
                    A three-item list matching the handler outputs: the
                    dataframe update, the caption text, and the ``<style>``
                    markup for the hidden style element.
                """
                if table_type == "Default":
                    headers, data = default_loader.get_leaderboard_data(super_group, model_group)
                    caption = default_caption
                    current_css = f"{base_css}\n{default_css}"
                else:  # Single-image
                    headers, data = si_loader.get_leaderboard_data(super_group, model_group)
                    caption = single_image_caption
                    current_css = f"{base_css}\n{si_css}"

                return [
                    gr.Dataframe(
                        value=data,
                        headers=headers,
                        # First column is rendered as HTML, the rest as numbers.
                        datatype=["html"] + ["number"] * (len(headers) - 1),
                        interactive=False,
                    ),
                    caption,
                    # Emit the stylesheet that was just assembled; previously
                    # an empty string was returned and the CSS never applied.
                    f"<style>{current_css}</style>",
                ]

            def update_selectors(table_type):
                """Return radio updates listing the groups of the selected table's loader."""
                loader = default_loader if table_type == "Default" else si_loader
                return [
                    gr.Radio(choices=list(loader.SUPER_GROUPS.keys())),
                    gr.Radio(choices=list(loader.MODEL_GROUPS.keys())),
                ]

            refresh_button = gr.Button("Refresh")

            # Every handler that redraws the table shares the same wiring.
            table_inputs = [table_selector, super_group_selector, model_group_selector]
            table_outputs = [data_component, caption_component, css_style]

            refresh_button.click(
                fn=update_table_and_caption,
                inputs=table_inputs,
                outputs=table_outputs,
            )
            super_group_selector.change(
                fn=update_table_and_caption,
                inputs=table_inputs,
                outputs=table_outputs,
            )
            model_group_selector.change(
                fn=update_table_and_caption,
                inputs=table_inputs,
                outputs=table_outputs,
            )
            # Switching tables first refreshes the selector choices, then
            # redraws the table, caption and stylesheet.
            table_selector.change(
                fn=update_selectors,
                inputs=[table_selector],
                outputs=[super_group_selector, model_group_selector],
            ).then(
                fn=update_table_and_caption,
                inputs=table_inputs,
                outputs=table_outputs,
            )

        with gr.TabItem("📝 Data Information", elem_id="qa-tab-table2", id=2):
            gr.Markdown(DATA_INFO, elem_classes="markdown-text")

        with gr.TabItem("🚀 Submit", elem_id="submit-tab", id=3):
            with gr.Row():
                gr.Markdown(SUBMIT_INTRODUCTION, elem_classes="markdown-text")


if __name__ == "__main__":
    block.launch(share=True)