File size: 7,417 Bytes
b2c8d29
4301eca
8553d06
 
 
 
 
 
eeb88fb
 
 
4301eca
8553d06
eeb88fb
 
 
 
 
4301eca
 
8553d06
eeb88fb
 
4301eca
b2c8d29
eeb88fb
 
 
 
 
 
 
8553d06
 
b2c8d29
 
8553d06
 
 
 
 
 
 
 
 
 
 
 
b2c8d29
eeb88fb
 
4301eca
 
eeb88fb
 
 
 
2a2ba62
 
4301eca
eeb88fb
 
 
 
 
 
 
b2c8d29
8553d06
eeb88fb
f724d2e
eeb88fb
8553d06
 
eeb88fb
8553d06
 
 
 
eeb88fb
8553d06
 
 
4301eca
8553d06
 
4301eca
8553d06
eeb88fb
 
 
 
 
 
4301eca
 
 
 
eeb88fb
 
 
 
 
4301eca
 
eeb88fb
 
 
 
 
 
4301eca
eeb88fb
 
 
 
 
8553d06
eeb88fb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b2c8d29
8553d06
 
 
be407a0
b2c8d29
8553d06
b2c8d29
 
 
8553d06
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
import gradio as gr
from utils import DefaultDataLoader, SingleImageDataLoader
import os
from constants import *

# Directory containing this script. Static assets are resolved relative to it
# so the app does not depend on the current working directory.
current_dir = os.path.dirname(os.path.abspath(__file__))

# Paths to the stylesheets: one shared sheet plus one sheet per table view.
css_dir = os.path.join(current_dir, "static", "css")
base_css_file = os.path.join(css_dir, "style.css")
default_css_file = os.path.join(css_dir, "default.css")
si_css_file = os.path.join(css_dir, "single_image.css")


def _read_css(path):
    """Return the full text of the stylesheet at *path*.

    The file is decoded explicitly as UTF-8 so the result does not depend on
    the platform's default locale encoding.

    Raises:
        OSError: if the file cannot be opened or read.
        UnicodeDecodeError: if the file is not valid UTF-8.
    """
    with open(path, "r", encoding="utf-8") as f:
        return f.read()


# Load every stylesheet once at import time; the UI callbacks combine these
# strings when the user switches tables.
base_css = _read_css(base_css_file)
default_css = _read_css(default_css_file)
si_css = _read_css(si_css_file)

# Initialize data loaders (one per leaderboard table).
default_loader = DefaultDataLoader()
si_loader = SingleImageDataLoader()

with gr.Blocks() as block:
    # Hidden HTML element carrying the active stylesheet. It is listed as an
    # output of the table callbacks below, which replace its content with the
    # CSS matching the selected table.
    css_style = gr.HTML(
        f"<style>{base_css}\n{default_css}</style>",
        visible=False
    )

    gr.Markdown(
        LEADERBOARD_INTRODUCTION
    )
    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.TabItem("📊 MEGA-Bench", elem_id="qa-tab-table1", id=1):
            with gr.Row():
                with gr.Accordion("Citation", open=False):
                    citation_button = gr.Textbox(
                        value=CITATION_BUTTON_TEXT,
                        label=CITATION_BUTTON_LABEL,
                        elem_id="citation-button",
                        lines=10,
                    )
            gr.Markdown(
                TABLE_INTRODUCTION
            )

            with gr.Row():
                table_selector = gr.Radio(
                    choices=["Default", "Single Image"],
                    label="Select table to display. Default: all MEGA-Bench tasks; Single Image: single-image tasks only.",
                    value="Default"
                )

            # Define different captions for each table
            default_caption = "**Table 1: MEGA-Bench full results.** The number in the parentheses is the number of tasks of each keyword. <br> The Core set contains $N_{\\text{core}} = 440$ tasks evaluated by rule-based metrics, and the Open-ended set contains $N_{\\text{open}} = 65$ tasks evaluated by a VLM judge (we use GPT-4o-0806). <br> Different from the results in our paper, we only use the Core results with CoT prompting here for clarity and compatibility with the released data. <br> $\\text{Overall} \\ = \\ \\frac{\\text{Core} \\ \\cdot \\ N_{\\text{core}} \\ + \\ \\text{Open-ended} \\ \\cdot \\ N_{\\text{open}}}{N_{\\text{core}} \\ + \\ N_{\\text{open}}}$ "

            single_image_caption = "**Table 2: MEGA-Bench Single-image setting results.** The number in the parentheses is the number of tasks in each keyword. <br> This subset contains 273 single-image tasks from the Core set and 42 single-image tasks from the Open-ended set. For open-source models, we drop the image input in the 1-shot demonstration example so that the entire query contains a single image only. <br> Compared to the default table, some models with only single-image support are added."

            caption_component = gr.Markdown(
                value=default_caption,
                elem_classes="table-caption",
                latex_delimiters=[{"left": "$", "right": "$", "display": False}],
            )

            # Computed once: the same key list feeds the selector's choices,
            # its initial value, and the initial table contents.
            super_group_choices = list(default_loader.SUPER_GROUPS.keys())
            initial_super_group = super_group_choices[0]

            with gr.Row():
                super_group_selector = gr.Radio(
                    choices=super_group_choices,
                    label="Select a dimension to display breakdown results. We use different column colors to distinguish the overall benchmark scores and breakdown results.",
                    value=initial_super_group
                )
                model_group_selector = gr.Radio(
                    choices=list(default_loader.BASE_MODEL_GROUPS.keys()),
                    label="Select a model group",
                    value="All"
                )

            initial_headers, initial_data = default_loader.get_leaderboard_data(initial_super_group, "All")
            data_component = gr.Dataframe(
                value=initial_data,
                headers=initial_headers,
                # First column is rendered as HTML, every other column as a number.
                datatype=["html"] + ["number"] * (len(initial_headers) - 1),
                interactive=False,
                elem_classes="custom-dataframe",
                max_height=2400,
            )

            def update_table_and_caption(table_type, super_group, model_group):
                """Build the table, caption and stylesheet for the current selection.

                Args:
                    table_type: value of the table selector; "Default" picks the
                        default loader, anything else the single-image loader.
                    super_group: selected breakdown dimension, forwarded to the
                        loader's ``get_leaderboard_data``.
                    model_group: selected model group, forwarded likewise.

                Returns:
                    A three-item list matching the wired outputs: a
                    ``gr.Dataframe`` with the new data, the caption markdown,
                    and a ``<style>`` tag with the CSS for that table.
                """
                if table_type == "Default":
                    headers, data = default_loader.get_leaderboard_data(super_group, model_group)
                    caption = default_caption
                    current_css = f"{base_css}\n{default_css}"
                else:  # Single-image
                    headers, data = si_loader.get_leaderboard_data(super_group, model_group)
                    caption = single_image_caption
                    current_css = f"{base_css}\n{si_css}"

                return [
                    gr.Dataframe(
                        value=data,
                        headers=headers,
                        datatype=["html"] + ["number"] * (len(headers) - 1),
                        interactive=False,
                    ),
                    caption,
                    f"<style>{current_css}</style>"
                ]

            def update_selectors(table_type):
                """Return refreshed choices for both selectors for *table_type*.

                Only ``choices`` is passed to the returned ``gr.Radio``
                objects; no ``value`` is set here.
                """
                loader = default_loader if table_type == "Default" else si_loader
                # NOTE(review): the initial model selector is built from
                # BASE_MODEL_GROUPS while this reads MODEL_GROUPS -- confirm
                # both attributes exist on the loaders and expose the same keys.
                return [
                    gr.Radio(choices=list(loader.SUPER_GROUPS.keys())),
                    gr.Radio(choices=list(loader.MODEL_GROUPS.keys()))
                ]

            refresh_button = gr.Button("Refresh")

            # Every trigger below refreshes the same three components from the
            # same three selectors, so the wiring is declared once.
            table_inputs = [table_selector, super_group_selector, model_group_selector]
            table_outputs = [data_component, caption_component, css_style]

            for register_listener in (
                refresh_button.click,
                super_group_selector.change,
                model_group_selector.change,
            ):
                register_listener(
                    fn=update_table_and_caption,
                    inputs=table_inputs,
                    outputs=table_outputs,
                )

            # Switching tables first refreshes the selector choices, then
            # rebuilds the table, caption and stylesheet.
            table_selector.change(
                fn=update_selectors,
                inputs=[table_selector],
                outputs=[super_group_selector, model_group_selector]
            ).then(
                fn=update_table_and_caption,
                inputs=table_inputs,
                outputs=table_outputs
            )

        with gr.TabItem("📝 Data Information", elem_id="qa-tab-table2", id=2):
            gr.Markdown(DATA_INFO, elem_classes="markdown-text")

        with gr.TabItem("🚀 Submit", elem_id="submit-tab", id=3):
            with gr.Row():
                gr.Markdown(SUBMIT_INTRODUCTION, elem_classes="markdown-text")


if __name__ == "__main__":
    # Start the app only when this file is run as a script (not on import).
    # share=True asks Gradio to also create a public share link.
    block.launch(share=True)