"""A gradio app that renders a static leaderboard. This is used for Hugging Face Space.""" import argparse import json from datetime import datetime import gradio as gr import pandas as pd import pytz from constants import * from constants import column_names # get the last updated time from the elo_ranks.all.jsonl file LAST_UPDATED = None # with open("_intro.md", "r") as f: # INTRO_MD = f.read() INTRO_MD = "" with open("_header.md", "r") as f: HEADER_MD = f.read() raw_data = None original_df = None def df_filters(mode_selection_radio, show_open_source_model_only): global original_df original_df.insert(0, "", range(1, 1 + len(original_df))) return original_df.copy() def _gstr(text): return gr.Text(text, visible=False) def _tab_leaderboard(): global original_df, available_models if True: default_mode = "greedy" default_main_df = df_filters(default_mode, False) leaderboard_table = gr.components.Dataframe( value=default_main_df, datatype= ["number", "markdown", "markdown", "number"], # max_rows=None, height=1000, elem_id="leaderboard-table", interactive=False, visible=True, column_widths=[50, 150, 150, 100, 120, 120, 100,100,110,100], wrap=True # min_width=60, ) def _tab_submit(): markdown_text = """ Please create an issue on our [Github](https://github.com/allenai/super-benchmark) repository with output of trajectories of your model and results. We will update the leaderboard accordingly. """ gr.Markdown("## 🚀 Submit Your Results\n\n" + markdown_text, elem_classes="markdown-text") def build_demo(): global original_df with gr.Blocks(theme=gr.themes.Soft(), css=css, js=js_light) as demo: # convert LAST_UPDATED to the PDT time LAST_UPDATED = datetime.now(pytz.timezone('US/Pacific')).strftime("%Y-%m-%d %H:%M:%S") header_md_text = HEADER_MD.replace("{LAST_UPDATED}", str(LAST_UPDATED)) gr.Markdown(header_md_text, elem_classes="markdown-text") with gr.Tabs(elem_classes="tab-buttons") as tabs: with gr.TabItem("🏅 Leaderboard", elem_id="od-benchmark-tab-table", id=0): _tab_leaderboard() with gr.TabItem("🚀 Submit Your Results", elem_id="od-benchmark-tab-table", id=3): _tab_submit() return demo def data_load(result_file): global raw_data, original_df print(f"Loading {result_file}") column_names_main = column_names.copy() # column_names_main.update({}) main_ordered_columns = ORDERED_COLUMN_NAMES # filter the data with Total Puzzles == 1000 click_url = True # read json file from the result_file with open(result_file, "r") as f: raw_data = json.load(f) # floatify the data, if possible for d in raw_data: for k, v in d.items(): try: d[k] = float(v) except: pass original_df = pd.DataFrame(raw_data) original_df.sort_values(by="Expert (Accuracy)", ascending=False, inplace=True) if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--share", action="store_true") parser.add_argument("--result_file", help="Path to results table", default="ZeroEval-main/result_dirs/leaderboard.json") args = parser.parse_args() data_load(args.result_file) demo = build_demo() demo.launch(share=args.share, height=3000, width="100%")