Spaces:

open-llm-leaderboard
/

open_llm_leaderboard

Running on CPU Upgrade

App Files Files Community

1079

Clémentine committed on May 31, 2024

Commit

388bfbd

1 Parent(s): 953dbe3

The webhooks will re-download the datasets at each update, and demo.load will reload the viewer's data at each page refresh

Browse files

Files changed (1) hide show

app.py +78 -62

app.py CHANGED Viewed

@@ -44,7 +44,9 @@ from src.tools.plots import create_metric_plot_obj, create_plot_df, create_score
 logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
 def restart_space():
     API.restart_space(repo_id=REPO_ID, token=HF_TOKEN)
@@ -86,37 +88,49 @@ def download_dataset(repo_id, local_dir, repo_type="dataset", max_attempts=3, ba
             attempt += 1
     raise Exception(f"Failed to download {repo_id} after {max_attempts} attempts")
-def init_space(full_init: bool = True):
     """Initializes the application space, loading only necessary data."""
-    if full_init:
         # These downloads only occur on full initialization
         try:
             download_dataset(QUEUE_REPO, EVAL_REQUESTS_PATH)
         except Exception:
             restart_space()
-    # Always retrieve the leaderboard DataFrame
-    leaderboard_dataset = datasets.load_dataset(AGGREGATED_REPO, "default", split="train", cache_dir=HF_HOME)
-    leaderboard_df = get_leaderboard_df(
-        leaderboard_dataset=leaderboard_dataset,
-        cols=COLS,
-        benchmark_cols=BENCHMARK_COLS,
-    )
     # Evaluation queue DataFrame retrieval is independent of initialization detail level
-    eval_queue_dfs = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
     return leaderboard_df, eval_queue_dfs
-# Convert the environment variable "LEADERBOARD_FULL_INIT" to a boolean value, defaulting to True if the variable is not set.
-# This controls whether a full initialization should be performed.
-do_full_init = os.getenv("LEADERBOARD_FULL_INIT", "True") == "True"
 # Calls the init_space function with the `full_init` parameter determined by the `do_full_init` variable.
 # This initializes various DataFrames used throughout the application, with the level of initialization detail controlled by the `do_full_init` flag.
-leaderboard_df, eval_queue_dfs = init_space(full_init=do_full_init)
 finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df = eval_queue_dfs
@@ -125,6 +139,39 @@ finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df = eval_queu
 #    plot_df = create_plot_df(create_scores_df(leaderboard_df))
 #    return plot_df
 demo = gr.Blocks(css=custom_css)
 with demo:
@@ -133,37 +180,7 @@ with demo:
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
         with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
-            leaderboard = Leaderboard(
-                value=leaderboard_df,
-                datatype=[c.type for c in fields(AutoEvalColumn)],
-                select_columns=SelectColumns(
-                    default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default],
-                    cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden or c.dummy],
-                    label="Select Columns to Display:",
-                ),
-                search_columns=[AutoEvalColumn.model.name, AutoEvalColumn.fullname.name, AutoEvalColumn.license.name],
-                hide_columns=[c.name for c in fields(AutoEvalColumn) if c.hidden],
-                filter_columns=[
-                    ColumnFilter(AutoEvalColumn.model_type.name, type="checkboxgroup", label="Model types"),
-                    ColumnFilter(AutoEvalColumn.precision.name, type="checkboxgroup", label="Precision"),
-                    ColumnFilter(
-                        AutoEvalColumn.params.name,
-                        type="slider",
-                        min=0.01,
-                        max=150,
-                        label="Select the number of parameters (B)",
-                    ),
-                    ColumnFilter(
-                        AutoEvalColumn.still_on_hub.name, type="boolean", label="Private or deleted", default=True
-                    ),
-                    ColumnFilter(
-                        AutoEvalColumn.merged.name, type="boolean", label="Contains a merge/moerge", default=True
-                    ),
-                    ColumnFilter(AutoEvalColumn.moe.name, type="boolean", label="MoE", default=False),
-                    ColumnFilter(AutoEvalColumn.not_flagged.name, type="boolean", label="Flagged", default=True),
-                ],
-                bool_checkboxgroup_label="Hide models",
-            )
         #with gr.TabItem("📈 Metrics through time", elem_id="llm-benchmark-tab-table", id=2):
         #    with gr.Row():
@@ -288,16 +305,18 @@ with demo:
                 show_copy_button=True,
             )
 demo.queue(default_concurrency_limit=40)
 # Start ephemeral Spaces on PRs (see config in README.md)
 from gradio_space_ci.webhook import IS_EPHEMERAL_SPACE, SPACE_ID, configure_space_ci
 def enable_space_ci_and_return_server(ui: gr.Blocks) -> WebhooksServer:
     # Taken from https://huggingface.co/spaces/Wauplin/gradio-space-ci/blob/075119aee75ab5e7150bf0814eec91c83482e790/src/gradio_space_ci/webhook.py#L61
     # Compared to the original, this one does not monkeypatch Gradio, which allows us to define more webhooks.
     if SPACE_ID is None:
         print("Not in a Space: Space CI disabled.")
         return WebhooksServer(ui=demo)
@@ -311,7 +330,7 @@ def enable_space_ci_and_return_server(ui: gr.Blocks) -> WebhooksServer:
     print(f"Enabling Space CI with config from README: {config}")
     return configure_space_ci(
-        blocks=self,
         trusted_authors=config.get("trusted_authors"),
         private=config.get("private", "auto"),
         variables=config.get("variables", "auto"),
@@ -326,24 +345,21 @@ webhooks_server = enable_space_ci_and_return_server(ui=demo)
 # Add webhooks
 @webhooks_server.add_webhook
 async def update_leaderboard(payload: WebhookPayload) -> None:
     if payload.repo.type == "dataset" and payload.event.action == "update":
-        leaderboard_dataset = datasets.load_dataset(AGGREGATED_REPO, "default", split="train", cache_dir=HF_HOME)
-        leaderboard_df = get_leaderboard_df(
-            leaderboard_dataset=leaderboard_dataset,
-            cols=COLS,
-            benchmark_cols=BENCHMARK_COLS,
         )
-        leaderboard.value = leaderboard_df
 @webhooks_server.add_webhook
 async def update_queue(payload: WebhookPayload) -> None:
     if payload.repo.type == "dataset" and payload.event.action == "update":
         download_dataset(QUEUE_REPO, EVAL_REQUESTS_PATH)
-        eval_queue_dfs = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
-        finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df = eval_queue_dfs
-        finished_eval_table.value = finished_eval_queue_df
-        running_eval_table.value = running_eval_queue_df
-        pending_eval_table.value = pending_eval_queue_df
 webhooks_server.launch()

 logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
+# Convert the environment variable "LEADERBOARD_FULL_INIT" to a boolean value, defaulting to True if the variable is not set.
+# This controls whether a full initialization should be performed.
+DO_FULL_INIT = os.getenv("LEADERBOARD_FULL_INIT", "True") == "True"
 def restart_space():
     API.restart_space(repo_id=REPO_ID, token=HF_TOKEN)
             attempt += 1
     raise Exception(f"Failed to download {repo_id} after {max_attempts} attempts")
+def get_latest_data_leaderboard():
+    leaderboard_dataset = datasets.load_dataset(
+        AGGREGATED_REPO,
+        "default",
+        split="train",
+        cache_dir=HF_HOME,
+        download_mode=datasets.DownloadMode.REUSE_DATASET_IF_EXISTS, # Uses the cached dataset
+        verification_mode="no_checks"
+    )
+    leaderboard_df = get_leaderboard_df(
+        leaderboard_dataset=leaderboard_dataset,
+        cols=COLS,
+        benchmark_cols=BENCHMARK_COLS,
+    )
+    return leaderboard_df
+def get_latest_data_queue():
+    eval_queue_dfs = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
+    return eval_queue_dfs
+def init_space():
     """Initializes the application space, loading only necessary data."""
+    if DO_FULL_INIT:
         # These downloads only occur on full initialization
         try:
             download_dataset(QUEUE_REPO, EVAL_REQUESTS_PATH)
         except Exception:
             restart_space()
+    # Always reload the leaderboard DataFrame (the cached dataset is reused when it exists)
+    leaderboard_df = get_latest_data_leaderboard()
     # Evaluation queue DataFrame retrieval is independent of initialization detail level
+    eval_queue_dfs = get_latest_data_queue()
     return leaderboard_df, eval_queue_dfs
 # Calls init_space(), which reads the module-level DO_FULL_INIT flag to decide whether the evaluation queue dataset is downloaded first.
 # This initializes the leaderboard and evaluation queue DataFrames used throughout the application.
+leaderboard_df, eval_queue_dfs = init_space()
 finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df = eval_queue_dfs
 #    plot_df = create_plot_df(create_scores_df(leaderboard_df))
 #    return plot_df
+def init_leaderboard(dataframe):
+    return Leaderboard(
+        value = dataframe,
+        datatype=[c.type for c in fields(AutoEvalColumn)],
+        select_columns=SelectColumns(
+            default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default],
+            cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden or c.dummy],
+            label="Select Columns to Display:",
+        ),
+        search_columns=[AutoEvalColumn.model.name, AutoEvalColumn.fullname.name, AutoEvalColumn.license.name],
+        hide_columns=[c.name for c in fields(AutoEvalColumn) if c.hidden],
+        filter_columns=[
+            ColumnFilter(AutoEvalColumn.model_type.name, type="checkboxgroup", label="Model types"),
+            ColumnFilter(AutoEvalColumn.precision.name, type="checkboxgroup", label="Precision"),
+            ColumnFilter(
+                AutoEvalColumn.params.name,
+                type="slider",
+                min=0.01,
+                max=150,
+                label="Select the number of parameters (B)",
+            ),
+            ColumnFilter(
+                AutoEvalColumn.still_on_hub.name, type="boolean", label="Private or deleted", default=True
+            ),
+            ColumnFilter(
+                AutoEvalColumn.merged.name, type="boolean", label="Contains a merge/moerge", default=True
+            ),
+            ColumnFilter(AutoEvalColumn.moe.name, type="boolean", label="MoE", default=False),
+            ColumnFilter(AutoEvalColumn.not_flagged.name, type="boolean", label="Flagged", default=True),
+        ],
+        bool_checkboxgroup_label="Hide models",
+    )
 demo = gr.Blocks(css=custom_css)
 with demo:
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
         with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
+            leaderboard = init_leaderboard(leaderboard_df)
         #with gr.TabItem("📈 Metrics through time", elem_id="llm-benchmark-tab-table", id=2):
         #    with gr.Row():
                 show_copy_button=True,
             )
+    demo.load(fn=get_latest_data_leaderboard, inputs=None, outputs=[leaderboard])
+    demo.load(fn=get_latest_data_queue, inputs=None, outputs=[finished_eval_table, running_eval_table, pending_eval_table])
 demo.queue(default_concurrency_limit=40)
 # Start ephemeral Spaces on PRs (see config in README.md)
 from gradio_space_ci.webhook import IS_EPHEMERAL_SPACE, SPACE_ID, configure_space_ci
 def enable_space_ci_and_return_server(ui: gr.Blocks) -> WebhooksServer:
     # Taken from https://huggingface.co/spaces/Wauplin/gradio-space-ci/blob/075119aee75ab5e7150bf0814eec91c83482e790/src/gradio_space_ci/webhook.py#L61
     # Compared to the original, this one does not monkeypatch Gradio, which allows us to define more webhooks.
+    # Hat tip to Lucain!
     if SPACE_ID is None:
         print("Not in a Space: Space CI disabled.")
         return WebhooksServer(ui=demo)
     print(f"Enabling Space CI with config from README: {config}")
     return configure_space_ci(
+        blocks=ui,
         trusted_authors=config.get("trusted_authors"),
         private=config.get("private", "auto"),
         variables=config.get("variables", "auto"),
 # Add webhooks
 @webhooks_server.add_webhook
 async def update_leaderboard(payload: WebhookPayload) -> None:
+    """Redownloads the leaderboard dataset each time it updates"""
     if payload.repo.type == "dataset" and payload.event.action == "update":
+        datasets.load_dataset(
+            AGGREGATED_REPO,
+            "default",
+            split="train",
+            cache_dir=HF_HOME,
+            download_mode=datasets.DownloadMode.FORCE_REDOWNLOAD,
+            verification_mode="no_checks"
         )
 @webhooks_server.add_webhook
 async def update_queue(payload: WebhookPayload) -> None:
+    """Redownloads the queue dataset each time it updates"""
     if payload.repo.type == "dataset" and payload.event.action == "update":
         download_dataset(QUEUE_REPO, EVAL_REQUESTS_PATH)
 webhooks_server.launch()