Clémentine committed on
Commit 388bfbd
1 Parent(s): 953dbe3

The webhooks will re-download the leaderboard dataset at each update, and demo.load will refresh the viewer at each page refresh.

Files changed (1)
  1. app.py +78 -62
app.py CHANGED
@@ -44,7 +44,9 @@ from src.tools.plots import create_metric_plot_obj, create_plot_df, create_score
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")


-
+# Convert the environment variable "LEADERBOARD_FULL_INIT" to a boolean value, defaulting to True if the variable is not set.
+# This controls whether a full initialization should be performed.
+DO_FULL_INIT = os.getenv("LEADERBOARD_FULL_INIT", "True") == "True"

def restart_space():
    API.restart_space(repo_id=REPO_ID, token=HF_TOKEN)
@@ -86,37 +88,49 @@ def download_dataset(repo_id, local_dir, repo_type="dataset", max_attempts=3, ba
        attempt += 1
    raise Exception(f"Failed to download {repo_id} after {max_attempts} attempts")

+def get_latest_data_leaderboard():
+    leaderboard_dataset = datasets.load_dataset(
+        AGGREGATED_REPO,
+        "default",
+        split="train",
+        cache_dir=HF_HOME,
+        download_mode=datasets.DownloadMode.REUSE_DATASET_IF_EXISTS,  # Uses the cached dataset
+        verification_mode="no_checks"
+    )

-def init_space(full_init: bool = True):
+    leaderboard_df = get_leaderboard_df(
+        leaderboard_dataset=leaderboard_dataset,
+        cols=COLS,
+        benchmark_cols=BENCHMARK_COLS,
+    )
+
+    return leaderboard_df
+
+def get_latest_data_queue():
+    eval_queue_dfs = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
+    return eval_queue_dfs
+
+def init_space():
    """Initializes the application space, loading only necessary data."""
-    if full_init:
+    if DO_FULL_INIT:
        # These downloads only occur on full initialization
        try:
            download_dataset(QUEUE_REPO, EVAL_REQUESTS_PATH)
        except Exception:
            restart_space()

-    # Always retrieve the leaderboard DataFrame
-    leaderboard_dataset = datasets.load_dataset(AGGREGATED_REPO, "default", split="train", cache_dir=HF_HOME)
-    leaderboard_df = get_leaderboard_df(
-        leaderboard_dataset=leaderboard_dataset,
-        cols=COLS,
-        benchmark_cols=BENCHMARK_COLS,
-    )
+    # Always redownload the leaderboard DataFrame
+    leaderboard_df = get_latest_data_leaderboard()

    # Evaluation queue DataFrame retrieval is independent of initialization detail level
-    eval_queue_dfs = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
+    eval_queue_dfs = get_latest_data_queue()

    return leaderboard_df, eval_queue_dfs


-# Convert the environment variable "LEADERBOARD_FULL_INIT" to a boolean value, defaulting to True if the variable is not set.
-# This controls whether a full initialization should be performed.
-do_full_init = os.getenv("LEADERBOARD_FULL_INIT", "True") == "True"
-
# Calls the init_space function with the `full_init` parameter determined by the `do_full_init` variable.
# This initializes various DataFrames used throughout the application, with the level of initialization detail controlled by the `do_full_init` flag.
-leaderboard_df, eval_queue_dfs = init_space(full_init=do_full_init)
+leaderboard_df, eval_queue_dfs = init_space()
finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df = eval_queue_dfs


@@ -125,6 +139,39 @@ finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df = eval_queu
# plot_df = create_plot_df(create_scores_df(leaderboard_df))
# return plot_df

+def init_leaderboard(dataframe):
+    return Leaderboard(
+        value = dataframe,
+        datatype=[c.type for c in fields(AutoEvalColumn)],
+        select_columns=SelectColumns(
+            default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default],
+            cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden or c.dummy],
+            label="Select Columns to Display:",
+        ),
+        search_columns=[AutoEvalColumn.model.name, AutoEvalColumn.fullname.name, AutoEvalColumn.license.name],
+        hide_columns=[c.name for c in fields(AutoEvalColumn) if c.hidden],
+        filter_columns=[
+            ColumnFilter(AutoEvalColumn.model_type.name, type="checkboxgroup", label="Model types"),
+            ColumnFilter(AutoEvalColumn.precision.name, type="checkboxgroup", label="Precision"),
+            ColumnFilter(
+                AutoEvalColumn.params.name,
+                type="slider",
+                min=0.01,
+                max=150,
+                label="Select the number of parameters (B)",
+            ),
+            ColumnFilter(
+                AutoEvalColumn.still_on_hub.name, type="boolean", label="Private or deleted", default=True
+            ),
+            ColumnFilter(
+                AutoEvalColumn.merged.name, type="boolean", label="Contains a merge/moerge", default=True
+            ),
+            ColumnFilter(AutoEvalColumn.moe.name, type="boolean", label="MoE", default=False),
+            ColumnFilter(AutoEvalColumn.not_flagged.name, type="boolean", label="Flagged", default=True),
+        ],
+        bool_checkboxgroup_label="Hide models",
+    )
+

demo = gr.Blocks(css=custom_css)
with demo:
@@ -133,37 +180,7 @@ with demo:

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
-            leaderboard = Leaderboard(
-                value=leaderboard_df,
-                datatype=[c.type for c in fields(AutoEvalColumn)],
-                select_columns=SelectColumns(
-                    default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default],
-                    cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden or c.dummy],
-                    label="Select Columns to Display:",
-                ),
-                search_columns=[AutoEvalColumn.model.name, AutoEvalColumn.fullname.name, AutoEvalColumn.license.name],
-                hide_columns=[c.name for c in fields(AutoEvalColumn) if c.hidden],
-                filter_columns=[
-                    ColumnFilter(AutoEvalColumn.model_type.name, type="checkboxgroup", label="Model types"),
-                    ColumnFilter(AutoEvalColumn.precision.name, type="checkboxgroup", label="Precision"),
-                    ColumnFilter(
-                        AutoEvalColumn.params.name,
-                        type="slider",
-                        min=0.01,
-                        max=150,
-                        label="Select the number of parameters (B)",
-                    ),
-                    ColumnFilter(
-                        AutoEvalColumn.still_on_hub.name, type="boolean", label="Private or deleted", default=True
-                    ),
-                    ColumnFilter(
-                        AutoEvalColumn.merged.name, type="boolean", label="Contains a merge/moerge", default=True
-                    ),
-                    ColumnFilter(AutoEvalColumn.moe.name, type="boolean", label="MoE", default=False),
-                    ColumnFilter(AutoEvalColumn.not_flagged.name, type="boolean", label="Flagged", default=True),
-                ],
-                bool_checkboxgroup_label="Hide models",
-            )
+            leaderboard = init_leaderboard(leaderboard_df)

        #with gr.TabItem("📈 Metrics through time", elem_id="llm-benchmark-tab-table", id=2):
        #    with gr.Row():
@@ -288,16 +305,18 @@ with demo:
                show_copy_button=True,
            )

+    demo.load(fn=get_latest_data_leaderboard, inputs=None, outputs=[leaderboard])
+    demo.load(fn=get_latest_data_queue, inputs=None, outputs=[finished_eval_table, running_eval_table, pending_eval_table])
+
demo.queue(default_concurrency_limit=40)

# Start ephemeral Spaces on PRs (see config in README.md)
from gradio_space_ci.webhook import IS_EPHEMERAL_SPACE, SPACE_ID, configure_space_ci

-
-
def enable_space_ci_and_return_server(ui: gr.Blocks) -> WebhooksServer:
    # Taken from https://huggingface.co/spaces/Wauplin/gradio-space-ci/blob/075119aee75ab5e7150bf0814eec91c83482e790/src/gradio_space_ci/webhook.py#L61
    # Compared to original, this one do not monkeypatch Gradio which allows us to define more webhooks.
+    # ht to Lucain!
    if SPACE_ID is None:
        print("Not in a Space: Space CI disabled.")
        return WebhooksServer(ui=demo)
@@ -311,7 +330,7 @@ def enable_space_ci_and_return_server(ui: gr.Blocks) -> WebhooksServer:
    print(f"Enabling Space CI with config from README: {config}")

    return configure_space_ci(
-        blocks=self,
+        blocks=ui,
        trusted_authors=config.get("trusted_authors"),
        private=config.get("private", "auto"),
        variables=config.get("variables", "auto"),
@@ -326,24 +345,21 @@ webhooks_server = enable_space_ci_and_return_server(ui=demo)
# Add webhooks
@webhooks_server.add_webhook
async def update_leaderboard(payload: WebhookPayload) -> None:
+    """Redownloads the leaderboard dataset each time it updates"""
    if payload.repo.type == "dataset" and payload.event.action == "update":
-        leaderboard_dataset = datasets.load_dataset(AGGREGATED_REPO, "default", split="train", cache_dir=HF_HOME)
-        leaderboard_df = get_leaderboard_df(
-            leaderboard_dataset=leaderboard_dataset,
-            cols=COLS,
-            benchmark_cols=BENCHMARK_COLS,
+        datasets.load_dataset(
+            AGGREGATED_REPO,
+            "default",
+            split="train",
+            cache_dir=HF_HOME,
+            download_mode=datasets.DownloadMode.FORCE_REDOWNLOAD,
+            verification_mode="no_checks"
        )
-        leaderboard.value = leaderboard_df

@webhooks_server.add_webhook
async def update_queue(payload: WebhookPayload) -> None:
+    """Redownloads the queue dataset each time it updates"""
    if payload.repo.type == "dataset" and payload.event.action == "update":
        download_dataset(QUEUE_REPO, EVAL_REQUESTS_PATH)
-        eval_queue_dfs = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
-        finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df = eval_queue_dfs
-
-        finished_eval_table.value = finished_eval_queue_df
-        running_eval_table.value = running_eval_queue_df
-        pending_eval_table.value = pending_eval_queue_df

webhooks_server.launch()