alozowski committed on
Commit
34f418b
·
1 Parent(s): 37b898a

Improved leaderboard update [wip]

Browse files
Files changed (2) hide show
  1. app.py +18 -50
  2. src/leaderboard/data.py +79 -0
app.py CHANGED
@@ -44,6 +44,7 @@ from src.envs import (
44
  HF_HOME,
45
  )
46
  from src.populate import get_evaluation_queue_df, get_leaderboard_df
 
47
  from src.submission.submit import add_new_eval
48
  from src.voting.vote_system import VoteManager, run_scheduler
49
 
@@ -59,13 +60,17 @@ DO_FULL_INIT = True # os.getenv("LEADERBOARD_FULL_INIT", "True") == "True"
59
  NEW_DATA_ON_LEADERBOARD = True
60
  LEADERBOARD_DF = None
61
 
 
 
 
 
62
  def restart_space():
63
  logging.info(f"Restarting space with repo ID: {REPO_ID}")
64
  try:
65
  # Check if new data is pending and download if necessary
66
  if NEW_DATA_ON_LEADERBOARD:
67
  logging.info("Fetching latest leaderboard data before restart.")
68
- get_latest_data_leaderboard()
69
 
70
  # Now restart the space
71
  API.restart_space(repo_id=REPO_ID, token=HF_TOKEN)
@@ -109,37 +114,6 @@ def download_dataset(repo_id, local_dir, repo_type="dataset", max_attempts=3, ba
109
  attempt += 1
110
  raise Exception(f"Failed to download {repo_id} after {max_attempts} attempts")
111
 
112
- def get_latest_data_leaderboard(leaderboard_initial_df=None):
113
- global NEW_DATA_ON_LEADERBOARD
114
- global LEADERBOARD_DF
115
- if NEW_DATA_ON_LEADERBOARD:
116
- logging.info("Leaderboard updated at reload!")
117
- try:
118
- leaderboard_dataset = datasets.load_dataset(
119
- AGGREGATED_REPO,
120
- "default",
121
- split="train",
122
- cache_dir=HF_HOME,
123
- download_mode=datasets.DownloadMode.FORCE_REDOWNLOAD, # Always download fresh data
124
- verification_mode="no_checks"
125
- )
126
- LEADERBOARD_DF = get_leaderboard_df(
127
- leaderboard_dataset=leaderboard_dataset,
128
- cols=COLS,
129
- benchmark_cols=BENCHMARK_COLS,
130
- )
131
- logging.info("Leaderboard dataset successfully downloaded.")
132
- except Exception as e:
133
- logging.error(f"Failed to download leaderboard dataset: {e}")
134
- return
135
-
136
- # Reset the flag after successful download
137
- NEW_DATA_ON_LEADERBOARD = False
138
- else:
139
- LEADERBOARD_DF = leaderboard_initial_df
140
- logging.info("Using cached leaderboard dataset.")
141
- return LEADERBOARD_DF
142
-
143
 
144
  def get_latest_data_queue():
145
  eval_queue_dfs = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
@@ -147,8 +121,7 @@ def get_latest_data_queue():
147
 
148
  def init_space():
149
  """Initializes the application space, loading only necessary data."""
150
- global NEW_DATA_ON_LEADERBOARD
151
- NEW_DATA_ON_LEADERBOARD = True # Ensure new data is always pulled on restart
152
 
153
  if DO_FULL_INIT:
154
  # These downloads only occur on full initialization
@@ -158,18 +131,14 @@ def init_space():
158
  except Exception:
159
  restart_space()
160
 
161
- # Always redownload the leaderboard DataFrame
162
- global LEADERBOARD_DF
163
- LEADERBOARD_DF = get_latest_data_leaderboard()
164
-
165
  # Evaluation queue DataFrame retrieval is independent of initialization detail level
166
  eval_queue_dfs = get_latest_data_queue()
167
-
168
- return LEADERBOARD_DF, eval_queue_dfs
169
 
170
  # Initialize VoteManager
171
  vote_manager = VoteManager(VOTES_PATH, EVAL_REQUESTS_PATH, VOTES_REPO)
172
 
 
173
 
174
  # Schedule the upload_votes method to run every 15 minutes
175
  schedule.every(15).minutes.do(vote_manager.upload_votes)
@@ -180,10 +149,11 @@ scheduler_thread.start()
180
 
181
  # Calls the init_space function with the `full_init` parameter determined by the `do_full_init` variable.
182
  # This initializes various DataFrames used throughout the application, with the level of initialization detail controlled by the `do_full_init` flag.
183
- LEADERBOARD_DF, eval_queue_dfs = init_space()
184
  finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df = eval_queue_dfs
185
 
186
 
 
187
  # Function to check if a user is logged in
188
  def check_login(profile: gr.OAuthProfile | None) -> bool:
189
  if profile is None:
@@ -193,8 +163,11 @@ def check_login(profile: gr.OAuthProfile | None) -> bool:
193
  def init_leaderboard(dataframe):
194
  if dataframe is None or dataframe.empty:
195
  raise ValueError("Leaderboard DataFrame is empty or None.")
 
 
 
196
  return Leaderboard(
197
- value=dataframe,
198
  datatype=[c.type for c in fields(AutoEvalColumn)],
199
  select_columns=SelectColumns(
200
  default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default],
@@ -236,7 +209,7 @@ with main_block:
236
 
237
  with gr.Tabs(elem_classes="tab-buttons") as tabs:
238
  with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
239
- leaderboard = init_leaderboard(LEADERBOARD_DF)
240
 
241
  with gr.TabItem("🚀 Submit ", elem_id="llm-benchmark-tab-table", id=5):
242
  with gr.Column():
@@ -425,7 +398,7 @@ with main_block:
425
  show_copy_button=True,
426
  )
427
 
428
- main_block.load(fn=get_latest_data_leaderboard, inputs=[leaderboard], outputs=[leaderboard])
429
  leaderboard.change(fn=get_latest_data_queue, inputs=None, outputs=[finished_eval_table, running_eval_table, pending_eval_table])
430
  pending_eval_table.change(fn=vote_manager.create_request_vote_df, inputs=[pending_eval_table], outputs=[pending_eval_table_votes])
431
 
@@ -466,14 +439,9 @@ webhooks_server = enable_space_ci_and_return_server(ui=main_block)
466
  def update_leaderboard(payload: WebhookPayload) -> None:
467
  """Redownloads the leaderboard dataset each time it updates"""
468
  if payload.repo.type == "dataset" and payload.event.action == "update":
469
- global NEW_DATA_ON_LEADERBOARD
470
  logging.info("New data detected, downloading updated leaderboard dataset.")
471
-
472
- # Mark the flag for new data
473
- NEW_DATA_ON_LEADERBOARD = True
474
-
475
  # Now actually download the latest data immediately
476
- get_latest_data_leaderboard()
477
 
478
  # The below code is not used at the moment, as we can manage the queue file locally
479
  LAST_UPDATE_QUEUE = datetime.datetime.now()
 
44
  HF_HOME,
45
  )
46
  from src.populate import get_evaluation_queue_df, get_leaderboard_df
47
+ from src.leaderboard.data import LeaderboardData
48
  from src.submission.submit import add_new_eval
49
  from src.voting.vote_system import VoteManager, run_scheduler
50
 
 
60
  NEW_DATA_ON_LEADERBOARD = True
61
  LEADERBOARD_DF = None
62
 
63
+
64
+ leaderboard_data = LeaderboardData()
65
+
66
+
67
  def restart_space():
68
  logging.info(f"Restarting space with repo ID: {REPO_ID}")
69
  try:
70
  # Check if new data is pending and download if necessary
71
  if NEW_DATA_ON_LEADERBOARD:
72
  logging.info("Fetching latest leaderboard data before restart.")
73
+ leaderboard_data.update()
74
 
75
  # Now restart the space
76
  API.restart_space(repo_id=REPO_ID, token=HF_TOKEN)
 
114
  attempt += 1
115
  raise Exception(f"Failed to download {repo_id} after {max_attempts} attempts")
116
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
 
118
  def get_latest_data_queue():
119
  eval_queue_dfs = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
 
121
 
122
  def init_space():
123
  """Initializes the application space, loading only necessary data."""
124
+ leaderboard_data.update()
 
125
 
126
  if DO_FULL_INIT:
127
  # These downloads only occur on full initialization
 
131
  except Exception:
132
  restart_space()
133
 
 
 
 
 
134
  # Evaluation queue DataFrame retrieval is independent of initialization detail level
135
  eval_queue_dfs = get_latest_data_queue()
136
+ return eval_queue_dfs
 
137
 
138
  # Initialize VoteManager
139
  vote_manager = VoteManager(VOTES_PATH, EVAL_REQUESTS_PATH, VOTES_REPO)
140
 
141
+ schedule.every(15).seconds.do(leaderboard_data.update)
142
 
143
  # Schedule the upload_votes method to run every 15 minutes
144
  schedule.every(15).minutes.do(vote_manager.upload_votes)
 
149
 
150
  # Calls init_space(), which takes no arguments; the module-level `DO_FULL_INIT` flag decides whether the full-initialization downloads run.
151
  # init_space() returns the evaluation-queue DataFrames, which are unpacked into finished/running/pending tables just below.
152
+ eval_queue_dfs = init_space()
153
  finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df = eval_queue_dfs
154
 
155
 
156
+ <<<<<<< Updated upstream
157
  # Function to check if a user is logged in
158
  def check_login(profile: gr.OAuthProfile | None) -> bool:
159
  if profile is None:
 
163
  def init_leaderboard(dataframe):
164
  if dataframe is None or dataframe.empty:
165
  raise ValueError("Leaderboard DataFrame is empty or None.")
166
+ =======
167
+ def make_leaderboard(leaderboard_data: LeaderboardData):
168
+ >>>>>>> Stashed changes
169
  return Leaderboard(
170
+ value=leaderboard_data.get_data(),
171
  datatype=[c.type for c in fields(AutoEvalColumn)],
172
  select_columns=SelectColumns(
173
  default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default],
 
209
 
210
  with gr.Tabs(elem_classes="tab-buttons") as tabs:
211
  with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
212
+ leaderboard = make_leaderboard(leaderboard_data)
213
 
214
  with gr.TabItem("🚀 Submit ", elem_id="llm-benchmark-tab-table", id=5):
215
  with gr.Column():
 
398
  show_copy_button=True,
399
  )
400
 
401
+ # main_block.load(fn=leaderboard_data.get_data, inputs=[leaderboard], outputs=[leaderboard])
402
  leaderboard.change(fn=get_latest_data_queue, inputs=None, outputs=[finished_eval_table, running_eval_table, pending_eval_table])
403
  pending_eval_table.change(fn=vote_manager.create_request_vote_df, inputs=[pending_eval_table], outputs=[pending_eval_table_votes])
404
 
 
439
  def update_leaderboard(payload: WebhookPayload) -> None:
440
  """Redownloads the leaderboard dataset each time it updates"""
441
  if payload.repo.type == "dataset" and payload.event.action == "update":
 
442
  logging.info("New data detected, downloading updated leaderboard dataset.")
 
 
 
 
443
  # Now actually download the latest data immediately
444
+ leaderboard_data.update()
445
 
446
  # The below code is not used at the moment, as we can manage the queue file locally
447
  LAST_UPDATE_QUEUE = datetime.datetime.now()
src/leaderboard/data.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import datasets
3
+ from src.populate import get_leaderboard_df
4
+ from src.envs import AGGREGATED_REPO, HF_HOME
5
+ from src.display.utils import COLS, BENCHMARK_COLS
6
+
7
class LeaderboardData:
    """Holds the leaderboard table and refreshes it from the aggregated dataset repo.

    The table is built by ``get_leaderboard_df`` from the dataset loaded out of
    ``AGGREGATED_REPO``. A failed refresh never discards the previously loaded
    table: ``get_data()`` keeps returning the last successful result.
    """

    def __init__(self):
        # Last successfully built leaderboard table; None until the first
        # successful update.
        self.__data = None
        # Configuration is taken from the project-level constants imported at
        # the top of this module.
        self.aggregated_repo = AGGREGATED_REPO
        self.hf_home = HF_HOME
        self.cols = COLS
        self.benchmark_cols = BENCHMARK_COLS

    def __update(self):
        """Download the dataset and rebuild the leaderboard table.

        Returns:
            The freshly built table on success, or ``None`` if downloading or
            processing failed (in which case the stored table is left as is).
        """
        try:
            leaderboard_dataset = datasets.load_dataset(
                self.aggregated_repo,
                "default",
                split="train",
                cache_dir=self.hf_home,
                # Always fetch fresh data instead of reusing the local cache.
                download_mode=datasets.DownloadMode.FORCE_REDOWNLOAD,
                verification_mode="no_checks",
            )
            fresh_data = get_leaderboard_df(
                leaderboard_dataset=leaderboard_dataset,
                cols=self.cols,
                benchmark_cols=self.benchmark_cols,
            )
        except Exception as e:
            # Best-effort refresh: log with traceback and keep serving the
            # previous table rather than crashing the caller (e.g. a scheduler).
            logging.exception("Failed to download leaderboard dataset: %s", e)
            return None

        # Only replace the stored table once the new one is fully built.
        self.__data = fresh_data
        logging.info("Leaderboard dataset successfully downloaded.")
        return self.__data

    def update(self):
        """Trigger a refresh of the leaderboard data.

        Returns:
            The refreshed table, or ``None`` if the refresh failed.
        """
        refreshed = self.__update()
        # Report the update only when it actually happened.
        if refreshed is not None:
            logging.info("Leaderboard updated at reload!")
        return refreshed

    def get_data(self):
        """Return the most recently loaded leaderboard table (``None`` if never loaded)."""
        return self.__data
48
+
49
+
50
+ # def get_latest_data_leaderboard(leaderboard_initial_df=None):
51
+ # global NEW_DATA_ON_LEADERBOARD
52
+ # global LEADERBOARD_DF
53
+ # if NEW_DATA_ON_LEADERBOARD:
54
+ # logging.info("Leaderboard updated at reload!")
55
+ # try:
56
+ # leaderboard_dataset = datasets.load_dataset(
57
+ # AGGREGATED_REPO,
58
+ # "default",
59
+ # split="train",
60
+ # cache_dir=HF_HOME,
61
+ # download_mode=datasets.DownloadMode.FORCE_REDOWNLOAD, # Always download fresh data
62
+ # verification_mode="no_checks"
63
+ # )
64
+ # LEADERBOARD_DF = get_leaderboard_df(
65
+ # leaderboard_dataset=leaderboard_dataset,
66
+ # cols=COLS,
67
+ # benchmark_cols=BENCHMARK_COLS,
68
+ # )
69
+ # logging.info("Leaderboard dataset successfully downloaded.")
70
+ # except Exception as e:
71
+ # logging.error(f"Failed to download leaderboard dataset: {e}")
72
+ # return
73
+
74
+ # # Reset the flag after successful download
75
+ # NEW_DATA_ON_LEADERBOARD = False
76
+ # else:
77
+ # LEADERBOARD_DF = leaderboard_initial_df
78
+ # logging.info("Using cached leaderboard dataset.")
79
+ # return LEADERBOARD_DF