Kang Suhyun suhyun.kang committed on
Commit
076f69b
·
unverified ·
1 Parent(s): 4b31650

[#8] Update the leaderboard every 10 minutes (#38)

Browse files

Changes:
- The leaderboard is now updated every 10 minutes

Note:
- This update causes unnecessary recalculation: all battles are reprocessed every 10 minutes, even if they have already been evaluated. This will be addressed in issue #37.

Co-authored-by: suhyun.kang <[email protected]>

Files changed (2) hide show
  1. app.py +2 -27
  2. leaderboard.py +53 -11
app.py CHANGED
@@ -2,41 +2,16 @@
2
  It provides a platform for comparing the responses of two LLMs.
3
  """
4
  import enum
5
- import json
6
- import os
7
  from uuid import uuid4
8
 
9
- import firebase_admin
10
- from firebase_admin import credentials
11
  from firebase_admin import firestore
12
  import gradio as gr
13
 
14
  from leaderboard import build_leaderboard
 
15
  import response
16
  from response import get_responses
17
 
18
- # Path to local credentials file, used in local development.
19
- CREDENTIALS_PATH = os.environ.get("CREDENTIALS_PATH")
20
-
21
- # Credentials passed as an environment variable, used in deployment.
22
- CREDENTIALS = os.environ.get("CREDENTIALS")
23
-
24
-
25
- def get_credentials():
26
- # Set credentials using a file in a local environment, if available.
27
- if CREDENTIALS_PATH and os.path.exists(CREDENTIALS_PATH):
28
- return credentials.Certificate(CREDENTIALS_PATH)
29
-
30
- # Use environment variable for credentials when the file is not found,
31
- # as credentials should not be public.
32
- json_cred = json.loads(CREDENTIALS)
33
- return credentials.Certificate(json_cred)
34
-
35
-
36
- # TODO(#21): Fix auto-reload issue related to the initialization of Firebase.
37
- firebase_admin.initialize_app(get_credentials())
38
- db = firestore.client()
39
-
40
  SUPPORTED_TRANSLATION_LANGUAGES = [
41
  "Korean", "English", "Chinese", "Japanese", "Spanish", "French"
42
  ]
@@ -155,7 +130,7 @@ with gr.Blocks(title="Arena") as app:
155
  option_b.click(vote, [option_b] + common_inputs, common_outputs)
156
  tie.click(vote, [tie] + common_inputs, common_outputs)
157
 
158
- build_leaderboard(db)
159
 
160
  if __name__ == "__main__":
161
  # We need to enable queue to use generators.
 
2
  It provides a platform for comparing the responses of two LLMs.
3
  """
4
  import enum
 
 
5
  from uuid import uuid4
6
 
 
 
7
  from firebase_admin import firestore
8
  import gradio as gr
9
 
10
  from leaderboard import build_leaderboard
11
+ from leaderboard import db
12
  import response
13
  from response import get_responses
14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  SUPPORTED_TRANSLATION_LANGUAGES = [
16
  "Korean", "English", "Chinese", "Japanese", "Spanish", "French"
17
  ]
 
130
  option_b.click(vote, [option_b] + common_inputs, common_outputs)
131
  tie.click(vote, [tie] + common_inputs, common_outputs)
132
 
133
+ build_leaderboard()
134
 
135
  if __name__ == "__main__":
136
  # We need to enable queue to use generators.
leaderboard.py CHANGED
@@ -4,11 +4,38 @@ It provides a leaderboard component.
4
 
5
  from collections import defaultdict
6
  import enum
 
7
  import math
 
8
 
 
 
 
9
  import gradio as gr
10
  import pandas as pd
11
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
  class LeaderboardTab(enum.Enum):
14
  SUMMARIZATION = "Summarization"
@@ -35,17 +62,16 @@ def compute_elo(battles, k=4, scale=400, base=10, initial_rating=1000):
35
  return rating
36
 
37
 
38
- def get_docs(tab, db):
39
- if tab.label == LeaderboardTab.SUMMARIZATION.value:
40
  return db.collection("arena-summarizations").order_by("timestamp").stream()
41
 
42
- if tab.label == LeaderboardTab.TRANSLATION.value:
43
  return db.collection("arena-translations").order_by("timestamp").stream()
44
 
45
 
46
- # TODO(#8): Update the value periodically.
47
- def load_elo_ratings(tab, db):
48
- docs = get_docs(tab, db)
49
 
50
  battles = []
51
  for doc in docs:
@@ -64,15 +90,31 @@ def load_elo_ratings(tab, db):
64
  for i, (model, rating) in enumerate(sorted_ratings)]
65
 
66
 
67
- def build_leaderboard(db):
 
 
 
 
 
 
 
 
 
 
 
 
68
  with gr.Tabs():
69
- with gr.Tab(LeaderboardTab.SUMMARIZATION.value) as summarization_tab:
70
  gr.Dataframe(headers=["Rank", "Model", "Elo rating"],
71
  datatype=["number", "str", "number"],
72
- value=load_elo_ratings(summarization_tab, db))
 
 
73
 
74
  # TODO(#9): Add language filter options.
75
- with gr.Tab(LeaderboardTab.TRANSLATION.value) as translation_tab:
76
  gr.Dataframe(headers=["Rank", "Model", "Elo rating"],
77
  datatype=["number", "str", "number"],
78
- value=load_elo_ratings(translation_tab, db))
 
 
 
4
 
5
  from collections import defaultdict
6
  import enum
7
+ import json
8
  import math
9
+ import os
10
 
11
+ import firebase_admin
12
+ from firebase_admin import credentials
13
+ from firebase_admin import firestore
14
  import gradio as gr
15
  import pandas as pd
16
 
17
+ # Path to local credentials file, used in local development.
18
+ CREDENTIALS_PATH = os.environ.get("CREDENTIALS_PATH")
19
+
20
+ # Credentials passed as an environment variable, used in deployment.
21
+ CREDENTIALS = os.environ.get("CREDENTIALS")
22
+
23
+
24
+ def get_credentials():
25
+ # Set credentials using a file in a local environment, if available.
26
+ if CREDENTIALS_PATH and os.path.exists(CREDENTIALS_PATH):
27
+ return credentials.Certificate(CREDENTIALS_PATH)
28
+
29
+ # Use environment variable for credentials when the file is not found,
30
+ # as credentials should not be public.
31
+ json_cred = json.loads(CREDENTIALS)
32
+ return credentials.Certificate(json_cred)
33
+
34
+
35
+ # TODO(#21): Fix auto-reload issue related to the initialization of Firebase.
36
+ firebase_admin.initialize_app(get_credentials())
37
+ db = firestore.client()
38
+
39
 
40
  class LeaderboardTab(enum.Enum):
41
  SUMMARIZATION = "Summarization"
 
62
  return rating
63
 
64
 
65
+ def get_docs(tab):
66
+ if tab == LeaderboardTab.SUMMARIZATION:
67
  return db.collection("arena-summarizations").order_by("timestamp").stream()
68
 
69
+ if tab == LeaderboardTab.TRANSLATION:
70
  return db.collection("arena-translations").order_by("timestamp").stream()
71
 
72
 
73
+ def load_elo_ratings(tab):
74
+ docs = get_docs(tab)
 
75
 
76
  battles = []
77
  for doc in docs:
 
90
  for i, (model, rating) in enumerate(sorted_ratings)]
91
 
92
 
93
+ def load_summarization_elo_ratings():
94
+ return load_elo_ratings(LeaderboardTab.SUMMARIZATION)
95
+
96
+
97
+ def load_translation_elo_ratings():
98
+ return load_elo_ratings(LeaderboardTab.TRANSLATION)
99
+
100
+
101
+ LEADERBOARD_UPDATE_INTERVAL = 600 # 10 minutes
102
+ LEADERBOARD_INFO = "The leaderboard is updated every 10 minutes."
103
+
104
+
105
+ def build_leaderboard():
106
  with gr.Tabs():
107
+ with gr.Tab(LeaderboardTab.SUMMARIZATION.value):
108
  gr.Dataframe(headers=["Rank", "Model", "Elo rating"],
109
  datatype=["number", "str", "number"],
110
+ value=load_summarization_elo_ratings,
111
+ every=LEADERBOARD_UPDATE_INTERVAL)
112
+ gr.Markdown(LEADERBOARD_INFO)
113
 
114
  # TODO(#9): Add language filter options.
115
+ with gr.Tab(LeaderboardTab.TRANSLATION.value):
116
  gr.Dataframe(headers=["Rank", "Model", "Elo rating"],
117
  datatype=["number", "str", "number"],
118
+ value=load_translation_elo_ratings,
119
+ every=LEADERBOARD_UPDATE_INTERVAL)
120
+ gr.Markdown(LEADERBOARD_INFO)