Spaces:
Running
Running
Kang Suhyun
suhyun.kang
committed on
[#8] Update the leaderboard every 10 minutes (#38)
Browse files
Changes:
- The leaderboard is now updated every 10 minutes
Note:
- This update results in unnecessary recalculations, with all battles being processed every 10 minutes, even if they've already been evaluated. This issue will be addressed in issue #37.
Co-authored-by: suhyun.kang <[email protected]>
- app.py +2 -27
- leaderboard.py +53 -11
app.py
CHANGED
@@ -2,41 +2,16 @@
|
|
2 |
It provides a platform for comparing the responses of two LLMs.
|
3 |
"""
|
4 |
import enum
|
5 |
-
import json
|
6 |
-
import os
|
7 |
from uuid import uuid4
|
8 |
|
9 |
-
import firebase_admin
|
10 |
-
from firebase_admin import credentials
|
11 |
from firebase_admin import firestore
|
12 |
import gradio as gr
|
13 |
|
14 |
from leaderboard import build_leaderboard
|
|
|
15 |
import response
|
16 |
from response import get_responses
|
17 |
|
18 |
-
# Path to local credentials file, used in local development.
|
19 |
-
CREDENTIALS_PATH = os.environ.get("CREDENTIALS_PATH")
|
20 |
-
|
21 |
-
# Credentials passed as an environment variable, used in deployment.
|
22 |
-
CREDENTIALS = os.environ.get("CREDENTIALS")
|
23 |
-
|
24 |
-
|
25 |
-
def get_credentials():
|
26 |
-
# Set credentials using a file in a local environment, if available.
|
27 |
-
if CREDENTIALS_PATH and os.path.exists(CREDENTIALS_PATH):
|
28 |
-
return credentials.Certificate(CREDENTIALS_PATH)
|
29 |
-
|
30 |
-
# Use environment variable for credentials when the file is not found,
|
31 |
-
# as credentials should not be public.
|
32 |
-
json_cred = json.loads(CREDENTIALS)
|
33 |
-
return credentials.Certificate(json_cred)
|
34 |
-
|
35 |
-
|
36 |
-
# TODO(#21): Fix auto-reload issue related to the initialization of Firebase.
|
37 |
-
firebase_admin.initialize_app(get_credentials())
|
38 |
-
db = firestore.client()
|
39 |
-
|
40 |
SUPPORTED_TRANSLATION_LANGUAGES = [
|
41 |
"Korean", "English", "Chinese", "Japanese", "Spanish", "French"
|
42 |
]
|
@@ -155,7 +130,7 @@ with gr.Blocks(title="Arena") as app:
|
|
155 |
option_b.click(vote, [option_b] + common_inputs, common_outputs)
|
156 |
tie.click(vote, [tie] + common_inputs, common_outputs)
|
157 |
|
158 |
-
build_leaderboard(
|
159 |
|
160 |
if __name__ == "__main__":
|
161 |
# We need to enable queue to use generators.
|
|
|
2 |
It provides a platform for comparing the responses of two LLMs.
|
3 |
"""
|
4 |
import enum
|
|
|
|
|
5 |
from uuid import uuid4
|
6 |
|
|
|
|
|
7 |
from firebase_admin import firestore
|
8 |
import gradio as gr
|
9 |
|
10 |
from leaderboard import build_leaderboard
|
11 |
+
from leaderboard import db
|
12 |
import response
|
13 |
from response import get_responses
|
14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
SUPPORTED_TRANSLATION_LANGUAGES = [
|
16 |
"Korean", "English", "Chinese", "Japanese", "Spanish", "French"
|
17 |
]
|
|
|
130 |
option_b.click(vote, [option_b] + common_inputs, common_outputs)
|
131 |
tie.click(vote, [tie] + common_inputs, common_outputs)
|
132 |
|
133 |
+
build_leaderboard()
|
134 |
|
135 |
if __name__ == "__main__":
|
136 |
# We need to enable queue to use generators.
|
leaderboard.py
CHANGED
@@ -4,11 +4,38 @@ It provides a leaderboard component.
|
|
4 |
|
5 |
from collections import defaultdict
|
6 |
import enum
|
|
|
7 |
import math
|
|
|
8 |
|
|
|
|
|
|
|
9 |
import gradio as gr
|
10 |
import pandas as pd
|
11 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
|
13 |
class LeaderboardTab(enum.Enum):
|
14 |
SUMMARIZATION = "Summarization"
|
@@ -35,17 +62,16 @@ def compute_elo(battles, k=4, scale=400, base=10, initial_rating=1000):
|
|
35 |
return rating
|
36 |
|
37 |
|
38 |
-
def get_docs(tab
|
39 |
-
if tab
|
40 |
return db.collection("arena-summarizations").order_by("timestamp").stream()
|
41 |
|
42 |
-
if tab
|
43 |
return db.collection("arena-translations").order_by("timestamp").stream()
|
44 |
|
45 |
|
46 |
-
|
47 |
-
|
48 |
-
docs = get_docs(tab, db)
|
49 |
|
50 |
battles = []
|
51 |
for doc in docs:
|
@@ -64,15 +90,31 @@ def load_elo_ratings(tab, db):
|
|
64 |
for i, (model, rating) in enumerate(sorted_ratings)]
|
65 |
|
66 |
|
67 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
68 |
with gr.Tabs():
|
69 |
-
with gr.Tab(LeaderboardTab.SUMMARIZATION.value)
|
70 |
gr.Dataframe(headers=["Rank", "Model", "Elo rating"],
|
71 |
datatype=["number", "str", "number"],
|
72 |
-
value=
|
|
|
|
|
73 |
|
74 |
# TODO(#9): Add language filter options.
|
75 |
-
with gr.Tab(LeaderboardTab.TRANSLATION.value)
|
76 |
gr.Dataframe(headers=["Rank", "Model", "Elo rating"],
|
77 |
datatype=["number", "str", "number"],
|
78 |
-
value=
|
|
|
|
|
|
4 |
|
5 |
from collections import defaultdict
|
6 |
import enum
|
7 |
+
import json
|
8 |
import math
|
9 |
+
import os
|
10 |
|
11 |
+
import firebase_admin
|
12 |
+
from firebase_admin import credentials
|
13 |
+
from firebase_admin import firestore
|
14 |
import gradio as gr
|
15 |
import pandas as pd
|
16 |
|
17 |
+
# Path to local credentials file, used in local development.
|
18 |
+
CREDENTIALS_PATH = os.environ.get("CREDENTIALS_PATH")
|
19 |
+
|
20 |
+
# Credentials passed as an environment variable, used in deployment.
|
21 |
+
CREDENTIALS = os.environ.get("CREDENTIALS")
|
22 |
+
|
23 |
+
|
24 |
+
def get_credentials():
    """Build the Firebase credentials certificate.

    The local credentials file named by CREDENTIALS_PATH is preferred when
    that path is set and exists. Otherwise the JSON document held in the
    CREDENTIALS environment variable is parsed and used instead.

    Returns:
        A `credentials.Certificate` built from whichever source was found.

    Raises:
        ValueError: If no credentials file is available and CREDENTIALS is
            unset or empty. Malformed JSON in CREDENTIALS surfaces as
            `json.JSONDecodeError`, which is itself a `ValueError`.
    """
    # Set credentials using a file in a local environment, if available.
    if CREDENTIALS_PATH and os.path.exists(CREDENTIALS_PATH):
        return credentials.Certificate(CREDENTIALS_PATH)

    # Use environment variable for credentials when the file is not found,
    # as credentials should not be public.
    if not CREDENTIALS:
        # Without this guard json.loads(None) fails with a TypeError that
        # says nothing about which configuration is missing.
        raise ValueError(
            "Firebase credentials not found: set CREDENTIALS_PATH to an "
            "existing credentials file or set CREDENTIALS to the "
            "credentials JSON.")

    json_cred = json.loads(CREDENTIALS)
    return credentials.Certificate(json_cred)
|
33 |
+
|
34 |
+
|
35 |
+
# TODO(#21): Fix auto-reload issue related to the initialization of Firebase.
|
36 |
+
firebase_admin.initialize_app(get_credentials())
|
37 |
+
db = firestore.client()
|
38 |
+
|
39 |
|
40 |
class LeaderboardTab(enum.Enum):
|
41 |
SUMMARIZATION = "Summarization"
|
|
|
62 |
return rating
|
63 |
|
64 |
|
65 |
+
def get_docs(tab):
    """Stream the battle documents for a leaderboard tab, oldest first.

    Args:
        tab: The `LeaderboardTab` member whose collection should be read.

    Returns:
        The result of `.stream()` on the tab's collection, ordered by the
        "timestamp" field.

    Raises:
        ValueError: If `tab` is not a supported `LeaderboardTab` member.
    """
    if tab == LeaderboardTab.SUMMARIZATION:
        collection_name = "arena-summarizations"
    elif tab == LeaderboardTab.TRANSLATION:
        collection_name = "arena-translations"
    else:
        # Previously an unknown tab fell through and returned None, which
        # only failed later when the caller tried to iterate the result.
        raise ValueError(f"Unsupported leaderboard tab: {tab!r}")

    return db.collection(collection_name).order_by("timestamp").stream()
|
71 |
|
72 |
|
73 |
+
def load_elo_ratings(tab):
|
74 |
+
docs = get_docs(tab)
|
|
|
75 |
|
76 |
battles = []
|
77 |
for doc in docs:
|
|
|
90 |
for i, (model, rating) in enumerate(sorted_ratings)]
|
91 |
|
92 |
|
93 |
+
def load_summarization_elo_ratings():
    """Return the Elo ratings for the summarization leaderboard.

    This zero-argument wrapper around `load_elo_ratings` exists so the
    loader can be handed over as a plain callable.
    """
    summarization_tab = LeaderboardTab.SUMMARIZATION
    return load_elo_ratings(summarization_tab)
|
95 |
+
|
96 |
+
|
97 |
+
def load_translation_elo_ratings():
    """Return the Elo ratings for the translation leaderboard.

    This zero-argument wrapper around `load_elo_ratings` exists so the
    loader can be handed over as a plain callable.
    """
    translation_tab = LeaderboardTab.TRANSLATION
    return load_elo_ratings(translation_tab)
|
99 |
+
|
100 |
+
|
101 |
+
LEADERBOARD_UPDATE_INTERVAL = 600 # 10 minutes
|
102 |
+
LEADERBOARD_INFO = "The leaderboard is updated every 10 minutes."
|
103 |
+
|
104 |
+
|
105 |
+
def build_leaderboard():
    """Lay out the leaderboard as one Gradio tab per task.

    Each tab holds a Dataframe whose `value` is the tab's loader callable,
    passed together with `every=LEADERBOARD_UPDATE_INTERVAL`, followed by
    the LEADERBOARD_INFO note rendered as Markdown.
    """
    column_headers = ("Rank", "Model", "Elo rating")
    column_types = ("number", "str", "number")

    # TODO(#9): Add language filter options (translation tab).
    tab_loaders = (
        (LeaderboardTab.SUMMARIZATION, load_summarization_elo_ratings),
        (LeaderboardTab.TRANSLATION, load_translation_elo_ratings),
    )

    with gr.Tabs():
        for tab, ratings_loader in tab_loaders:
            with gr.Tab(tab.value):
                # Fresh lists per component, as in the hand-written version.
                gr.Dataframe(headers=list(column_headers),
                             datatype=list(column_types),
                             value=ratings_loader,
                             every=LEADERBOARD_UPDATE_INTERVAL)
                gr.Markdown(LEADERBOARD_INFO)
|