Spaces:

yanolja
/

arena

Running

Kang Suhyun suhyun.kang committed on Mar 15, 2024

Commit

076f69b

unverified ·

1 Parent(s): 4b31650

[#8] Update the leaderboard every 10 minutes (#38)

Changes:
- The leaderboard is now updated every 10 minutes

Note:
- This update causes unnecessary recalculation: all battles are reprocessed every 10 minutes, even those that have already been evaluated. This will be addressed in issue #37.

Co-authored-by: suhyun.kang <[email protected]>

Files changed (2) hide show

app.py +2 -27
leaderboard.py +53 -11

app.py CHANGED Viewed

@@ -2,41 +2,16 @@
 It provides a platform for comparing the responses of two LLMs.
 """
 import enum
-import json
-import os
 from uuid import uuid4
-import firebase_admin
-from firebase_admin import credentials
 from firebase_admin import firestore
 import gradio as gr
 from leaderboard import build_leaderboard
 import response
 from response import get_responses
-# Path to local credentials file, used in local development.
-CREDENTIALS_PATH = os.environ.get("CREDENTIALS_PATH")
-# Credentials passed as an environment variable, used in deployment.
-CREDENTIALS = os.environ.get("CREDENTIALS")
-def get_credentials():
-  # Set credentials using a file in a local environment, if available.
-  if CREDENTIALS_PATH and os.path.exists(CREDENTIALS_PATH):
-    return credentials.Certificate(CREDENTIALS_PATH)
-  # Use environment variable for credentials when the file is not found,
-  # as credentials should not be public.
-  json_cred = json.loads(CREDENTIALS)
-  return credentials.Certificate(json_cred)
-# TODO(#21): Fix auto-reload issue related to the initialization of Firebase.
-firebase_admin.initialize_app(get_credentials())
-db = firestore.client()
 SUPPORTED_TRANSLATION_LANGUAGES = [
     "Korean", "English", "Chinese", "Japanese", "Spanish", "French"
 ]
@@ -155,7 +130,7 @@ with gr.Blocks(title="Arena") as app:
   option_b.click(vote, [option_b] + common_inputs, common_outputs)
   tie.click(vote, [tie] + common_inputs, common_outputs)
-  build_leaderboard(db)
 if __name__ == "__main__":
   # We need to enable queue to use generators.

 It provides a platform for comparing the responses of two LLMs.
 """
 import enum
 from uuid import uuid4
 from firebase_admin import firestore
 import gradio as gr
 from leaderboard import build_leaderboard
+from leaderboard import db
 import response
 from response import get_responses
 SUPPORTED_TRANSLATION_LANGUAGES = [
     "Korean", "English", "Chinese", "Japanese", "Spanish", "French"
 ]
   option_b.click(vote, [option_b] + common_inputs, common_outputs)
   tie.click(vote, [tie] + common_inputs, common_outputs)
+  build_leaderboard()
 if __name__ == "__main__":
   # We need to enable queue to use generators.

leaderboard.py CHANGED Viewed

@@ -4,11 +4,38 @@ It provides a leaderboard component.
 from collections import defaultdict
 import enum
 import math
 import gradio as gr
 import pandas as pd
 class LeaderboardTab(enum.Enum):
   SUMMARIZATION = "Summarization"
@@ -35,17 +62,16 @@ def compute_elo(battles, k=4, scale=400, base=10, initial_rating=1000):
   return rating
-def get_docs(tab, db):
-  if tab.label == LeaderboardTab.SUMMARIZATION.value:
     return db.collection("arena-summarizations").order_by("timestamp").stream()
-  if tab.label == LeaderboardTab.TRANSLATION.value:
     return db.collection("arena-translations").order_by("timestamp").stream()
-# TODO(#8): Update the value periodically.
-def load_elo_ratings(tab, db):
-  docs = get_docs(tab, db)
   battles = []
   for doc in docs:
@@ -64,15 +90,31 @@ def load_elo_ratings(tab, db):
           for i, (model, rating) in enumerate(sorted_ratings)]
-def build_leaderboard(db):
   with gr.Tabs():
-    with gr.Tab(LeaderboardTab.SUMMARIZATION.value) as summarization_tab:
       gr.Dataframe(headers=["Rank", "Model", "Elo rating"],
                    datatype=["number", "str", "number"],
-                   value=load_elo_ratings(summarization_tab, db))
     # TODO(#9): Add language filter options.
-    with gr.Tab(LeaderboardTab.TRANSLATION.value) as translation_tab:
       gr.Dataframe(headers=["Rank", "Model", "Elo rating"],
                    datatype=["number", "str", "number"],
-                   value=load_elo_ratings(translation_tab, db))

 from collections import defaultdict
 import enum
+import json
 import math
+import os
+import firebase_admin
+from firebase_admin import credentials
+from firebase_admin import firestore
 import gradio as gr
 import pandas as pd
+# Path to local credentials file, used in local development.
+CREDENTIALS_PATH = os.environ.get("CREDENTIALS_PATH")
+# Credentials passed as an environment variable, used in deployment.
+CREDENTIALS = os.environ.get("CREDENTIALS")
+def get_credentials():
+  # Set credentials using a file in a local environment, if available.
+  if CREDENTIALS_PATH and os.path.exists(CREDENTIALS_PATH):
+    return credentials.Certificate(CREDENTIALS_PATH)
+  # Use environment variable for credentials when the file is not found,
+  # as credentials should not be public.
+  json_cred = json.loads(CREDENTIALS)
+  return credentials.Certificate(json_cred)
+# TODO(#21): Fix auto-reload issue related to the initialization of Firebase.
+firebase_admin.initialize_app(get_credentials())
+db = firestore.client()
 class LeaderboardTab(enum.Enum):
   SUMMARIZATION = "Summarization"
   return rating
+def get_docs(tab):
+  if tab == LeaderboardTab.SUMMARIZATION:
     return db.collection("arena-summarizations").order_by("timestamp").stream()
+  if tab == LeaderboardTab.TRANSLATION:
     return db.collection("arena-translations").order_by("timestamp").stream()
+def load_elo_ratings(tab):
+  docs = get_docs(tab)
   battles = []
   for doc in docs:
           for i, (model, rating) in enumerate(sorted_ratings)]
+def load_summarization_elo_ratings():
+  return load_elo_ratings(LeaderboardTab.SUMMARIZATION)
+def load_translation_elo_ratings():
+  return load_elo_ratings(LeaderboardTab.TRANSLATION)
+LEADERBOARD_UPDATE_INTERVAL = 600  # 10 minutes
+LEADERBOARD_INFO = "The leaderboard is updated every 10 minutes."
+def build_leaderboard():
   with gr.Tabs():
+    with gr.Tab(LeaderboardTab.SUMMARIZATION.value):
       gr.Dataframe(headers=["Rank", "Model", "Elo rating"],
                    datatype=["number", "str", "number"],
+                   value=load_summarization_elo_ratings,
+                   every=LEADERBOARD_UPDATE_INTERVAL)
+      gr.Markdown(LEADERBOARD_INFO)
     # TODO(#9): Add language filter options.
+    with gr.Tab(LeaderboardTab.TRANSLATION.value):
       gr.Dataframe(headers=["Rank", "Model", "Elo rating"],
                    datatype=["number", "str", "number"],
+                   value=load_translation_elo_ratings,
+                   every=LEADERBOARD_UPDATE_INTERVAL)
+      gr.Markdown(LEADERBOARD_INFO)