open_llm_leaderboard

Runtime error

App Files Files Community

gsaivinay committed on Sep 17, 2023

Commit

e5cbf2a

1 Parent(s): ee366c0

updates

Browse files

Files changed (6) hide show

app.py +39 -8
model_info_cache.pkl +2 -2
model_size_cache.pkl +3 -0
requirements.txt +1 -0
src/assets/text_content.py +1 -1
src/display_models/get_model_metadata.py +27 -9

app.py CHANGED Viewed

@@ -114,6 +114,8 @@ leaderboard_df = original_df.copy()
     pending_eval_queue_df,
 ) = get_evaluation_queue_df(eval_queue, eval_queue_private, EVAL_REQUESTS_PATH, EVAL_COLS)
 ## INTERACTION FUNCTIONS
 def add_new_eval(
@@ -216,8 +218,8 @@ def change_tab(query_param: str):
 # Searching and filtering
-def update_table(hidden_df: pd.DataFrame, current_columns_df: pd.DataFrame, columns: list, type_query: list, size_query: list, show_deleted: bool, query: str):
-    filtered_df = filter_models(hidden_df, type_query, size_query, show_deleted)
     if query != "":
         filtered_df = search_table(filtered_df, query)
     df = select_columns(filtered_df, columns)
@@ -249,7 +251,7 @@ NUMERIC_INTERVALS = {
 }
 def filter_models(
-    df: pd.DataFrame, type_query: list, size_query: list, show_deleted: bool
 ) -> pd.DataFrame:
     # Show all models
     if show_deleted:
@@ -259,6 +261,7 @@ def filter_models(
     type_emoji = [t[0] for t in type_query]
     filtered_df = filtered_df[df[AutoEvalColumn.model_type_symbol.name].isin(type_emoji)]
     numeric_interval = pd.IntervalIndex(sorted([NUMERIC_INTERVALS[s] for s in size_query]))
     params_column = pd.to_numeric(df[AutoEvalColumn.params.name], errors="coerce")
@@ -277,6 +280,12 @@ with demo:
         with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
             with gr.Row():
                 with gr.Column():
                     with gr.Row():
                         shown_columns = gr.CheckboxGroup(
                             choices=[
@@ -310,11 +319,6 @@ with demo:
                             value=True, label="Show gated/private/deleted models", interactive=True
                         )
                 with gr.Column(min_width=320):
-                    search_bar = gr.Textbox(
-                        placeholder="🔍 Search for your model and press ENTER...",
-                        show_label=False,
-                        elem_id="search-bar",
-                    )
                     with gr.Box(elem_id="box-filter"):
                         filter_columns_type = gr.CheckboxGroup(
                             label="Model types",
@@ -333,6 +337,13 @@ with demo:
                             interactive=True,
                             elem_id="filter-columns-type",
                         )
                         filter_columns_size = gr.CheckboxGroup(
                             label="Model sizes",
                             choices=list(NUMERIC_INTERVALS.keys()),
@@ -375,6 +386,7 @@ with demo:
                     leaderboard_table,
                     shown_columns,
                     filter_columns_type,
                     filter_columns_size,
                     deleted_models_visibility,
                     search_bar,
@@ -388,6 +400,7 @@ with demo:
                     leaderboard_table,
                     shown_columns,
                     filter_columns_type,
                     filter_columns_size,
                     deleted_models_visibility,
                     search_bar,
@@ -402,6 +415,22 @@ with demo:
                     leaderboard_table,
                     shown_columns,
                     filter_columns_type,
                     filter_columns_size,
                     deleted_models_visibility,
                     search_bar,
@@ -416,6 +445,7 @@ with demo:
                     leaderboard_table,
                     shown_columns,
                     filter_columns_type,
                     filter_columns_size,
                     deleted_models_visibility,
                     search_bar,
@@ -430,6 +460,7 @@ with demo:
                     leaderboard_table,
                     shown_columns,
                     filter_columns_type,
                     filter_columns_size,
                     deleted_models_visibility,
                     search_bar,

     pending_eval_queue_df,
 ) = get_evaluation_queue_df(eval_queue, eval_queue_private, EVAL_REQUESTS_PATH, EVAL_COLS)
+print(leaderboard_df["Precision"].unique())
 ## INTERACTION FUNCTIONS
 def add_new_eval(
 # Searching and filtering
+def update_table(hidden_df: pd.DataFrame, current_columns_df: pd.DataFrame, columns: list, type_query: list, precision_query: str, size_query: list, show_deleted: bool, query: str):
+    filtered_df = filter_models(hidden_df, type_query, size_query, precision_query, show_deleted)
     if query != "":
         filtered_df = search_table(filtered_df, query)
     df = select_columns(filtered_df, columns)
 }
 def filter_models(
+    df: pd.DataFrame, type_query: list, size_query: list, precision_query: list, show_deleted: bool
 ) -> pd.DataFrame:
     # Show all models
     if show_deleted:
     type_emoji = [t[0] for t in type_query]
     filtered_df = filtered_df[df[AutoEvalColumn.model_type_symbol.name].isin(type_emoji)]
+    filtered_df = filtered_df[df[AutoEvalColumn.precision.name].isin(precision_query)]
     numeric_interval = pd.IntervalIndex(sorted([NUMERIC_INTERVALS[s] for s in size_query]))
     params_column = pd.to_numeric(df[AutoEvalColumn.params.name], errors="coerce")
         with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
             with gr.Row():
                 with gr.Column():
+                    with gr.Row():
+                        search_bar = gr.Textbox(
+                            placeholder=" 🔍 Search for your model and press ENTER...",
+                            show_label=False,
+                            elem_id="search-bar",
+                        )
                     with gr.Row():
                         shown_columns = gr.CheckboxGroup(
                             choices=[
                             value=True, label="Show gated/private/deleted models", interactive=True
                         )
                 with gr.Column(min_width=320):
                     with gr.Box(elem_id="box-filter"):
                         filter_columns_type = gr.CheckboxGroup(
                             label="Model types",
                             interactive=True,
                             elem_id="filter-columns-type",
                         )
+                        filter_columns_precision = gr.CheckboxGroup(
+                            label="Precision",
+                            choices=["torch.float16", "torch.bfloat16", "torch.float32", "8bit", "4bit", "GPTQ"],
+                            value=["torch.float16", "torch.bfloat16", "torch.float32", "8bit", "4bit", "GPTQ"],
+                            interactive=True,
+                            elem_id="filter-columns-precision",
+                        )
                         filter_columns_size = gr.CheckboxGroup(
                             label="Model sizes",
                             choices=list(NUMERIC_INTERVALS.keys()),
                     leaderboard_table,
                     shown_columns,
                     filter_columns_type,
+                    filter_columns_precision,
                     filter_columns_size,
                     deleted_models_visibility,
                     search_bar,
                     leaderboard_table,
                     shown_columns,
                     filter_columns_type,
+                    filter_columns_precision,
                     filter_columns_size,
                     deleted_models_visibility,
                     search_bar,
                     leaderboard_table,
                     shown_columns,
                     filter_columns_type,
+                    filter_columns_precision,
+                    filter_columns_size,
+                    deleted_models_visibility,
+                    search_bar,
+                ],
+                leaderboard_table,
+                queue=True,
+            )
+            filter_columns_precision.change(
+                update_table,
+                [
+                    hidden_leaderboard_table_for_search,
+                    leaderboard_table,
+                    shown_columns,
+                    filter_columns_type,
+                    filter_columns_precision,
                     filter_columns_size,
                     deleted_models_visibility,
                     search_bar,
                     leaderboard_table,
                     shown_columns,
                     filter_columns_type,
+                    filter_columns_precision,
                     filter_columns_size,
                     deleted_models_visibility,
                     search_bar,
                     leaderboard_table,
                     shown_columns,
                     filter_columns_type,
+                    filter_columns_precision,
                     filter_columns_size,
                     deleted_models_visibility,
                     search_bar,

model_info_cache.pkl CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c307938f15bda18b6c38af3d02cc0407d9d8d5345bc31f475af2cbbb33a4f8b5
-size 2895750

 version https://git-lfs.github.com/spec/v1
+oid sha256:4256b2cbebd45f47d6d6316f299d760c3b3e50e4a41281c69ae44ade57bfc38c
+size 3015063

model_size_cache.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fd4b59351406f51675c364cf95779063458dcf3e2653239c9f4e024ed16e23f1
+size 58618

requirements.txt CHANGED Viewed

@@ -1,3 +1,4 @@
 aiofiles==23.1.0
 aiohttp==3.8.4
 aiosignal==1.3.1

+accelerate==0.23.0
 aiofiles==23.1.0
 aiohttp==3.8.4
 aiosignal==1.3.1

src/assets/text_content.py CHANGED Viewed

@@ -1,7 +1,7 @@
 from src.display_models.model_metadata_type import ModelType
 TITLE = """<h1 align="center" id="space-title">🤗 Open LLM Leaderboard</h1>
-<h2 align="center" id="space-title">This space displays GPT-4 and GPT-3.5 scores from [techinal paper](https://cdn.openai.com/papers/gpt-4.pdf)</h2>"""
 INTRODUCTION_TEXT = """
 📐 The 🤗 Open LLM Leaderboard aims to track, rank and evaluate open LLMs and chatbots.

 from src.display_models.model_metadata_type import ModelType
 TITLE = """<h1 align="center" id="space-title">🤗 Open LLM Leaderboard</h1>
+<h2 align="center" id="space-title">This space displays GPT-4 and GPT-3.5 scores from <a href="https://cdn.openai.com/papers/gpt-4.pdf" target="_blank" rel="noopener noreferrer">techinal paper</a></h2>"""
 INTRODUCTION_TEXT = """
 📐 The 🤗 Open LLM Leaderboard aims to track, rank and evaluate open LLMs and chatbots.

src/display_models/get_model_metadata.py CHANGED Viewed

@@ -8,6 +8,8 @@ from typing import List
 import huggingface_hub
 from huggingface_hub import HfApi
 from tqdm import tqdm
 from src.display_models.model_metadata_flags import DO_NOT_SUBMIT_MODELS, FLAGGED_MODELS
 from src.display_models.model_metadata_type import MODEL_TYPE_METADATA, ModelType, model_type_from_str
@@ -21,8 +23,13 @@ def get_model_infos_from_hub(leaderboard_data: List[dict]):
     try:
         with open("model_info_cache.pkl", "rb") as f:
             model_info_cache = pickle.load(f)
-    except EOFError:
         model_info_cache = {}
     for model_data in tqdm(leaderboard_data):
         model_name = model_data["model_name_for_query"]
@@ -37,16 +44,21 @@ def get_model_infos_from_hub(leaderboard_data: List[dict]):
                 print("Repo not found!", model_name)
                 model_data[AutoEvalColumn.license.name] = None
                 model_data[AutoEvalColumn.likes.name] = None
-                model_data[AutoEvalColumn.params.name] = get_model_size(model_name, None)
-                continue
         model_data[AutoEvalColumn.license.name] = get_model_license(model_info)
         model_data[AutoEvalColumn.likes.name] = get_model_likes(model_info)
-        model_data[AutoEvalColumn.params.name] = get_model_size(model_name, model_info)
     # save cache to disk in pickle format
     with open("model_info_cache.pkl", "wb") as f:
         pickle.dump(model_info_cache, f)
 def get_model_license(model_info):
@@ -69,11 +81,17 @@ def get_model_size(model_name, model_info):
         return round(model_info.safetensors["total"] / 1e9, 3)
     except AttributeError:
         try:
-            size_match = re.search(size_pattern, model_name.lower())
-            size = size_match.group(0)
-            return round(float(size[:-1]) if size[-1] == "b" else float(size[:-1]) / 1e3, 3)
-        except AttributeError:
-            return 0
 def get_model_type(leaderboard_data: List[dict]):

 import huggingface_hub
 from huggingface_hub import HfApi
 from tqdm import tqdm
+from transformers import AutoModel, AutoConfig
+from accelerate import init_empty_weights
 from src.display_models.model_metadata_flags import DO_NOT_SUBMIT_MODELS, FLAGGED_MODELS
 from src.display_models.model_metadata_type import MODEL_TYPE_METADATA, ModelType, model_type_from_str
     try:
         with open("model_info_cache.pkl", "rb") as f:
             model_info_cache = pickle.load(f)
+    except (EOFError, FileNotFoundError):
         model_info_cache = {}
+    try:
+        with open("model_size_cache.pkl", "rb") as f:
+            model_size_cache = pickle.load(f)
+    except (EOFError, FileNotFoundError):
+        model_size_cache = {}
     for model_data in tqdm(leaderboard_data):
         model_name = model_data["model_name_for_query"]
                 print("Repo not found!", model_name)
                 model_data[AutoEvalColumn.license.name] = None
                 model_data[AutoEvalColumn.likes.name] = None
+                if model_name not in model_size_cache:
+                    model_size_cache[model_name] = get_model_size(model_name, None)
+                model_data[AutoEvalColumn.params.name] = model_size_cache[model_name]
         model_data[AutoEvalColumn.license.name] = get_model_license(model_info)
         model_data[AutoEvalColumn.likes.name] = get_model_likes(model_info)
+        if model_name not in model_size_cache:
+            model_size_cache[model_name] = get_model_size(model_name, model_info)
+        model_data[AutoEvalColumn.params.name] = model_size_cache[model_name]
     # save cache to disk in pickle format
     with open("model_info_cache.pkl", "wb") as f:
         pickle.dump(model_info_cache, f)
+    with open("model_size_cache.pkl", "wb") as f:
+        pickle.dump(model_size_cache, f)
 def get_model_license(model_info):
         return round(model_info.safetensors["total"] / 1e9, 3)
     except AttributeError:
         try:
+            config = AutoConfig.from_pretrained(model_name, trust_remote_code=False)
+            with init_empty_weights():
+                model = AutoModel.from_config(config, trust_remote_code=False)
+            return round(sum(p.numel() for p in model.parameters() if p.requires_grad) / 1e9, 3)
+        except (EnvironmentError, ValueError): # model config not found, likely private
+            try:
+                size_match = re.search(size_pattern, model_name.lower())
+                size = size_match.group(0)
+                return round(float(size[:-1]) if size[-1] == "b" else float(size[:-1]) / 1e3, 3)
+            except AttributeError:
+                return 0
 def get_model_type(leaderboard_data: List[dict]):