Spaces:
Runtime error
Runtime error
Commit
•
ad8e780
1
Parent(s):
8e88086
init
Browse files- app.py +159 -0
- model_size.py +6 -0
- model_types.py +31 -0
- requirements.txt +5 -0
app.py
ADDED
@@ -0,0 +1,159 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from apscheduler.schedulers.background import BackgroundScheduler
|
2 |
+
from model_types import MODEL_TYPES, ModelType
|
3 |
+
from huggingface_hub import HfApi
|
4 |
+
import matplotlib.pyplot as plt
|
5 |
+
import bar_chart_race as bcr
|
6 |
+
import pandas as pd
|
7 |
+
import gradio as gr
|
8 |
+
import os
|
9 |
+
|
10 |
+
# MODEL_SIZES = pd.read_pickle(
|
11 |
+
# "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/resolve/main/model_size_cache.pkl"
|
12 |
+
# )
|
13 |
+
|
14 |
+
# read in the data
open_llm_race_dataset = pd.read_csv("open_llm_race_dataset.csv")
open_llm_race_dataset["date"] = pd.to_datetime(open_llm_race_dataset["date"])


def _to_daily(model_rows):
    """Return one model's scores with exactly one row per calendar day.

    The last score recorded on each day is kept, and days without any record
    inherit the most recent earlier score (forward fill).

    Args:
        model_rows: rows of the race dataset that belong to a single model;
            must contain a datetime ``date`` column and a ``score`` column.

    Returns:
        A DataFrame with ``date`` and ``score`` columns at daily frequency.
    """
    return (
        model_rows.set_index("date")[["score"]]
        # The legacy `how=` / `fill_method=` keywords are not part of the
        # documented resample API, so the aggregation and the forward fill
        # are spelled out as explicit method calls instead.
        .resample("D", closed="right")
        .last()
        .ffill()
        .reset_index()
    )


# resample every model to a daily frequency
open_llm_race_dataset = pd.concat(
    [
        _to_daily(model_rows).assign(model=model_name)
        for model_name, model_rows in open_llm_race_dataset.groupby("model")
    ],
    ignore_index=True,
)

# filter: ISO formatted dates compare correctly as plain strings
open_llm_race_dataset["date"] = open_llm_race_dataset["date"].dt.strftime("%Y-%m-%d")
open_llm_race_dataset = (
    open_llm_race_dataset.loc[
        open_llm_race_dataset["date"] >= "2023-07-10", ["date", "score", "model"]
    ]
    # drop nan values
    .dropna()
    # drop duplicates on model and date
    .drop_duplicates(subset=["model", "date"])
)

# add the model type (the enum member name, e.g. "PT"); models missing from
# MODEL_TYPES are labelled with the Unknown member
open_llm_race_dataset["type"] = open_llm_race_dataset["model"].map(
    lambda model_name: MODEL_TYPES.get(model_name, ModelType.Unknown).name
)
|
39 |
+
|
40 |
+
# # add the model size
|
41 |
+
# open_llm_race_dataset["size"] = open_llm_race_dataset["model"].apply(
|
42 |
+
# lambda x: MODEL_SIZES[x] if x in MODEL_SIZES else None
|
43 |
+
# )
|
44 |
+
|
45 |
+
# Demo interface
def _render_race_tab(label, model_type):
    """Add one tab containing the bar chart race of a single model type.

    Must be called inside an open ``gr.Tabs()`` context.

    Args:
        label: text shown on the tab.
        model_type: ``ModelType`` member; rows of ``open_llm_race_dataset``
            whose ``type`` column equals ``model_type.name`` are plotted.
    """
    with gr.TabItem(label=label):
        selected = open_llm_race_dataset[
            open_llm_race_dataset["type"] == model_type.name
        ]
        if selected.empty:
            # Nothing to animate for this type; show a placeholder instead of
            # handing an empty frame to the chart library.
            gr.HTML("<p>No data available for this model type.</p>")
            return

        fig, ax = plt.subplots(figsize=(12, 6))
        ax.set_xlim(0, 100)

        # One row per date, one column per model; a model with no score on a
        # given date is drawn with a zero-length bar.
        scores = selected.pivot(index="date", columns="model", values="score")
        scores = scores.fillna(0)

        animation = bcr.bar_chart_race(
            scores,
            n_bars=10,
            fixed_max=True,
            period_length=1000,
            steps_per_period=20,
            end_period_pause=100,
            bar_texttemplate="{x:.2f}",
            filter_column_colors=True,
            fig=fig,
        )
        # NOTE(review): the result is read through `.data` when it has one
        # (presumably an IPython HTML wrapper) and used as-is otherwise
        # (presumably the raw HTML string) - confirm against bar_chart_race.
        gr.HTML(getattr(animation, "data", animation))


demo = gr.Blocks()
with demo:
    # leaderboard title
    gr.HTML("<h1>Open-LLM Race 🏃‍♂️</h1>")

    with gr.Tabs():
        _render_race_tab("Pretrained Models", ModelType.PT)
        _render_race_tab("Instructions Finetuned Models", ModelType.IFT)
        # The RLHF tab previously filtered on ModelType.IFT and therefore
        # repeated the instruction-tuned chart.
        _render_race_tab("RLHF Models", ModelType.RL)
|
120 |
+
|
121 |
+
# with gr.TabItem(label="Finetuned Models"):
|
122 |
+
# finetuned_dataset = open_llm_race_dataset[
|
123 |
+
# open_llm_race_dataset["type"] == ModelType.FT.name
|
124 |
+
# ]
|
125 |
+
# finetuned_dataset = finetuned_dataset.pivot(
|
126 |
+
# index="date", columns="model", values="score"
|
127 |
+
# )
|
128 |
+
# finetuned_fig = bcr.bar_chart_race(
|
129 |
+
# finetuned_dataset,
|
130 |
+
# n_bars=10,
|
131 |
+
# fixed_max=True,
|
132 |
+
# period_length=1000,
|
133 |
+
# steps_per_period=20,
|
134 |
+
# end_period_pause=100,
|
135 |
+
# bar_texttemplate="{x:.2f}",
|
136 |
+
# filter_column_colors=True,
|
137 |
+
# fig=pretrained_fig,
|
138 |
+
# )
|
139 |
+
# gr.HTML(finetuned_fig.data)
|
140 |
+
|
141 |
+
|
142 |
+
def restart_space():
    """Ask the Hugging Face Hub to restart this Space.

    The access token is read from the ``HF_TOKEN`` environment variable;
    ``None`` is passed when the variable is not set.
    """
    HfApi().restart_space(
        # The Hub API identifies a Space by "namespace/name", not by its URL.
        repo_id="IlyasMoutawwakil/llm-bar-race",
        token=os.environ.get("HF_TOKEN"),
    )
|
147 |
+
|
148 |
+
|
149 |
+
# Schedule a restart of the space every 3600 seconds (one hour) on a
# background scheduler, then start serving the demo.
scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, "interval", seconds=3600)
scheduler.start()


queued_demo = demo.queue(concurrency_count=10)
queued_demo.launch()
|
model_size.py
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
|
3 |
+
|
4 |
+
# Location of the pickled model-size cache published by the upstream leaderboard.
_MODEL_SIZES_URL = "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/resolve/main/model_size_cache.pkl"

# SECURITY: unpickling can execute arbitrary code, and this file is fetched
# from the network on import - only keep this if the upstream space is trusted.
MODEL_SIZES = pd.read_pickle(_MODEL_SIZES_URL)
|
model_types.py
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import re
|
2 |
+
import requests
|
3 |
+
from enum import Enum
|
4 |
+
from dataclasses import dataclass
|
5 |
+
|
6 |
+
|
7 |
+
@dataclass
class ModelInfo:
    """Display metadata attached to each ``ModelType`` member."""

    name: str  # human-readable label of the category
    symbol: str  # emoji


class ModelType(Enum):
    """Categories of models; every member's value is a ``ModelInfo``.

    Note that ``member.name`` is the enum member name (e.g. ``"PT"``), while
    ``member.value.name`` is the human-readable label (e.g. ``"pretrained"``).
    """

    # NOTE(review): the symbols were restored from mis-encoded text. The IFT
    # and Unknown glyphs are best guesses - confirm against the upstream file.
    PT = ModelInfo(name="pretrained", symbol="🟢")
    FT = ModelInfo(name="fine-tuned", symbol="🔶")
    IFT = ModelInfo(name="instruction-tuned", symbol="⭕")
    RL = ModelInfo(name="RL-tuned", symbol="🟦")
    Unknown = ModelInfo(name="Unknown, add type to request file!", symbol="❓")

    def to_str(self, separator=" "):
        """Return the symbol and the label joined by ``separator``.

        Args:
            separator: text placed between symbol and label (default: a space).

        Returns:
            A string of the form ``"<symbol><separator><label>"``.
        """
        return f"{self.value.symbol}{separator}{self.value.name}"
|
22 |
+
|
23 |
+
|
24 |
+
# Python source file of the upstream leaderboard from which the mapping
# of model name to model type is extracted.
_MODEL_METADATA_URL = "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/raw/main/src/display_models/model_metadata_type.py"

# A timeout keeps the import from hanging forever, and an HTTP error page is
# rejected instead of being parsed as if it were the metadata file.
response = requests.get(_MODEL_METADATA_URL, timeout=30)
response.raise_for_status()
text = response.text

# Collect every brace-delimited span of the file; the longest one is taken to
# be the metadata dict literal.
dicts = re.findall(r"\{.*?\}", text, re.DOTALL)
if not dicts:
    raise RuntimeError("no dict literal found in the upstream model metadata file")

# SECURITY: eval() executes text downloaded from the network. It is evaluated
# in this module's namespace so that names used by the literal (ModelType)
# resolve - only keep this if the upstream space is trusted.
MODEL_TYPES = eval(max(dicts, key=len))
|
requirements.txt
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
git+https://github.com/dexplo/bar_chart_race.git
|
2 |
+
huggingface_hub
|
3 |
+
APScheduler
|
4 |
+
pandas
|
5 |
+
tqdm
|