IlyasMoutawwakil HF staff committed on
Commit
ad8e780
•
1 Parent(s): 8e88086
Files changed (4) hide show
  1. app.py +159 -0
  2. model_size.py +6 -0
  3. model_types.py +31 -0
  4. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from apscheduler.schedulers.background import BackgroundScheduler
2
+ from model_types import MODEL_TYPES, ModelType
3
+ from huggingface_hub import HfApi
4
+ import matplotlib.pyplot as plt
5
+ import bar_chart_race as bcr
6
+ import pandas as pd
7
+ import gradio as gr
8
+ import os
9
+
10
+ # MODEL_SIZES = pd.read_pickle(
11
+ # "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/resolve/main/model_size_cache.pkl"
12
+ # )
13
+
14
# Load the raw leaderboard history and parse the timestamps.
open_llm_race_dataset = pd.read_csv("open_llm_race_dataset.csv")
open_llm_race_dataset["date"] = pd.to_datetime(open_llm_race_dataset["date"])

# Resample every model to a daily frequency: keep the last score observed in
# each daily bin, then carry that score forward over days with no observation.
# The legacy ``how=`` / ``fill_method=`` keywords are not part of the current
# pandas resample API, so the aggregation (``last``) and the forward fill are
# spelled out explicitly, and the fill is done per model so that scores never
# leak from one model into another.
open_llm_race_dataset = (
    open_llm_race_dataset.set_index("date")
    .groupby("model")["score"]
    .resample("D", closed="right")
    .last()
    .groupby(level="model")
    .ffill()
    .reset_index()
)

# Dates are rendered as ISO "YYYY-MM-DD" strings, which sort chronologically,
# so the cut-off below can be a plain string comparison.
open_llm_race_dataset["date"] = open_llm_race_dataset["date"].dt.strftime("%Y-%m-%d")

# Keep the race period only, then drop missing scores and duplicated
# (model, date) pairs. Chaining (instead of ``inplace=True`` on a filtered
# slice) always yields a fresh frame that is safe to add columns to.
open_llm_race_dataset = (
    open_llm_race_dataset.loc[
        open_llm_race_dataset["date"] >= "2023-07-10",
        ["date", "score", "model"],
    ]
    .dropna()
    .drop_duplicates(subset=["model", "date"])
)

# Add the model type (the enum member name, e.g. "PT"); models missing from
# MODEL_TYPES are tagged as Unknown.
open_llm_race_dataset["type"] = open_llm_race_dataset["model"].map(
    lambda model: MODEL_TYPES.get(model, ModelType.Unknown).name
)
39
+
40
+ # # add the model size
41
+ # open_llm_race_dataset["size"] = open_llm_race_dataset["model"].apply(
42
+ # lambda x: MODEL_SIZES[x] if x in MODEL_SIZES else None
43
+ # )
44
+
45
def _render_race_tab(label, model_type):
    """Add one tab holding the bar-chart race for a single model type.

    Must be called inside an open ``gr.Tabs()`` context.

    Args:
        label: Title displayed on the tab.
        model_type: ``ModelType`` member; rows of ``open_llm_race_dataset``
            whose ``type`` column equals ``model_type.name`` are raced.
    """
    with gr.TabItem(label=label):
        fig, ax = plt.subplots(figsize=(12, 6))
        ax.set_xlim(0, 100)

        # One row per date, one column per model.
        scores = open_llm_race_dataset[
            open_llm_race_dataset["type"] == model_type.name
        ].pivot(index="date", columns="model", values="score")
        # A model with no score on a given date is shown with a zero-length bar.
        scores = scores.fillna(0)

        race = bcr.bar_chart_race(
            scores,
            n_bars=10,
            fixed_max=True,
            period_length=1000,
            steps_per_period=20,
            end_period_pause=100,
            bar_texttemplate="{x:.2f}",
            filter_column_colors=True,
            fig=fig,
        )
        # The returned object exposes the rendered markup through ``.data``.
        gr.HTML(race.data)


# Demo interface
demo = gr.Blocks()
with demo:
    # leaderboard title
    gr.HTML("<h1>Open-LLM Race 🏃‍♂️</h1>")

    with gr.Tabs():
        _render_race_tab("Pretrained Models", ModelType.PT)
        _render_race_tab("Instruction Finetuned Models", ModelType.IFT)
        # This tab previously filtered on ModelType.IFT (copy-paste slip) and
        # therefore duplicated the instruction-tuned race.
        _render_race_tab("RLHF Models", ModelType.RL)
120
+
121
+ # with gr.TabItem(label="Finetuned Models"):
122
+ # finetuned_dataset = open_llm_race_dataset[
123
+ # open_llm_race_dataset["type"] == ModelType.FT.name
124
+ # ]
125
+ # finetuned_dataset = finetuned_dataset.pivot(
126
+ # index="date", columns="model", values="score"
127
+ # )
128
+ # finetuned_fig = bcr.bar_chart_race(
129
+ # finetuned_dataset,
130
+ # n_bars=10,
131
+ # fixed_max=True,
132
+ # period_length=1000,
133
+ # steps_per_period=20,
134
+ # end_period_pause=100,
135
+ # bar_texttemplate="{x:.2f}",
136
+ # filter_column_colors=True,
137
+ # fig=pretrained_fig,
138
+ # )
139
+ # gr.HTML(finetuned_fig.data)
140
+
141
+
142
def restart_space():
    """Ask the Hugging Face Hub to restart the Space that hosts this app.

    The access token is read from the ``HF_TOKEN`` environment variable;
    ``None`` is passed when the variable is not set.
    """
    HfApi().restart_space(
        # The Hub API expects the "<namespace>/<name>" identifier here,
        # not the URL of the Space page.
        repo_id="IlyasMoutawwakil/llm-bar-race",
        token=os.environ.get("HF_TOKEN"),
    )
147
+
148
+
149
# Restart the Space once an hour from a background scheduler thread.
scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, trigger="interval", seconds=60 * 60)
scheduler.start()


# Enable request queuing, then start serving the interface.
queued_demo = demo.queue(concurrency_count=10)
queued_demo.launch()
model_size.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+
3
+
4
# Location of the pickled model-size cache in the Open LLM Leaderboard Space.
_MODEL_SIZE_CACHE_URL = (
    "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard"
    "/resolve/main/model_size_cache.pkl"
)

# Downloaded and unpickled at import time.
# NOTE(review): unpickling can execute arbitrary code, so this fully trusts
# the remote Space — confirm that is acceptable before re-enabling its use.
MODEL_SIZES = pd.read_pickle(_MODEL_SIZE_CACHE_URL)
model_types.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import requests
3
+ from enum import Enum
4
+ from dataclasses import dataclass
5
+
6
+
7
@dataclass(frozen=True)
class ModelInfo:
    """Display metadata for one model category.

    Instances are used as ``Enum`` member values, so the class is frozen:
    the fields cannot be reassigned after construction and instances are
    hashable.
    """

    name: str  # human-readable label of the category
    symbol: str  # emoji
11
+
12
+
13
class ModelType(Enum):
    """Training category of a model; each member's value is a ``ModelInfo``.

    ``member.name`` is the short enum key (e.g. ``"PT"``), whereas the
    human-readable label is ``member.value.name``.
    """

    # The symbols are real emoji; they had been stored as mis-decoded UTF-8.
    PT = ModelInfo(name="pretrained", symbol="🟢")
    FT = ModelInfo(name="fine-tuned", symbol="🔶")
    IFT = ModelInfo(name="instruction-tuned", symbol="⭕")
    RL = ModelInfo(name="RL-tuned", symbol="🟦")
    Unknown = ModelInfo(name="Unknown, add type to request file!", symbol="❓")

    def to_str(self, separator=" "):
        """Return ``"<symbol><separator><label>"`` for this category.

        Args:
            separator: Text placed between the symbol and the label.
        """
        return f"{self.value.symbol}{separator}{self.value.name}"
22
+
23
+
24
# Download the leaderboard's model-type metadata module as plain text.
# A timeout keeps the import from hanging forever, and an HTTP error is raised
# immediately instead of being parsed as if it were the metadata source.
response = requests.get(
    "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/raw/main/src/display_models/model_metadata_type.py",
    timeout=30,
)
response.raise_for_status()
text = response.text

# Collect every "{...}" span; the longest one is taken to be the
# model-name -> ModelType mapping.
dicts = re.findall(r"\{.*?\}", text, re.DOTALL)
if not dicts:
    raise ValueError("no dict literal found in the downloaded model metadata")

# SECURITY: this evaluates source code fetched over the network. The values
# are ``ModelType.<member>`` expressions, so ``ast.literal_eval`` cannot be
# used; the namespace is limited to ``ModelType`` with builtins removed, which
# narrows the exposure but is not a sandbox. Only point this at a trusted URL.
MODEL_TYPES = eval(
    max(dicts, key=len),
    {"__builtins__": {}},
    {"ModelType": ModelType},
)
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ git+https://github.com/dexplo/bar_chart_race.git
2
+ huggingface_hub
3
+ APScheduler
4
+ pandas
5
+ tqdm