Commit ec84a57
1 Parent(s): e3034cd
update

Files changed:
- app.py +11 -4
- src/display/about.py +4 -3
- src/display/formatting.py +1 -0
- src/display/utils.py +9 -2
- src/envs.py +1 -1
- src/leaderboard/read_evals.py +13 -16
- src/submission/check_validity.py +13 -7
- src/submission/submit.py +4 -1
app.py
CHANGED
@@ -23,7 +23,7 @@ from src.display.utils import (
     ModelType,
     fields,
     WeightType,
-    Precision
+    Precision,
 )
 from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, TOKEN, QUEUE_REPO, REPO_ID, RESULTS_REPO
 from src.populate import get_evaluation_queue_df, get_leaderboard_df
@@ -33,6 +33,7 @@ from src.submission.submit import add_new_eval
 def restart_space():
     API.restart_space(repo_id=REPO_ID, token=TOKEN)
 
+
 try:
     print(EVAL_REQUESTS_PATH)
     snapshot_download(
@@ -167,7 +168,7 @@ with demo:
                         value=False, label="Show gated/private/deleted models", interactive=True
                     )
                 with gr.Column(min_width=320):
-                    #with gr.Box(elem_id="box-filter"):
+                    # with gr.Box(elem_id="box-filter"):
                     filter_columns_type = gr.CheckboxGroup(
                         label="Model types",
                         choices=[t.to_str() for t in ModelType],
@@ -201,7 +202,7 @@ with demo:
                 elem_id="leaderboard-table",
                 interactive=False,
                 visible=True,
-                column_widths=["2%", "33%"]
+                column_widths=["2%", "33%"],
             )
 
             # Dummy leaderboard for handling the case when the user uses backspace key
@@ -224,7 +225,13 @@ with demo:
                 ],
                 leaderboard_table,
             )
-            for selector in [
+            for selector in [
+                shown_columns,
+                filter_columns_type,
+                filter_columns_precision,
+                filter_columns_size,
+                deleted_models_visibility,
+            ]:
                 selector.change(
                     update_table,
                     [
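The last hunk only reflows the selector loop into black's multi-line list style; the behaviour is unchanged: every filter widget's change event re-runs update_table. A minimal sketch of that wiring pattern, using made-up components and a toy update function rather than the app's real ones:

import gradio as gr

def update_table(query, min_avg):
    # Toy filter: keep rows whose model name contains the query and whose
    # average score is at least min_avg.
    rows = [["model-a", 71.2], ["model-b", 64.5]]
    return [r for r in rows if query.lower() in r[0] and r[1] >= min_avg]

with gr.Blocks() as demo:
    search = gr.Textbox(label="Search")
    min_avg = gr.Slider(0, 100, value=0, label="Min average")
    table = gr.Dataframe(value=update_table("", 0), interactive=False)

    # Same pattern as the diff: bind every filter widget to the one update function.
    for selector in [search, min_avg]:
        selector.change(update_table, [search, min_avg], table)

demo.launch()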
src/display/about.py
CHANGED
@@ -1,6 +1,7 @@
 from dataclasses import dataclass
 from enum import Enum
 
+
 @dataclass
 class Task:
     benchmark: str
@@ -10,9 +11,9 @@ class Task:
 
 # Init: to update with your specific keys
 class Tasks(Enum):
-    # task_key in the json file, metric_key in the json file, name to display in the leaderboard
-    task0 = Task("
-    task1 = Task("
+    # task_key in the json file, metric_key in the json file, name to display in the leaderboard
+    task0 = Task("task_agree", "accuracy", "AGREE")
+    task1 = Task("task_anli", "accuracy", "ANLI")
 
 
 # Your leaderboard name
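Each Task entry ties a task key and a metric key in the results JSON to the column header shown on the leaderboard. A small sketch of how an entry such as Task("task_agree", "accuracy", "AGREE") gets resolved; col_name matches the task.value.col_name reference in src/display/utils.py, while the metric field name and the results payload below are assumptions for illustration:

from dataclasses import dataclass
from enum import Enum

@dataclass
class Task:
    benchmark: str  # task key in the results JSON
    metric: str     # metric key inside that task's entry (assumed field name)
    col_name: str   # column header on the leaderboard

class Tasks(Enum):
    task0 = Task("task_agree", "accuracy", "AGREE")
    task1 = Task("task_anli", "accuracy", "ANLI")

# Hypothetical results payload, shaped the way the keys suggest.
results = {
    "task_agree": {"accuracy": 0.87},
    "task_anli": {"accuracy": 0.61},
}

for task in Tasks:
    t = task.value
    print(f"{t.col_name}: {results[t.benchmark][t.metric]:.2f}")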
src/display/formatting.py
CHANGED
@@ -7,6 +7,7 @@ from huggingface_hub.hf_api import ModelInfo
 
 API = HfApi()
 
+
 def model_hyperlink(link, model_name):
     return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'
 
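Only a blank line is added here; model_hyperlink itself just wraps a model name in an HTML anchor that the leaderboard's markdown column renders. A one-line usage sketch with an illustrative URL:

link = "https://huggingface.co/org/model"  # illustrative value
cell = model_hyperlink(link, "org/model")
# cell is an <a target="_blank" href=...> tag whose visible text is "org/model"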
src/display/utils.py
CHANGED
@@ -5,6 +5,7 @@ import pandas as pd
 
 from src.display.about import Tasks
 
+
 def fields(raw_class):
     return [v for k, v in raw_class.__dict__.items() if k[:2] != "__" and k[-2:] != "__"]
 
@@ -21,12 +22,13 @@ class ColumnContent:
     never_hidden: bool = False
     dummy: bool = False
 
+
 ## Leaderboard columns
 auto_eval_column_dict = []
 # Init
 auto_eval_column_dict.append(["model_type_symbol", ColumnContent, ColumnContent("T", "str", True, never_hidden=True)])
 auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)])
-#Scores
+# Scores
 auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Average ⬆️", "number", True)])
 for task in Tasks:
     auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
@@ -46,6 +48,7 @@ auto_eval_column_dict.append(["dummy", ColumnContent, ColumnContent("model_name_
 # We use make dataclass to dynamically fill the scores from Tasks
 AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
 
+
 ## For the queue columns in the submission tab
 @dataclass(frozen=True)
 class EvalQueueColumn: # Queue column
@@ -56,12 +59,13 @@ class EvalQueueColumn: # Queue column
     weight_type = ColumnContent("weight_type", "str", "Original")
     status = ColumnContent("status", "str", True)
 
+
 ## All the model information that we might need
 @dataclass
 class ModelDetails:
     name: str
     display_name: str = ""
-    symbol: str = ""
+    symbol: str = ""  # emoji
 
 
 class ModelType(Enum):
@@ -86,11 +90,13 @@ class ModelType(Enum):
             return ModelType.IFT
         return ModelType.Unknown
 
+
 class WeightType(Enum):
     Adapter = ModelDetails("Adapter")
     Original = ModelDetails("Original")
     Delta = ModelDetails("Delta")
 
+
 class Precision(Enum):
     float16 = ModelDetails("float16")
     bfloat16 = ModelDetails("bfloat16")
@@ -112,6 +118,7 @@ class Precision(Enum):
             return Precision.qt_GPTQ
         return Precision.Unknown
 
+
 # Column selection
 COLS = [c.name for c in fields(AutoEvalColumn) if not c.hidden]
 TYPES = [c.type for c in fields(AutoEvalColumn) if not c.hidden]
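auto_eval_column_dict collects (attribute name, annotation, default) triples and make_dataclass turns them into the frozen AutoEvalColumn class, which is why one leaderboard column appears automatically per Task. A condensed sketch of that mechanism with only two columns; ColumnContent is declared frozen here so its instances are hashable and therefore valid dataclass defaults, which is an assumption about how the real class is configured:

from dataclasses import dataclass, make_dataclass

@dataclass(frozen=True)
class ColumnContent:
    name: str
    type: str
    displayed_by_default: bool
    hidden: bool = False
    never_hidden: bool = False
    dummy: bool = False

# Same triple shape make_dataclass expects: [attribute, annotation, default].
cols = [
    ["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)],
    ["average", ColumnContent, ColumnContent("Average ⬆️", "number", True)],
]
AutoEvalColumn = make_dataclass("AutoEvalColumn", cols, frozen=True)

print(AutoEvalColumn.model.name)    # Model
print(AutoEvalColumn.average.type)  # number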
src/envs.py
CHANGED
@@ -10,7 +10,7 @@ REPO_ID = f"{OWNER}/leaderboard"
 QUEUE_REPO = f"{OWNER}/requests"
 RESULTS_REPO = f"{OWNER}/results"
 
-CACHE_PATH=os.getenv("HF_HOME", ".")
+CACHE_PATH = os.getenv("HF_HOME", ".")
 
 # Local caches
 EVAL_REQUESTS_PATH = os.path.join(CACHE_PATH, "eval-queue")
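The reformatted line reads the cache root from HF_HOME, falling back to the current directory, and the queue cache hangs off it. A tiny illustration; the environment value is made up:

import os

os.environ["HF_HOME"] = "/data/hf-cache"  # illustrative value

CACHE_PATH = os.getenv("HF_HOME", ".")
EVAL_REQUESTS_PATH = os.path.join(CACHE_PATH, "eval-queue")
print(EVAL_REQUESTS_PATH)  # /data/hf-cache/eval-queue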
src/leaderboard/read_evals.py
CHANGED
@@ -14,20 +14,20 @@ from src.submission.check_validity import is_model_on_hub
 
 @dataclass
 class EvalResult:
-    eval_name: str
-    full_model: str
-    org: str
+    eval_name: str  # org_model_precision (uid)
+    full_model: str  # org/model (path on hub)
+    org: str
     model: str
-    revision: str
+    revision: str  # commit hash, "" if main
     results: dict
     precision: Precision = Precision.Unknown
-    model_type: ModelType = ModelType.Unknown
-    weight_type: WeightType = WeightType.Original
-    architecture: str = "Unknown"
+    model_type: ModelType = ModelType.Unknown  # Pretrained, fine tuned, ...
+    weight_type: WeightType = WeightType.Original  # Original or Adapter
+    architecture: str = "Unknown"
     license: str = "?"
     likes: int = 0
     num_params: int = 0
-    date: str = ""
+    date: str = ""  # submission date of request file
     still_on_hub: bool = False
 
     @classmethod
@@ -83,10 +83,10 @@
             org=org,
             model=model,
             results=results,
-            precision=precision,
-            revision=
+            precision=precision,
+            revision=config.get("model_sha", ""),
             still_on_hub=still_on_hub,
-            architecture=architecture
+            architecture=architecture,
         )
 
     def update_with_request_file(self, requests_path):
@@ -145,10 +145,7 @@ def get_request_file_for_model(requests_path, model_name, precision):
     for tmp_request_file in request_files:
         with open(tmp_request_file, "r") as f:
             req_content = json.load(f)
-            if (
-                req_content["status"] in ["FINISHED"]
-                and req_content["precision"] == precision.split(".")[-1]
-            ):
+            if req_content["status"] in ["FINISHED"] and req_content["precision"] == precision.split(".")[-1]:
                 request_file = tmp_request_file
     return request_file
 
@@ -187,7 +184,7 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResu
     results = []
     for v in eval_results.values():
         try:
-            v.to_dict()
+            v.to_dict()  # we test if the dict version is complete
             results.append(v)
         except KeyError:  # not all eval values present
             continue
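Besides the field comments, the substantive changes are that EvalResult.revision is now filled from config.get("model_sha", "") and that the request-file match in get_request_file_for_model is collapsed onto one line: a file counts only when its status is FINISHED and its precision matches. A small sketch of that check against a hypothetical request payload; only the two keys and the condition come from the diff, the file contents and the dotted precision string are assumptions:

import json

# Hypothetical eval-request file; "status" and "precision" are the keys the
# condition reads, everything else is illustrative.
req_content = json.loads('{"model": "org/model", "status": "FINISHED", "precision": "bfloat16"}')

# The split(".")[-1] suggests precision can arrive as a dotted, enum-style string.
precision = "Precision.bfloat16"

if req_content["status"] in ["FINISHED"] and req_content["precision"] == precision.split(".")[-1]:
    request_file = "eval_request_org_model.json"  # illustrative path
    print("matched:", request_file)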
src/submission/check_validity.py
CHANGED
@@ -10,6 +10,7 @@ from huggingface_hub.hf_api import ModelInfo
 from transformers import AutoConfig
 from transformers.models.auto.tokenization_auto import tokenizer_class_from_name, get_tokenizer_config
 
+
 def check_model_card(repo_id: str) -> tuple[bool, str]:
     """Checks if the model card and license exist and have been filled"""
     try:
@@ -32,24 +33,27 @@ def check_model_card(repo_id: str) -> tuple[bool, str]:
     return True, ""
 
 
-def is_model_on_hub(
+def is_model_on_hub(
+    model_name: str, revision: str, token: str = None, trust_remote_code=False, test_tokenizer=False
+) -> tuple[bool, str]:
     """Makes sure the model is on the hub, and uses a valid configuration (in the latest transformers version)"""
     try:
-        config = AutoConfig.from_pretrained(
+        config = AutoConfig.from_pretrained(
+            model_name, revision=revision, trust_remote_code=trust_remote_code, token=token
+        )
         if test_tokenizer:
-            tokenizer_config = get_tokenizer_config(model_name)
+            tokenizer_config = get_tokenizer_config(model_name)
             if tokenizer_config is not None:
                 tokenizer_class_candidate = tokenizer_config.get("tokenizer_class", None)
             else:
-                tokenizer_class_candidate = config.tokenizer_class
-
+                tokenizer_class_candidate = config.tokenizer_class
 
             tokenizer_class = tokenizer_class_from_name(tokenizer_class_candidate)
             if tokenizer_class is None:
                 return (
                     False,
                     f"uses {tokenizer_class_candidate}, which is not in a transformers release, therefore not supported at the moment.",
-                    None
+                    None,
                 )
         return True, None, config
 
@@ -57,7 +61,7 @@ def is_model_on_hub(model_name: str, revision: str, token: str = None, trust_rem
         return (
             False,
             "needs to be launched with `trust_remote_code=True`. For safety reason, we do not allow these models to be automatically submitted to the leaderboard.",
-            None
+            None,
        )
 
     except Exception as e:
@@ -75,10 +79,12 @@ def get_model_size(model_info: ModelInfo, precision: str):
     model_size = size_factor * model_size
     return model_size
 
+
 def get_model_arch(model_info: ModelInfo):
     """Gets the model architecture from the configuration"""
     return model_info.config.get("architectures", "Unknown")
 
+
 def already_submitted_models(requested_models_dir: str) -> set[str]:
     depth = 1
     file_names = []
src/submission/submit.py
CHANGED
@@ -14,6 +14,7 @@ from src.submission.check_validity import (
 REQUESTED_MODELS = None
 USERS_TO_SUBMISSION_DATES = None
 
+
 def add_new_eval(
     model: str,
     base_model: str,
@@ -45,7 +46,9 @@ def add_new_eval(
 
     # Is the model on the hub?
     if weight_type in ["Delta", "Adapter"]:
-        base_model_on_hub, error, _ = is_model_on_hub(
+        base_model_on_hub, error, _ = is_model_on_hub(
+            model_name=base_model, revision=revision, token=TOKEN, test_tokenizer=True
+        )
         if not base_model_on_hub:
             return styled_error(f'Base model "{base_model}" {error}')
 
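This call site is the counterpart of the is_model_on_hub signature reformatted in src/submission/check_validity.py: before a Delta or Adapter submission is accepted, the base model must resolve on the Hub (including its tokenizer, since test_tokenizer=True), and the three-element return is unpacked into (found, error message, config). A self-contained sketch of that guard with is_model_on_hub stubbed out so it runs without network access; the stub and the input values are illustrative:

def is_model_on_hub(model_name, revision, token=None, trust_remote_code=False, test_tokenizer=False):
    # Stub standing in for src.submission.check_validity.is_model_on_hub:
    # pretend the base model cannot be found.
    return False, "was not found on hub!", None

# Illustrative form inputs; in submit.py these come from the submission UI,
# and TOKEN is imported from src.envs.
weight_type, base_model, revision, TOKEN = "Adapter", "org/base-model", "main", None

if weight_type in ["Delta", "Adapter"]:
    base_model_on_hub, error, _ = is_model_on_hub(
        model_name=base_model, revision=revision, token=TOKEN, test_tokenizer=True
    )
    if not base_model_on_hub:
        print(f'Base model "{base_model}" {error}')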