davidadamczyk committed
Commit ec84a57 · 1 Parent(s): e3034cd
app.py CHANGED
@@ -23,7 +23,7 @@ from src.display.utils import (
     ModelType,
     fields,
     WeightType,
-    Precision
+    Precision,
 )
 from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, TOKEN, QUEUE_REPO, REPO_ID, RESULTS_REPO
 from src.populate import get_evaluation_queue_df, get_leaderboard_df
@@ -33,6 +33,7 @@ from src.submission.submit import add_new_eval
 def restart_space():
     API.restart_space(repo_id=REPO_ID, token=TOKEN)
 
+
 try:
     print(EVAL_REQUESTS_PATH)
     snapshot_download(
@@ -167,7 +168,7 @@ with demo:
                             value=False, label="Show gated/private/deleted models", interactive=True
                         )
                 with gr.Column(min_width=320):
-                    #with gr.Box(elem_id="box-filter"):
+                    # with gr.Box(elem_id="box-filter"):
                     filter_columns_type = gr.CheckboxGroup(
                         label="Model types",
                         choices=[t.to_str() for t in ModelType],
@@ -201,7 +202,7 @@ with demo:
                 elem_id="leaderboard-table",
                 interactive=False,
                 visible=True,
-                column_widths=["2%", "33%"]
+                column_widths=["2%", "33%"],
             )
 
             # Dummy leaderboard for handling the case when the user uses backspace key
@@ -224,7 +225,13 @@ with demo:
                 ],
                 leaderboard_table,
             )
-            for selector in [shown_columns, filter_columns_type, filter_columns_precision, filter_columns_size, deleted_models_visibility]:
+            for selector in [
+                shown_columns,
+                filter_columns_type,
+                filter_columns_precision,
+                filter_columns_size,
+                deleted_models_visibility,
+            ]:
                 selector.change(
                     update_table,
                     [
src/display/about.py CHANGED
@@ -1,6 +1,7 @@
 from dataclasses import dataclass
 from enum import Enum
 
+
 @dataclass
 class Task:
     benchmark: str
@@ -10,9 +11,9 @@ class Task:
 
 # Init: to update with your specific keys
 class Tasks(Enum):
-    # task_key in the json file, metric_key in the json file, name to display in the leaderboard
-    task0 = Task("task_name1", "metric_name", "First task")
-    task1 = Task("task_name2", "metric_name", "Second task")
+    # task_key in the json file, metric_key in the json file, name to display in the leaderboard
+    task0 = Task("task_agree", "accuracy", "AGREE")
+    task1 = Task("task_anli", "accuracy", "ANLI")
 
 
 # Your leaderboard name
src/display/formatting.py CHANGED
@@ -7,6 +7,7 @@ from huggingface_hub.hf_api import ModelInfo
 
 API = HfApi()
 
+
 def model_hyperlink(link, model_name):
     return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'
 
src/display/utils.py CHANGED
@@ -5,6 +5,7 @@ import pandas as pd
 
 from src.display.about import Tasks
 
+
 def fields(raw_class):
     return [v for k, v in raw_class.__dict__.items() if k[:2] != "__" and k[-2:] != "__"]
 
@@ -21,12 +22,13 @@ class ColumnContent:
     never_hidden: bool = False
     dummy: bool = False
 
+
 ## Leaderboard columns
 auto_eval_column_dict = []
 # Init
 auto_eval_column_dict.append(["model_type_symbol", ColumnContent, ColumnContent("T", "str", True, never_hidden=True)])
 auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)])
-#Scores
+# Scores
 auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Average ⬆️", "number", True)])
 for task in Tasks:
     auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
@@ -46,6 +48,7 @@ auto_eval_column_dict.append(["dummy", ColumnContent, ColumnContent("model_name_
 # We use make dataclass to dynamically fill the scores from Tasks
 AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
 
+
 ## For the queue columns in the submission tab
 @dataclass(frozen=True)
 class EvalQueueColumn: # Queue column
@@ -56,12 +59,13 @@ class EvalQueueColumn: # Queue column
     weight_type = ColumnContent("weight_type", "str", "Original")
     status = ColumnContent("status", "str", True)
 
+
 ## All the model information that we might need
 @dataclass
 class ModelDetails:
     name: str
     display_name: str = ""
-    symbol: str = "" # emoji
+    symbol: str = ""  # emoji
 
 
 class ModelType(Enum):
@@ -86,11 +90,13 @@ class ModelType(Enum):
             return ModelType.IFT
         return ModelType.Unknown
 
+
 class WeightType(Enum):
     Adapter = ModelDetails("Adapter")
     Original = ModelDetails("Original")
     Delta = ModelDetails("Delta")
 
+
 class Precision(Enum):
     float16 = ModelDetails("float16")
     bfloat16 = ModelDetails("bfloat16")
@@ -112,6 +118,7 @@ class Precision(Enum):
             return Precision.qt_GPTQ
         return Precision.Unknown
 
+
 # Column selection
 COLS = [c.name for c in fields(AutoEvalColumn) if not c.hidden]
 TYPES = [c.type for c in fields(AutoEvalColumn) if not c.hidden]
src/envs.py CHANGED
@@ -10,7 +10,7 @@ REPO_ID = f"{OWNER}/leaderboard"
 QUEUE_REPO = f"{OWNER}/requests"
 RESULTS_REPO = f"{OWNER}/results"
 
-CACHE_PATH=os.getenv("HF_HOME", ".")
+CACHE_PATH = os.getenv("HF_HOME", ".")
 
 # Local caches
 EVAL_REQUESTS_PATH = os.path.join(CACHE_PATH, "eval-queue")
src/leaderboard/read_evals.py CHANGED
@@ -14,20 +14,20 @@ from src.submission.check_validity import is_model_on_hub
 
 @dataclass
 class EvalResult:
-    eval_name: str # org_model_precision (uid)
-    full_model: str # org/model (path on hub)
-    org: str
+    eval_name: str  # org_model_precision (uid)
+    full_model: str  # org/model (path on hub)
+    org: str
     model: str
-    revision: str # commit hash, "" if main
+    revision: str  # commit hash, "" if main
     results: dict
     precision: Precision = Precision.Unknown
-    model_type: ModelType = ModelType.Unknown # Pretrained, fine tuned, ...
-    weight_type: WeightType = WeightType.Original # Original or Adapter
-    architecture: str = "Unknown"
+    model_type: ModelType = ModelType.Unknown  # Pretrained, fine tuned, ...
+    weight_type: WeightType = WeightType.Original  # Original or Adapter
+    architecture: str = "Unknown"
     license: str = "?"
     likes: int = 0
     num_params: int = 0
-    date: str = "" # submission date of request file
+    date: str = ""  # submission date of request file
     still_on_hub: bool = False
 
     @classmethod
@@ -83,10 +83,10 @@ class EvalResult:
             org=org,
             model=model,
             results=results,
-            precision=precision,
-            revision= config.get("model_sha", ""),
+            precision=precision,
+            revision=config.get("model_sha", ""),
             still_on_hub=still_on_hub,
-            architecture=architecture
+            architecture=architecture,
         )
 
     def update_with_request_file(self, requests_path):
@@ -145,10 +145,7 @@ def get_request_file_for_model(requests_path, model_name, precision):
     for tmp_request_file in request_files:
         with open(tmp_request_file, "r") as f:
             req_content = json.load(f)
-            if (
-                req_content["status"] in ["FINISHED"]
-                and req_content["precision"] == precision.split(".")[-1]
-            ):
+            if req_content["status"] in ["FINISHED"] and req_content["precision"] == precision.split(".")[-1]:
                 request_file = tmp_request_file
     return request_file
 
@@ -187,7 +184,7 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResult]:
     results = []
     for v in eval_results.values():
         try:
-            v.to_dict() # we test if the dict version is complete
+            v.to_dict()  # we test if the dict version is complete
             results.append(v)
         except KeyError: # not all eval values present
             continue
src/submission/check_validity.py CHANGED
@@ -10,6 +10,7 @@ from huggingface_hub.hf_api import ModelInfo
 from transformers import AutoConfig
 from transformers.models.auto.tokenization_auto import tokenizer_class_from_name, get_tokenizer_config
 
+
 def check_model_card(repo_id: str) -> tuple[bool, str]:
     """Checks if the model card and license exist and have been filled"""
     try:
@@ -32,24 +33,27 @@ def check_model_card(repo_id: str) -> tuple[bool, str]:
     return True, ""
 
 
-def is_model_on_hub(model_name: str, revision: str, token: str = None, trust_remote_code=False, test_tokenizer=False) -> tuple[bool, str]:
+def is_model_on_hub(
+    model_name: str, revision: str, token: str = None, trust_remote_code=False, test_tokenizer=False
+) -> tuple[bool, str]:
     """Makes sure the model is on the hub, and uses a valid configuration (in the latest transformers version)"""
     try:
-        config = AutoConfig.from_pretrained(model_name, revision=revision, trust_remote_code=trust_remote_code, token=token)
+        config = AutoConfig.from_pretrained(
+            model_name, revision=revision, trust_remote_code=trust_remote_code, token=token
+        )
         if test_tokenizer:
-            tokenizer_config = get_tokenizer_config(model_name)
+            tokenizer_config = get_tokenizer_config(model_name)
             if tokenizer_config is not None:
                 tokenizer_class_candidate = tokenizer_config.get("tokenizer_class", None)
             else:
-                tokenizer_class_candidate = config.tokenizer_class
-
+                tokenizer_class_candidate = config.tokenizer_class
 
             tokenizer_class = tokenizer_class_from_name(tokenizer_class_candidate)
             if tokenizer_class is None:
                 return (
                     False,
                     f"uses {tokenizer_class_candidate}, which is not in a transformers release, therefore not supported at the moment.",
-                    None
+                    None,
                 )
         return True, None, config
 
@@ -57,7 +61,7 @@
         return (
             False,
            "needs to be launched with `trust_remote_code=True`. For safety reason, we do not allow these models to be automatically submitted to the leaderboard.",
-            None
+            None,
        )
 
     except Exception as e:
@@ -75,10 +79,12 @@ def get_model_size(model_info: ModelInfo, precision: str):
     model_size = size_factor * model_size
     return model_size
 
+
 def get_model_arch(model_info: ModelInfo):
     """Gets the model architecture from the configuration"""
     return model_info.config.get("architectures", "Unknown")
 
+
 def already_submitted_models(requested_models_dir: str) -> set[str]:
     depth = 1
     file_names = []
src/submission/submit.py CHANGED
@@ -14,6 +14,7 @@ from src.submission.check_validity import (
 REQUESTED_MODELS = None
 USERS_TO_SUBMISSION_DATES = None
 
+
 def add_new_eval(
     model: str,
     base_model: str,
@@ -45,7 +46,9 @@ def add_new_eval(
 
     # Is the model on the hub?
     if weight_type in ["Delta", "Adapter"]:
-        base_model_on_hub, error, _ = is_model_on_hub(model_name=base_model, revision=revision, token=TOKEN, test_tokenizer=True)
+        base_model_on_hub, error, _ = is_model_on_hub(
+            model_name=base_model, revision=revision, token=TOKEN, test_tokenizer=True
+        )
         if not base_model_on_hub:
             return styled_error(f'Base model "{base_model}" {error}')
 