future-xy committed on
Commit
1c22d8d
·
1 Parent(s): 7c45643

support selecting inference framework

Browse files
app.py CHANGED
@@ -33,6 +33,7 @@ from src.display.utils import (
33
  TYPES,
34
  AutoEvalColumn,
35
  ModelType,
 
36
  fields,
37
  WeightType,
38
  Precision,
@@ -183,6 +184,14 @@ with demo:
183
  )
184
 
185
  with gr.Column(min_width=320):
 
 
 
 
 
 
 
 
186
  filter_columns_type = gr.CheckboxGroup(
187
  label="Model types",
188
  choices=[t.to_str() for t in ModelType],
@@ -199,13 +208,13 @@ with demo:
199
  elem_id="filter-columns-precision",
200
  )
201
 
202
- filter_columns_size = gr.CheckboxGroup(
203
- label="Model sizes (in billions of parameters)",
204
- choices=list(NUMERIC_INTERVALS.keys()),
205
- value=list(NUMERIC_INTERVALS.keys()),
206
- interactive=True,
207
- elem_id="filter-columns-size",
208
- )
209
 
210
  # breakpoint()
211
 
@@ -308,6 +317,15 @@ with demo:
308
  with gr.Row():
309
  gr.Markdown("# Submit your model here", elem_classes="markdown-text")
310
 
 
 
 
 
 
 
 
 
 
311
  with gr.Row():
312
  with gr.Column():
313
  model_name_textbox = gr.Textbox(label="Model name")
 
33
  TYPES,
34
  AutoEvalColumn,
35
  ModelType,
36
+ InferenceFramework,
37
  fields,
38
  WeightType,
39
  Precision,
 
184
  )
185
 
186
  with gr.Column(min_width=320):
187
+ filter_columns_size = gr.CheckboxGroup(
188
+ label="Inference frameworks",
189
+ choices=[t.to_str() for t in InferenceFramework],
190
+ value=[t.to_str() for t in InferenceFramework],
191
+ interactive=True,
192
+ elem_id="filter-columns-size",
193
+ )
194
+
195
  filter_columns_type = gr.CheckboxGroup(
196
  label="Model types",
197
  choices=[t.to_str() for t in ModelType],
 
208
  elem_id="filter-columns-precision",
209
  )
210
 
211
+ # filter_columns_size = gr.CheckboxGroup(
212
+ # label="Model sizes (in billions of parameters)",
213
+ # choices=list(NUMERIC_INTERVALS.keys()),
214
+ # value=list(NUMERIC_INTERVALS.keys()),
215
+ # interactive=True,
216
+ # elem_id="filter-columns-size",
217
+ # )
218
 
219
  # breakpoint()
220
 
 
317
  with gr.Row():
318
  gr.Markdown("# Submit your model here", elem_classes="markdown-text")
319
 
320
+ with gr.Row():
321
+ inference_framework = gr.Dropdown(
322
+ choices=[t.to_str() for t in InferenceFramework],
323
+ label="Inference framework",
324
+ multiselect=False,
325
+ value=None,
326
+ interactive=True,
327
+ )
328
+
329
  with gr.Row():
330
  with gr.Column():
331
  model_name_textbox = gr.Textbox(label="Model name")
src/backend/manage_requests.py CHANGED
@@ -16,6 +16,7 @@ class EvalRequest:
16
  json_filepath: str
17
  weight_type: str = "Original"
18
  model_type: str = "" # pretrained, finetuned, with RL
 
19
  precision: str = "" # float16, bfloat16
20
  base_model: Optional[str] = None # for adapter models
21
  revision: str = "main" # commit
 
16
  json_filepath: str
17
  weight_type: str = "Original"
18
  model_type: str = "" # pretrained, finetuned, with RL
19
+ inference_framework: str = "HF-Chat"
20
  precision: str = "" # float16, bfloat16
21
  base_model: Optional[str] = None # for adapter models
22
  revision: str = "main" # commit
src/backend/run_eval_suite.py CHANGED
@@ -42,13 +42,13 @@ def run_evaluation(
42
  # task_names = utils.pattern_match(task_names, tasks.ALL_TASKS)
43
 
44
  print(f"Selected Tasks: {task_names}")
45
- print(f"Eval Request: {eval_request.get_model_args()}")
46
  print(
47
  f"Num Fewshot: {num_fewshot}, Batch Size: {batch_size}, Device: {device}, Use Cache: {use_cache}, Limit: {limit}"
48
  )
49
  # hf-chat is implemented to use apply_chat_template
50
  results = evaluator.simple_evaluate(
51
- model="moe-infinity", # "hf-causal-experimental", # "hf-causal", hf-chat
52
  model_args=eval_request.get_model_args(),
53
  tasks=task_names,
54
  num_fewshot=num_fewshot,
@@ -65,6 +65,7 @@ def run_evaluation(
65
  results["config"]["model_dtype"] = eval_request.precision
66
  results["config"]["model_name"] = eval_request.model
67
  results["config"]["model_sha"] = eval_request.revision
 
68
 
69
  if max_nb_samples is not None:
70
  if "samples" in results:
 
42
  # task_names = utils.pattern_match(task_names, tasks.ALL_TASKS)
43
 
44
  print(f"Selected Tasks: {task_names}")
45
+ print(f"Eval Request: {eval_request}")
46
  print(
47
  f"Num Fewshot: {num_fewshot}, Batch Size: {batch_size}, Device: {device}, Use Cache: {use_cache}, Limit: {limit}"
48
  )
49
  # hf-chat is implemented to use apply_chat_template
50
  results = evaluator.simple_evaluate(
51
+ model=eval_request.inference_framework, # "hf-causal-experimental", # "hf-causal", hf-chat
52
  model_args=eval_request.get_model_args(),
53
  tasks=task_names,
54
  num_fewshot=num_fewshot,
 
65
  results["config"]["model_dtype"] = eval_request.precision
66
  results["config"]["model_name"] = eval_request.model
67
  results["config"]["model_sha"] = eval_request.revision
68
+ results["config"]["inference_framework"] = eval_request.inference_framework
69
 
70
  if max_nb_samples is not None:
71
  if "samples" in results:
src/display/utils.py CHANGED
@@ -70,6 +70,9 @@ auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "ma
70
  # #Scores
71
  # # auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Avg", "number", True)])
72
 
 
 
 
73
  for task in Tasks:
74
  auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
75
 
@@ -129,6 +132,24 @@ class ModelType(Enum):
129
  return ModelType.Unknown
130
 
131
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
132
  class WeightType(Enum):
133
  Adapter = ModelDetails("Adapter")
134
  Original = ModelDetails("Original")
 
70
  # #Scores
71
  # # auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Avg", "number", True)])
72
 
73
+ # Inference framework
74
+ auto_eval_column_dict.append(["inference_framework", ColumnContent, ColumnContent("Inference framework", "str", True)])
75
+
76
  for task in Tasks:
77
  auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
78
 
 
132
  return ModelType.Unknown
133
 
134
 
135
class InferenceFramework(Enum):
    """Inference backends selectable for an evaluation.

    Each member wraps a ``ModelDetails`` whose ``name`` is the label returned
    by ``to_str`` (used as the choice text in the UI widgets).
    """

    # "moe-infinity", hf-chat
    MoE_Infinity = ModelDetails("MoE-Infinity")
    HF_Chat = ModelDetails("HF-Chat")
    Unknown = ModelDetails("?")

    def to_str(self):
        """Return the label of this framework, e.g. ``"HF-Chat"``."""
        return self.value.name

    @staticmethod
    def from_str(inference_framework: str):
        """Map a framework name to its member, falling back to ``Unknown``.

        Matching ignores case and surrounding whitespace, so both the
        lowercase identifiers ("moe-infinity", "hf-chat") and the labels
        produced by ``to_str`` ("MoE-Infinity", "HF-Chat") resolve to the
        same member. Previously only the exact lowercase spelling matched,
        which meant ``from_str(member.to_str())`` always gave ``Unknown``.
        """
        # Non-string input (e.g. None from an unset dropdown) never matched
        # before either; keep returning Unknown instead of raising.
        if not isinstance(inference_framework, str):
            return InferenceFramework.Unknown

        normalized = inference_framework.strip().lower()
        if normalized == "moe-infinity":
            return InferenceFramework.MoE_Infinity
        if normalized == "hf-chat":
            return InferenceFramework.HF_Chat
        return InferenceFramework.Unknown
151
+
152
+
153
  class WeightType(Enum):
154
  Adapter = ModelDetails("Adapter")
155
  Original = ModelDetails("Original")
src/leaderboard/read_evals.py CHANGED
@@ -41,6 +41,7 @@ class EvalResult:
41
  num_params: int = 0
42
  date: str = "" # submission date of request file
43
  still_on_hub: bool = False
 
44
 
45
  @staticmethod
46
  def init_from_json_file(json_filepath, is_backend: bool = False):
@@ -48,6 +49,8 @@ class EvalResult:
48
  with open(json_filepath) as fp:
49
  data = json.load(fp)
50
 
 
 
51
  # We manage the legacy config format
52
  config = data.get("config", data.get("config_general", None))
53
 
@@ -118,6 +121,7 @@ class EvalResult:
118
  revision=config.get("model_sha", ""),
119
  still_on_hub=still_on_hub,
120
  architecture=architecture,
 
121
  )
122
 
123
  return res
@@ -136,6 +140,7 @@ class EvalResult:
136
  self.likes = request.get("likes", 0)
137
  self.num_params = request.get("params", 0)
138
  self.date = request.get("submitted_time", "")
 
139
  except Exception as e:
140
  print(f"Could not find request file for {self.org}/{self.model} -- path: {requests_path} -- {e}")
141
 
@@ -166,6 +171,7 @@ class EvalResult:
166
  AutoEvalColumn.likes.name: self.likes,
167
  AutoEvalColumn.params.name: self.num_params,
168
  AutoEvalColumn.still_on_hub.name: self.still_on_hub,
 
169
  }
170
 
171
  for task in Tasks:
 
41
  num_params: int = 0
42
  date: str = "" # submission date of request file
43
  still_on_hub: bool = False
44
+ inference_framework: str = "Unknown"
45
 
46
  @staticmethod
47
  def init_from_json_file(json_filepath, is_backend: bool = False):
 
49
  with open(json_filepath) as fp:
50
  data = json.load(fp)
51
 
52
+ inference_framework = data.get("inference_framework", "Unknown")
53
+
54
  # We manage the legacy config format
55
  config = data.get("config", data.get("config_general", None))
56
 
 
121
  revision=config.get("model_sha", ""),
122
  still_on_hub=still_on_hub,
123
  architecture=architecture,
124
+ inference_framework=inference_framework,
125
  )
126
 
127
  return res
 
140
  self.likes = request.get("likes", 0)
141
  self.num_params = request.get("params", 0)
142
  self.date = request.get("submitted_time", "")
143
+ self.inference_framework = request.get("inference_framework", "Unknown")
144
  except Exception as e:
145
  print(f"Could not find request file for {self.org}/{self.model} -- path: {requests_path} -- {e}")
146
 
 
171
  AutoEvalColumn.likes.name: self.likes,
172
  AutoEvalColumn.params.name: self.num_params,
173
  AutoEvalColumn.still_on_hub.name: self.still_on_hub,
174
+ AutoEvalColumn.inference_framework.name: self.inference_framework,
175
  }
176
 
177
  for task in Tasks:
src/populate.py CHANGED
@@ -3,6 +3,7 @@ import os
3
  from tqdm import tqdm
4
  import copy
5
  import pandas as pd
 
6
 
7
  from src.display.formatting import has_no_nan_values, make_clickable_model
8
  from src.display.utils import AutoEvalColumn, EvalQueueColumn
@@ -63,6 +64,9 @@ def get_leaderboard_df(
63
 
64
  # if AutoEvalColumn.average.name in df:
65
  # df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
 
 
 
66
 
67
  if not df.empty:
68
  df = df[cols].round(decimals=2)
 
3
  from tqdm import tqdm
4
  import copy
5
  import pandas as pd
6
+ import numpy as np
7
 
8
  from src.display.formatting import has_no_nan_values, make_clickable_model
9
  from src.display.utils import AutoEvalColumn, EvalQueueColumn
 
64
 
65
  # if AutoEvalColumn.average.name in df:
66
  # df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
67
+ for col in cols:
68
+ if col not in df.columns:
69
+ df[col] = np.nan
70
 
71
  if not df.empty:
72
  df = df[cols].round(decimals=2)