gsaivinay committed
Commit e5cbf2a · 1 Parent(s): ee366c0
app.py CHANGED
@@ -114,6 +114,8 @@ leaderboard_df = original_df.copy()
     pending_eval_queue_df,
 ) = get_evaluation_queue_df(eval_queue, eval_queue_private, EVAL_REQUESTS_PATH, EVAL_COLS)
 
+print(leaderboard_df["Precision"].unique())
+
 
 ## INTERACTION FUNCTIONS
 def add_new_eval(
@@ -216,8 +218,8 @@ def change_tab(query_param: str):
 
 
 # Searching and filtering
-def update_table(hidden_df: pd.DataFrame, current_columns_df: pd.DataFrame, columns: list, type_query: list, size_query: list, show_deleted: bool, query: str):
-    filtered_df = filter_models(hidden_df, type_query, size_query, show_deleted)
+def update_table(hidden_df: pd.DataFrame, current_columns_df: pd.DataFrame, columns: list, type_query: list, precision_query: str, size_query: list, show_deleted: bool, query: str):
+    filtered_df = filter_models(hidden_df, type_query, size_query, precision_query, show_deleted)
     if query != "":
         filtered_df = search_table(filtered_df, query)
     df = select_columns(filtered_df, columns)
@@ -249,7 +251,7 @@ NUMERIC_INTERVALS = {
 }
 
 def filter_models(
-    df: pd.DataFrame, type_query: list, size_query: list, show_deleted: bool
+    df: pd.DataFrame, type_query: list, size_query: list, precision_query: list, show_deleted: bool
 ) -> pd.DataFrame:
     # Show all models
     if show_deleted:
@@ -259,6 +261,7 @@ def filter_models(
 
     type_emoji = [t[0] for t in type_query]
     filtered_df = filtered_df[df[AutoEvalColumn.model_type_symbol.name].isin(type_emoji)]
+    filtered_df = filtered_df[df[AutoEvalColumn.precision.name].isin(precision_query)]
 
     numeric_interval = pd.IntervalIndex(sorted([NUMERIC_INTERVALS[s] for s in size_query]))
     params_column = pd.to_numeric(df[AutoEvalColumn.params.name], errors="coerce")
@@ -277,6 +280,12 @@ with demo:
     with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
         with gr.Row():
             with gr.Column():
+                with gr.Row():
+                    search_bar = gr.Textbox(
+                        placeholder=" 🔍 Search for your model and press ENTER...",
+                        show_label=False,
+                        elem_id="search-bar",
+                    )
                 with gr.Row():
                     shown_columns = gr.CheckboxGroup(
                         choices=[
@@ -310,11 +319,6 @@ with demo:
                     value=True, label="Show gated/private/deleted models", interactive=True
                 )
             with gr.Column(min_width=320):
-                search_bar = gr.Textbox(
-                    placeholder="🔍 Search for your model and press ENTER...",
-                    show_label=False,
-                    elem_id="search-bar",
-                )
                 with gr.Box(elem_id="box-filter"):
                     filter_columns_type = gr.CheckboxGroup(
                         label="Model types",
@@ -333,6 +337,13 @@ with demo:
                         interactive=True,
                         elem_id="filter-columns-type",
                     )
+                    filter_columns_precision = gr.CheckboxGroup(
+                        label="Precision",
+                        choices=["torch.float16", "torch.bfloat16", "torch.float32", "8bit", "4bit", "GPTQ"],
+                        value=["torch.float16", "torch.bfloat16", "torch.float32", "8bit", "4bit", "GPTQ"],
+                        interactive=True,
+                        elem_id="filter-columns-precision",
+                    )
                     filter_columns_size = gr.CheckboxGroup(
                         label="Model sizes",
                         choices=list(NUMERIC_INTERVALS.keys()),
@@ -375,6 +386,7 @@ with demo:
             leaderboard_table,
             shown_columns,
             filter_columns_type,
+            filter_columns_precision,
             filter_columns_size,
             deleted_models_visibility,
             search_bar,
@@ -388,6 +400,7 @@ with demo:
             leaderboard_table,
             shown_columns,
             filter_columns_type,
+            filter_columns_precision,
             filter_columns_size,
             deleted_models_visibility,
             search_bar,
@@ -402,6 +415,22 @@ with demo:
             leaderboard_table,
             shown_columns,
             filter_columns_type,
+            filter_columns_precision,
+            filter_columns_size,
+            deleted_models_visibility,
+            search_bar,
+        ],
+        leaderboard_table,
+        queue=True,
+    )
+    filter_columns_precision.change(
+        update_table,
+        [
+            hidden_leaderboard_table_for_search,
+            leaderboard_table,
+            shown_columns,
+            filter_columns_type,
+            filter_columns_precision,
             filter_columns_size,
             deleted_models_visibility,
             search_bar,
@@ -416,6 +445,7 @@ with demo:
             leaderboard_table,
             shown_columns,
             filter_columns_type,
+            filter_columns_precision,
             filter_columns_size,
             deleted_models_visibility,
             search_bar,
@@ -430,6 +460,7 @@ with demo:
             leaderboard_table,
             shown_columns,
             filter_columns_type,
+            filter_columns_precision,
             filter_columns_size,
             deleted_models_visibility,
             search_bar,
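The new `precision_query` argument threads from the `filter_columns_precision` CheckboxGroup into `filter_models`, where it reduces to a single pandas `isin` mask over the `Precision` column. A minimal standalone sketch of that mask, assuming toy data rather than the Space's real schema:

```python
import pandas as pd

# Toy frame mirroring the "Precision" values offered as CheckboxGroup choices.
df = pd.DataFrame({
    "model": ["llama-7b", "llama-7b-gptq", "mpt-30b"],
    "Precision": ["torch.float16", "GPTQ", "torch.bfloat16"],
})

def filter_by_precision(df: pd.DataFrame, precision_query: list) -> pd.DataFrame:
    # Same idiom as the line added to filter_models: keep rows whose
    # precision is among the checked values.
    return df[df["Precision"].isin(precision_query)]

# The GPTQ row is dropped; unchecking every box would empty the table.
print(filter_by_precision(df, ["torch.float16", "torch.bfloat16"]))
```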
model_info_cache.pkl CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c307938f15bda18b6c38af3d02cc0407d9d8d5345bc31f475af2cbbb33a4f8b5
-size 2895750
+oid sha256:4256b2cbebd45f47d6d6316f299d760c3b3e50e4a41281c69ae44ade57bfc38c
+size 3015063
model_size_cache.pkl ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fd4b59351406f51675c364cf95779063458dcf3e2653239c9f4e024ed16e23f1
+size 58618
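The pickle files tracked above are stored via Git LFS, so the diff shows only pointer metadata (`oid`, `size`), not cache contents. The new `model_size_cache.pkl` backs the load-compute-save pattern added to `get_model_infos_from_hub` further down; a minimal sketch of that pattern, with a hypothetical `compute_size` standing in for the real lookup:

```python
import pickle

CACHE_PATH = "model_size_cache.pkl"

def compute_size(model_name: str) -> float:
    return 7.0  # hypothetical stand-in for get_model_size

# Load the cache, tolerating a missing or truncated file on first run.
try:
    with open(CACHE_PATH, "rb") as f:
        model_size_cache = pickle.load(f)
except (EOFError, FileNotFoundError):
    model_size_cache = {}

# Pay the expensive lookup only once per model.
for model_name in ["meta-llama/Llama-2-7b-hf"]:
    if model_name not in model_size_cache:
        model_size_cache[model_name] = compute_size(model_name)

# Persist for the next run.
with open(CACHE_PATH, "wb") as f:
    pickle.dump(model_size_cache, f)
```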
requirements.txt CHANGED
@@ -1,3 +1,4 @@
+accelerate==0.23.0
 aiofiles==23.1.0
 aiohttp==3.8.4
 aiosignal==1.3.1
src/assets/text_content.py CHANGED
@@ -1,7 +1,7 @@
 from src.display_models.model_metadata_type import ModelType
 
 TITLE = """<h1 align="center" id="space-title">🤗 Open LLM Leaderboard</h1>
-<h2 align="center" id="space-title">This space displays GPT-4 and GPT-3.5 scores from [techinal paper](https://cdn.openai.com/papers/gpt-4.pdf)</h2>"""
+<h2 align="center" id="space-title">This space displays GPT-4 and GPT-3.5 scores from <a href="https://cdn.openai.com/papers/gpt-4.pdf" target="_blank" rel="noopener noreferrer">techinal paper</a></h2>"""
 
 INTRODUCTION_TEXT = """
 📝 The 🤗 Open LLM Leaderboard aims to track, rank and evaluate open LLMs and chatbots.
src/display_models/get_model_metadata.py CHANGED
@@ -8,6 +8,8 @@ from typing import List
 import huggingface_hub
 from huggingface_hub import HfApi
 from tqdm import tqdm
+from transformers import AutoModel, AutoConfig
+from accelerate import init_empty_weights
 
 from src.display_models.model_metadata_flags import DO_NOT_SUBMIT_MODELS, FLAGGED_MODELS
 from src.display_models.model_metadata_type import MODEL_TYPE_METADATA, ModelType, model_type_from_str
@@ -21,8 +23,13 @@ def get_model_infos_from_hub(leaderboard_data: List[dict]):
     try:
         with open("model_info_cache.pkl", "rb") as f:
             model_info_cache = pickle.load(f)
-    except EOFError:
+    except (EOFError, FileNotFoundError):
         model_info_cache = {}
+    try:
+        with open("model_size_cache.pkl", "rb") as f:
+            model_size_cache = pickle.load(f)
+    except (EOFError, FileNotFoundError):
+        model_size_cache = {}
 
     for model_data in tqdm(leaderboard_data):
         model_name = model_data["model_name_for_query"]
@@ -37,16 +44,21 @@ def get_model_infos_from_hub(leaderboard_data: List[dict]):
             print("Repo not found!", model_name)
             model_data[AutoEvalColumn.license.name] = None
             model_data[AutoEvalColumn.likes.name] = None
-            model_data[AutoEvalColumn.params.name] = get_model_size(model_name, None)
-            continue
+            if model_name not in model_size_cache:
+                model_size_cache[model_name] = get_model_size(model_name, None)
+            model_data[AutoEvalColumn.params.name] = model_size_cache[model_name]
 
         model_data[AutoEvalColumn.license.name] = get_model_license(model_info)
         model_data[AutoEvalColumn.likes.name] = get_model_likes(model_info)
-        model_data[AutoEvalColumn.params.name] = get_model_size(model_name, model_info)
+        if model_name not in model_size_cache:
+            model_size_cache[model_name] = get_model_size(model_name, model_info)
+        model_data[AutoEvalColumn.params.name] = model_size_cache[model_name]
 
     # save cache to disk in pickle format
     with open("model_info_cache.pkl", "wb") as f:
         pickle.dump(model_info_cache, f)
+    with open("model_size_cache.pkl", "wb") as f:
+        pickle.dump(model_size_cache, f)
 
 
 def get_model_license(model_info):
@@ -69,11 +81,17 @@ def get_model_size(model_name, model_info):
         return round(model_info.safetensors["total"] / 1e9, 3)
     except AttributeError:
         try:
-            size_match = re.search(size_pattern, model_name.lower())
-            size = size_match.group(0)
-            return round(float(size[:-1]) if size[-1] == "b" else float(size[:-1]) / 1e3, 3)
-        except AttributeError:
-            return 0
+            config = AutoConfig.from_pretrained(model_name, trust_remote_code=False)
+            with init_empty_weights():
+                model = AutoModel.from_config(config, trust_remote_code=False)
+            return round(sum(p.numel() for p in model.parameters() if p.requires_grad) / 1e9, 3)
+        except (EnvironmentError, ValueError):  # model config not found, likely private
+            try:
+                size_match = re.search(size_pattern, model_name.lower())
+                size = size_match.group(0)
+                return round(float(size[:-1]) if size[-1] == "b" else float(size[:-1]) / 1e3, 3)
+            except AttributeError:
+                return 0
 
 
 def get_model_type(leaderboard_data: List[dict]):
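The rewritten `get_model_size` now tries three strategies in order: `safetensors` metadata from the Hub, instantiating the architecture with empty weights (the reason `accelerate==0.23.0` enters requirements.txt), and finally the old regex over the repository name. A minimal standalone sketch of the middle strategy; `gpt2` is chosen here only as a small public example:

```python
from accelerate import init_empty_weights
from transformers import AutoConfig, AutoModel

def params_in_billions(model_name: str) -> float:
    # Only the config is downloaded; no weight files are fetched.
    config = AutoConfig.from_pretrained(model_name, trust_remote_code=False)
    # Inside init_empty_weights, parameters are created on the "meta"
    # device, so building the model allocates no real memory.
    with init_empty_weights():
        model = AutoModel.from_config(config)
    return round(sum(p.numel() for p in model.parameters() if p.requires_grad) / 1e9, 3)

print(params_in_billions("gpt2"))  # roughly 0.124
```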