Compute model size based on number of parameters
Tom Aarsen committed · bd6a61b · Parent(s): 5c90ee9
Files changed:
- app.py +100 -126
- utils/__init__.py +0 -0
- utils/model_size.py +39 -0
app.py
CHANGED
@@ -4,11 +4,13 @@ import os
 
 from datasets import load_dataset
 import gradio as gr
-from huggingface_hub import
+from huggingface_hub import HfApi, hf_hub_download
 from huggingface_hub.repocard import metadata_load
 import pandas as pd
 from tqdm.autonotebook import tqdm
 
+from utils.model_size import get_model_size
+
 TASKS = [
     "BitextMining",
     "Classification",
@@ -786,94 +788,94 @@ EXTERNAL_MODEL_TO_SEQLEN = {
 }
 
 EXTERNAL_MODEL_TO_SIZE = {
-    "allenai-specter":
-    "all-MiniLM-L12-v2":
-    "all-MiniLM-L6-v2":
-    "all-mpnet-base-v2":
-    "bert-base-10lang-cased":
-    "bert-base-15lang-cased":
-    "bert-base-25lang-cased":
-    "bert-base-multilingual-cased":
-    "bert-base-multilingual-uncased":
-    "bert-base-uncased":
-    "bert-base-swedish-cased":
-    "bge-base-zh-v1.5":
-    "bge-large-zh-v1.5":
-    "bge-large-zh-noinstruct":
-    "bge-small-zh-v1.5":
-    "camembert-base":
-    "camembert-large":
-    "cross-en-de-roberta-sentence-transformer":
-    "contriever-base-msmarco":
-    "distilbert-base-25lang-cased":
-    "distilbert-base-en-fr-cased":
-    "distilbert-base-en-fr-es-pt-it-cased":
-    "distilbert-base-fr-cased":
-    "distilbert-base-uncased":
-    "DanskBERT":
-    "distiluse-base-multilingual-cased-v2":
-    "dfm-encoder-large-v1":
-    "dfm-sentence-encoder-large-1":
-    "e5-base":
-    "e5-large":
-    "e5-mistral-7b-instruct":
-    "e5-small":
-    "electra-small-nordic":
-    "electra-small-swedish-cased-discriminator":
-    "flaubert_base_cased":
-    "flaubert_base_uncased":
-    "flaubert_large_cased":
-    "gbert-base":
-    "gbert-large":
-    "gelectra-base":
-    "gelectra-large":
-    "glove.6B.300d":
-    "gottbert-base":
-    "gtr-t5-base":
-    "gtr-t5-large":
-    "gtr-t5-xl":
-    "gtr-t5-xxl":
-    "herbert-base-retrieval-v2":
-    "komninos":
-    "luotuo-bert-medium":
-    "LASER2":
-    "LaBSE":
-    "m3e-base":
-    "m3e-large":
-    "msmarco-bert-co-condensor":
-    "multi-qa-MiniLM-L6-cos-v1":
-    "multilingual-e5-base":
-    "multilingual-e5-small":
-    "multilingual-e5-large":
-    "nb-bert-base":
-    "nb-bert-large":
-    "nomic-embed-text-v1.5-64":
-    "nomic-embed-text-v1.5-128":
-    "nomic-embed-text-v1.5-256":
-    "nomic-embed-text-v1.5-512":
-    "norbert3-base":
-    "norbert3-large":
-    "paraphrase-multilingual-mpnet-base-v2":
-    "paraphrase-multilingual-MiniLM-L12-v2":
-    "sentence-camembert-base":
-    "sentence-camembert-large":
-    "sentence-croissant-llm-base":
-    "sentence-bert-swedish-cased":
-    "sentence-t5-base":
-    "sentence-t5-large":
-    "sentence-t5-xl":
-    "sentence-t5-xxl":
-    "silver-retriever-base-v1":
-    "sup-simcse-bert-base-uncased":
-    "st-polish-paraphrase-from-distilroberta":
-    "st-polish-paraphrase-from-mpnet":
-    "text2vec-base-chinese":
-    "text2vec-large-chinese":
-    "unsup-simcse-bert-base-uncased":
-    "use-cmlm-multilingual":
-    "voyage-lite-02-instruct":
-    "xlm-roberta-base":
-    "xlm-roberta-large":
+    "allenai-specter": 110,
+    "all-MiniLM-L12-v2": 33,
+    "all-MiniLM-L6-v2": 23,
+    "all-mpnet-base-v2": 110,
+    "bert-base-10lang-cased": 138,
+    "bert-base-15lang-cased": 138,
+    "bert-base-25lang-cased": 138,
+    "bert-base-multilingual-cased": 179,
+    "bert-base-multilingual-uncased": 168,
+    "bert-base-uncased": 110,
+    "bert-base-swedish-cased": 125,
+    "bge-base-zh-v1.5": 102,
+    "bge-large-zh-v1.5": 326,
+    "bge-large-zh-noinstruct": 326,
+    "bge-small-zh-v1.5": 24,
+    "camembert-base": 111,
+    "camembert-large": 338,
+    "cross-en-de-roberta-sentence-transformer": 278,
+    "contriever-base-msmarco": 110,
+    "distilbert-base-25lang-cased": 110,
+    "distilbert-base-en-fr-cased": 110,
+    "distilbert-base-en-fr-es-pt-it-cased": 110,
+    "distilbert-base-fr-cased": 110,
+    "distilbert-base-uncased": 110,
+    "DanskBERT": 125,
+    "distiluse-base-multilingual-cased-v2": 135,
+    "dfm-encoder-large-v1": 355,
+    "dfm-sentence-encoder-large-1": 355,
+    "e5-base": 110,
+    "e5-large": 335,
+    "e5-mistral-7b-instruct": 7110,
+    "e5-small": 33,
+    "electra-small-nordic": 23,
+    "electra-small-swedish-cased-discriminator": 16,
+    "flaubert_base_cased": 138,
+    "flaubert_base_uncased": 138,
+    "flaubert_large_cased": 372,
+    "gbert-base": 110,
+    "gbert-large": 337,
+    "gelectra-base": 110,
+    "gelectra-large": 335,
+    "glove.6B.300d": 120,
+    "gottbert-base": 127,
+    "gtr-t5-base": 110,
+    "gtr-t5-large": 168,
+    "gtr-t5-xl": 1240,
+    "gtr-t5-xxl": 4865,
+    "herbert-base-retrieval-v2": 125,
+    "komninos": 134,
+    "luotuo-bert-medium": 328,
+    "LASER2": 43,
+    "LaBSE": 471,
+    "m3e-base": 102,
+    "m3e-large": 102,
+    "msmarco-bert-co-condensor": 110,
+    "multi-qa-MiniLM-L6-cos-v1": 23,
+    "multilingual-e5-base": 278,
+    "multilingual-e5-small": 118,
+    "multilingual-e5-large": 560,
+    "nb-bert-base": 179,
+    "nb-bert-large": 355,
+    "nomic-embed-text-v1.5-64": 138,
+    "nomic-embed-text-v1.5-128": 138,
+    "nomic-embed-text-v1.5-256": 138,
+    "nomic-embed-text-v1.5-512": 138,
+    "norbert3-base": 131,
+    "norbert3-large": 368,
+    "paraphrase-multilingual-mpnet-base-v2": 278,
+    "paraphrase-multilingual-MiniLM-L12-v2": 118,
+    "sentence-camembert-base": 110,
+    "sentence-camembert-large": 337,
+    "sentence-croissant-llm-base": 1280,
+    "sentence-bert-swedish-cased": 125,
+    "sentence-t5-base": 110,
+    "sentence-t5-large": 168,
+    "sentence-t5-xl": 1240,
+    "sentence-t5-xxl": 4865,
+    "silver-retriever-base-v1": 125,
+    "sup-simcse-bert-base-uncased": 110,
+    "st-polish-paraphrase-from-distilroberta": 125,
+    "st-polish-paraphrase-from-mpnet": 125,
+    "text2vec-base-chinese": 102,
+    "text2vec-large-chinese": 326,
+    "unsup-simcse-bert-base-uncased": 110,
+    "use-cmlm-multilingual": 472,
+    "voyage-lite-02-instruct": 613,
+    "xlm-roberta-base": 279,
+    "xlm-roberta-large": 560,
 }
 
 MODELS_TO_SKIP = {
@@ -997,6 +999,7 @@ MODELS_TO_SKIP = {
     "beademiguelperez/sentence-transformers-multilingual-e5-small",
     "arcdev/SFR-Embedding-Mistral",
     "arcdev/e5-mistral-7b-instruct",
+    "Koat/gte-tiny",
 }
 
 def add_lang(examples):
@@ -1079,36 +1082,7 @@ def get_dim_seq_size(model):
     dim = config.get("hidden_dim", config.get("hidden_size", config.get("d_model", "")))
     seq = config.get("n_positions", config.get("max_position_embeddings", config.get("n_ctx", config.get("seq_length", ""))))
     # Get model file size without downloading
-    if "pytorch_model.bin" in filenames:
-        url = hf_hub_url(model.modelId, filename="pytorch_model.bin")
-        meta = get_hf_file_metadata(url)
-        size = round(meta.size / 1e9, 2)
-    elif "pytorch_model.bin.index.json" in filenames:
-        index_path = hf_hub_download(model.modelId, filename="pytorch_model.bin.index.json")
-        """
-        {
-          "metadata": {
-            "total_size": 28272820224
-          },....
-        """
-        size = json.load(open(index_path))
-        if ("metadata" in size) and ("total_size" in size["metadata"]):
-            size = round(size["metadata"]["total_size"] / 1e9, 2)
-    elif "model.safetensors" in filenames:
-        url = hf_hub_url(model.modelId, filename="model.safetensors")
-        meta = get_hf_file_metadata(url)
-        size = round(meta.size / 1e9, 2)
-    elif "model.safetensors.index.json" in filenames:
-        index_path = hf_hub_download(model.modelId, filename="model.safetensors.index.json")
-        """
-        {
-          "metadata": {
-            "total_size": 14483464192
-          },....
-        """
-        size = json.load(open(index_path))
-        if ("metadata" in size) and ("total_size" in size["metadata"]):
-            size = round(size["metadata"]["total_size"] / 1e9, 2)
+    size = get_model_size(model)
     return dim, seq, size
 
 def make_datasets_clickable(df):
@@ -1120,7 +1094,7 @@ def make_datasets_clickable(df):
     return df
 
 def add_rank(df):
-    cols_to_rank = [col for col in df.columns if col not in ["Model", "Model Size (GB)", "Embedding Dimensions", "Max Tokens"]]
+    cols_to_rank = [col for col in df.columns if col not in ["Model", "Model Size (Million Parameters)", "Embedding Dimensions", "Max Tokens"]]
     if len(cols_to_rank) == 1:
         df.sort_values(cols_to_rank[0], ascending=False, inplace=True)
     else:
@@ -1150,7 +1124,7 @@ def get_mteb_data(tasks=["Clustering"], langs=[], datasets=[], fillna=True, add_
         # Model & at least one result
         if len(res) > 1:
             if add_emb_dim:
-                res["Model Size (GB)"] = EXTERNAL_MODEL_TO_SIZE.get(model, "")
+                res["Model Size (Million Parameters)"] = EXTERNAL_MODEL_TO_SIZE.get(model, "")
                 res["Embedding Dimensions"] = EXTERNAL_MODEL_TO_DIM.get(model, "")
                 res["Max Tokens"] = EXTERNAL_MODEL_TO_SEQLEN.get(model, "")
             df_list.append(res)
@@ -1191,7 +1165,7 @@ def get_mteb_data(tasks=["Clustering"], langs=[], datasets=[], fillna=True, add_
         if add_emb_dim:
             try:
                 # Fails on gated repos, so we only include scores for them
-                out["Embedding Dimensions"], out["Max Tokens"], out["Model Size (GB)"] = get_dim_seq_size(model)
+                out["Embedding Dimensions"], out["Max Tokens"], out["Model Size (Million Parameters)"] = get_dim_seq_size(model)
             except:
                 pass
         df_list.append(out)
@@ -1268,7 +1242,7 @@ def get_mteb_average():
     # Fill NaN after averaging
     DATA_OVERALL.fillna("", inplace=True)
 
-    DATA_OVERALL = DATA_OVERALL[["Rank", "Model", "Model Size (GB)", "Embedding Dimensions", "Max Tokens", f"Average ({len(TASK_LIST_EN)} datasets)", f"Classification Average ({len(TASK_LIST_CLASSIFICATION)} datasets)", f"Clustering Average ({len(TASK_LIST_CLUSTERING)} datasets)", f"Pair Classification Average ({len(TASK_LIST_PAIR_CLASSIFICATION)} datasets)", f"Reranking Average ({len(TASK_LIST_RERANKING)} datasets)", f"Retrieval Average ({len(TASK_LIST_RETRIEVAL)} datasets)", f"STS Average ({len(TASK_LIST_STS)} datasets)", f"Summarization Average ({len(TASK_LIST_SUMMARIZATION)} dataset)"]]
+    DATA_OVERALL = DATA_OVERALL[["Rank", "Model", "Model Size (Million Parameters)", "Embedding Dimensions", "Max Tokens", f"Average ({len(TASK_LIST_EN)} datasets)", f"Classification Average ({len(TASK_LIST_CLASSIFICATION)} datasets)", f"Clustering Average ({len(TASK_LIST_CLUSTERING)} datasets)", f"Pair Classification Average ({len(TASK_LIST_PAIR_CLASSIFICATION)} datasets)", f"Reranking Average ({len(TASK_LIST_RERANKING)} datasets)", f"Retrieval Average ({len(TASK_LIST_RETRIEVAL)} datasets)", f"STS Average ({len(TASK_LIST_STS)} datasets)", f"Summarization Average ({len(TASK_LIST_SUMMARIZATION)} dataset)"]]
     DATA_OVERALL = DATA_OVERALL[DATA_OVERALL.iloc[:, 5:].ne("").any(axis=1)]
 
     return DATA_OVERALL
@@ -1327,7 +1301,7 @@ def get_mteb_average_zh():
     # Fill NaN after averaging
     DATA_OVERALL_ZH.fillna("", inplace=True)
 
-    DATA_OVERALL_ZH = DATA_OVERALL_ZH[["Rank", "Model", "Model Size (GB)", "Embedding Dimensions", "Max Tokens", f"Average ({len(TASK_LIST_ZH)} datasets)", f"Classification Average ({len(TASK_LIST_CLASSIFICATION_ZH)} datasets)", f"Clustering Average ({len(TASK_LIST_CLUSTERING_ZH)} datasets)", f"Pair Classification Average ({len(TASK_LIST_PAIR_CLASSIFICATION_ZH)} datasets)", f"Reranking Average ({len(TASK_LIST_RERANKING_ZH)} datasets)", f"Retrieval Average ({len(TASK_LIST_RETRIEVAL_ZH)} datasets)", f"STS Average ({len(TASK_LIST_STS_ZH)} datasets)"]]
+    DATA_OVERALL_ZH = DATA_OVERALL_ZH[["Rank", "Model", "Model Size (Million Parameters)", "Embedding Dimensions", "Max Tokens", f"Average ({len(TASK_LIST_ZH)} datasets)", f"Classification Average ({len(TASK_LIST_CLASSIFICATION_ZH)} datasets)", f"Clustering Average ({len(TASK_LIST_CLUSTERING_ZH)} datasets)", f"Pair Classification Average ({len(TASK_LIST_PAIR_CLASSIFICATION_ZH)} datasets)", f"Reranking Average ({len(TASK_LIST_RERANKING_ZH)} datasets)", f"Retrieval Average ({len(TASK_LIST_RETRIEVAL_ZH)} datasets)", f"STS Average ({len(TASK_LIST_STS_ZH)} datasets)"]]
     DATA_OVERALL_ZH = DATA_OVERALL_ZH[DATA_OVERALL_ZH.iloc[:, 5:].ne("").any(axis=1)]
 
     return DATA_OVERALL_ZH
@@ -1389,7 +1363,7 @@ def get_mteb_average_fr():
     # Fill NaN after averaging
     DATA_OVERALL_FR.fillna("", inplace=True)
 
-    DATA_OVERALL_FR = DATA_OVERALL_FR[["Rank", "Model", "Model Size (GB)", "Embedding Dimensions", "Max Tokens", f"Average ({len(TASK_LIST_FR)} datasets)", f"Classification Average ({len(TASK_LIST_CLASSIFICATION_FR)} datasets)", f"Clustering Average ({len(TASK_LIST_CLUSTERING_FR)} datasets)", f"Pair Classification Average ({len(TASK_LIST_PAIR_CLASSIFICATION_FR)} datasets)", f"Reranking Average ({len(TASK_LIST_RERANKING_FR)} datasets)", f"Retrieval Average ({len(TASK_LIST_RETRIEVAL_FR)} datasets)", f"STS Average ({len(TASK_LIST_STS_FR)} datasets)", f"Summarization Average ({len(TASK_LIST_SUMMARIZATION_FR)} dataset)"]]
+    DATA_OVERALL_FR = DATA_OVERALL_FR[["Rank", "Model", "Model Size (Million Parameters)", "Embedding Dimensions", "Max Tokens", f"Average ({len(TASK_LIST_FR)} datasets)", f"Classification Average ({len(TASK_LIST_CLASSIFICATION_FR)} datasets)", f"Clustering Average ({len(TASK_LIST_CLUSTERING_FR)} datasets)", f"Pair Classification Average ({len(TASK_LIST_PAIR_CLASSIFICATION_FR)} datasets)", f"Reranking Average ({len(TASK_LIST_RERANKING_FR)} datasets)", f"Retrieval Average ({len(TASK_LIST_RETRIEVAL_FR)} datasets)", f"STS Average ({len(TASK_LIST_STS_FR)} datasets)", f"Summarization Average ({len(TASK_LIST_SUMMARIZATION_FR)} dataset)"]]
     DATA_OVERALL_FR = DATA_OVERALL_FR[DATA_OVERALL_FR.iloc[:, 5:].ne("").any(axis=1)]
 
     return DATA_OVERALL_FR
@@ -1443,7 +1417,7 @@ def get_mteb_average_pl():
     # Fill NaN after averaging
     DATA_OVERALL_PL.fillna("", inplace=True)
 
-    DATA_OVERALL_PL = DATA_OVERALL_PL[["Rank", "Model", "Model Size (GB)", "Embedding Dimensions", "Max Tokens", f"Average ({len(TASK_LIST_PL)} datasets)", f"Classification Average ({len(TASK_LIST_CLASSIFICATION_PL)} datasets)", f"Clustering Average ({len(TASK_LIST_CLUSTERING_PL)} datasets)", f"Pair Classification Average ({len(TASK_LIST_PAIR_CLASSIFICATION_PL)} datasets)", f"Retrieval Average ({len(TASK_LIST_RETRIEVAL_PL)} datasets)", f"STS Average ({len(TASK_LIST_STS_PL)} datasets)"]]
+    DATA_OVERALL_PL = DATA_OVERALL_PL[["Rank", "Model", "Model Size (Million Parameters)", "Embedding Dimensions", "Max Tokens", f"Average ({len(TASK_LIST_PL)} datasets)", f"Classification Average ({len(TASK_LIST_CLASSIFICATION_PL)} datasets)", f"Clustering Average ({len(TASK_LIST_CLUSTERING_PL)} datasets)", f"Pair Classification Average ({len(TASK_LIST_PAIR_CLASSIFICATION_PL)} datasets)", f"Retrieval Average ({len(TASK_LIST_RETRIEVAL_PL)} datasets)", f"STS Average ({len(TASK_LIST_STS_PL)} datasets)"]]
    DATA_OVERALL_PL = DATA_OVERALL_PL[DATA_OVERALL_PL.iloc[:, 5:].ne("").any(axis=1)]
 
     return DATA_OVERALL_PL
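Note on units: taken together, the hunks above switch the size column from checkpoint size in GB (the old get_dim_seq_size returned round(bytes / 1e9, 2)) to millions of parameters, assuming 4 bytes per parameter (fp32) unless KNOWN_BYTES_PER_PARAM says otherwise. A minimal sketch relating the two units; gb_to_million_params is an illustrative helper, not part of the commit:

# Illustrative only: relate the old unit (checkpoint size in GB) to the
# new one (millions of parameters), assuming fp32, i.e. 4 bytes/param.
def gb_to_million_params(size_gb: float, bytes_per_param: int = 4) -> float:
    return size_gb * 1e9 / bytes_per_param / 1e6

# e.g. a ~0.44 GB fp32 checkpoint -> ~110 million parameters, matching
# entries such as "bert-base-uncased": 110 above.
assert round(gb_to_million_params(0.44)) == 110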
utils/__init__.py
ADDED
Empty file.
utils/model_size.py
ADDED
@@ -0,0 +1,39 @@
+import json
+import re
+from huggingface_hub.hf_api import ModelInfo, get_safetensors_metadata, model_info as get_model_info, get_hf_file_metadata, hf_hub_url
+from huggingface_hub import hf_hub_download
+
+# Map model IDs to the number of bytes used for one parameter. So, 4 bytes for fp32, 2 bytes for fp16, etc.
+# By default, we assume that the model is stored in fp32.
+KNOWN_BYTES_PER_PARAM = {}
+
+
+def get_model_size(model_info: ModelInfo):
+    '''Get the size of the model in millions of parameters.'''
+    try:
+        safetensors = get_safetensors_metadata(model_info.id)
+        return round(sum(safetensors.parameter_count.values()) / 1e6)
+    except Exception:
+        pass
+
+    filenames = [sib.rfilename for sib in model_info.siblings]
+    if "pytorch_model.bin" in filenames:
+        url = hf_hub_url(model_info.id, filename="pytorch_model.bin")
+        meta = get_hf_file_metadata(url)
+        bytes_per_param = KNOWN_BYTES_PER_PARAM.get(model_info.id, 4)
+        return round(meta.size / bytes_per_param / 1e6)
+
+    if "pytorch_model.bin.index.json" in filenames:
+        index_path = hf_hub_download(model_info.id, filename="pytorch_model.bin.index.json")
+        """
+        {
+          "metadata": {
+            "total_size": 28272820224
+          },....
+        """
+        size = json.load(open(index_path))
+        bytes_per_param = KNOWN_BYTES_PER_PARAM.get(model_info.id, 4)
+        if ("metadata" in size) and ("total_size" in size["metadata"]):
+            return round(size["metadata"]["total_size"] / bytes_per_param / 1e6)
+
+    return None
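A minimal usage sketch for the new helper (hypothetical snippet, not part of the commit; the repo ID is an arbitrary public example). get_model_size prefers exact parameter counts from the repo's safetensors metadata and only falls back to dividing the pytorch_model.bin file size by the assumed bytes per parameter:

from huggingface_hub import HfApi

from utils.model_size import get_model_size

api = HfApi()
# model_info() returns a ModelInfo exposing .id and .siblings, which is
# all get_model_size needs; the repo ID here is just an example.
info = api.model_info("sentence-transformers/all-MiniLM-L6-v2")
print(get_model_size(info))  # -> parameter count in millions, e.g. 23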