lixuejing committed
Commit · 05d96a1
Parent(s): 89e1f12

fix bug
Files changed:
- src/display/utils.py  +1 -0
- src/leaderboard/filter_models.py  +138 -0
- src/leaderboard/read_evals.py  +27 -3
src/display/utils.py CHANGED
@@ -19,6 +19,7 @@ class ColumnContent:
     displayed_by_default: bool
     hidden: bool = False
     never_hidden: bool = False
+    dummy: bool = False

 ## Leaderboard columns
 auto_eval_column_dict = []
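Not part of the diff, but for orientation: the new `dummy` flag pairs with the `AutoEvalColumn.dummy.name: self.full_model` entry added in `read_evals.py` below, i.e. a hidden helper column that carries the raw model name for search and filtering. A minimal sketch of how such a column might be declared, assuming `ColumnContent` also takes a display name and a type ahead of the flags visible in this hunk:

# Hypothetical declaration: the "model_name_for_query"/"str" positional fields
# and the append pattern are assumptions; only the keyword flags come from the diff.
auto_eval_column_dict.append(
    ["dummy", ColumnContent, ColumnContent("model_name_for_query", "str",
                                           displayed_by_default=False, dummy=True)]
)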
src/leaderboard/filter_models.py ADDED
@@ -0,0 +1,138 @@
+from src.display.formatting import model_hyperlink
+from src.display.utils import AutoEvalColumn
+
+# Models which have been flagged by users as being problematic for a reason or another
+# (Model name to forum discussion link)
+FLAGGED_MODELS = {
+    "merged": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/510",
+    "Voicelab/trurl-2-13b": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/202",
+    "deepnight-research/llama-2-70B-inst": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/207",
+    "Aspik101/trurl-2-13b-pl-instruct_unload": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/213",
+    "Fredithefish/ReasonixPajama-3B-HF": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/236",
+    "TigerResearch/tigerbot-7b-sft-v1": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/237",
+    "gaodrew/gaodrew-gorgonzola-13b": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/215",
+    "AIDC-ai-business/Marcoroni-70B": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/287",
+    "AIDC-ai-business/Marcoroni-13B": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/287",
+    "AIDC-ai-business/Marcoroni-7B": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/287",
+    "fblgit/una-xaberius-34b-v1beta": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/444",
+    "jan-hq/trinity-v1": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/474",
+    "rwitz2/go-bruins-v2.1.1": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/474",
+    "rwitz2/go-bruins-v2.1": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/474",
+    "GreenNode/GreenNodeLM-v3olet-7B": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/474",
+    "GreenNode/GreenNodeLM-7B-v4leo": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/474",
+    "GreenNode/LeoScorpius-GreenNode-7B-v1": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/474",
+    "viethq188/LeoScorpius-7B-Chat-DPO": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/474",
+    "GreenNode/GreenNodeLM-7B-v2leo": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/474",
+    "janai-hq/trinity-v1": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/474",
+    "ignos/LeoScorpius-GreenNode-Alpaca-7B-v1": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/474",
+    "fblgit/una-cybertron-7b-v3-OMA": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/474",
+    "mncai/mistral-7b-dpo-merge-v1.1": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/474",
+    "mncai/mistral-7b-dpo-v6": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/474",
+    "Toten5/LeoScorpius-GreenNode-7B-v1": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/474",
+    "GreenNode/GreenNodeLM-7B-v1olet": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/474",
+    "quantumaikr/quantum-dpo-v0.1": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/474",
+    "quantumaikr/quantum-v0.01": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/474",
+    "quantumaikr/quantum-trinity-v0.1": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/474",
+    "mncai/mistral-7b-dpo-v5": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/474",
+    "cookinai/BruinHermes": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/474",
+    "jan-ai/Pandora-10.7B-v1": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/474",
+    "v1olet/v1olet_marcoroni-go-bruins-merge-7B": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/474",
+    "v1olet/v1olet_merged_dpo_7B_v3": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/474",
+    "rwitz2/pee": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/474",
+    "zyh3826 / GML-Mistral-merged-v1": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/503",
+    "dillfrescott/trinity-medium": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/474",
+    "udkai/Garrulus": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/526",
+    "dfurman/GarrulusMarcoro-7B-v0.1": "https://huggingface.co/dfurman/GarrulusMarcoro-7B-v0.1/discussions/1",
+    "udkai/Turdus": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/548",
+    "eren23/slerp-test-turdus-beagle": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/548",
+    "abideen/NexoNimbus-7B": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/548",
+    "alnrg2arg/test2_3": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/548",
+    "nfaheem/Marcoroni-7b-DPO-Merge": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/548",
+    "CultriX/MergeTrix-7B": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/548",
+    "liminerity/Blur-7b-v1.21": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/548",
+    # Merges not indicated
+    "gagan3012/MetaModelv2": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/510",
+    "gagan3012/MetaModelv3": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/510",
+    "kyujinpy/Sakura-SOLRCA-Math-Instruct-DPO-v2": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/510",
+    "kyujinpy/Sakura-SOLAR-Instruct-DPO-v2": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/510",
+    "kyujinpy/Sakura-SOLRCA-Math-Instruct-DPO-v1": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/510",
+    "kyujinpy/Sakura-SOLRCA-Instruct-DPO": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/510",
+    "fblgit/LUNA-SOLARkrautLM-Instruct": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/510",
+    "perlthoughts/Marcoroni-8x7B-v3-MoE": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/510",
+    "rwitz/go-bruins-v2": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/510",
+    "rwitz/go-bruins": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/510",
+    "Walmart-the-bag/Solar-10.7B-Cato": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/510",
+    "aqweteddy/mistral_tv-neural-marconroni": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/510",
+    "NExtNewChattingAI/shark_tank_ai_7_b": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/510",
+    "Q-bert/MetaMath-Cybertron": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/510",
+    "OpenPipe/mistral-ft-optimized-1227": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/510",
+    "perlthoughts/Falkor-7b": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/510",
+    "v1olet/v1olet_merged_dpo_7B": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/510",
+    "Ba2han/BruinsV2-OpHermesNeu-11B": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/510",
+    "DopeorNope/You_can_cry_Snowman-13B": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/510",
+    "PistachioAlt/Synatra-MCS-7B-v0.3-RP-Slerp": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/510",
+    "Weyaxi/MetaMath-una-cybertron-v2-bf16-Ties": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/510",
+    "Weyaxi/OpenHermes-2.5-neural-chat-7b-v3-2-7B": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/510",
+    "perlthoughts/Falkor-8x7B-MoE": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/510",
+    "elinas/chronos007-70b": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/510",
+    "Weyaxi/MetaMath-NeuralHermes-2.5-Mistral-7B-Linear": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/510",
+    "Weyaxi/MetaMath-neural-chat-7b-v3-2-Ties": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/510",
+    "diffnamehard/Mistral-CatMacaroni-slerp-uncensored-7B": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/510",
+    "Weyaxi/neural-chat-7b-v3-1-OpenHermes-2.5-7B": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/510",
+    "Weyaxi/MetaMath-NeuralHermes-2.5-Mistral-7B-Ties": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/510",
+    "Walmart-the-bag/Misted-7B": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/510",
+    "garage-bAInd/Camel-Platypus2-70B": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/510",
+    "Weyaxi/OpenOrca-Zephyr-7B": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/510",
+    "uukuguy/speechless-mistral-7b-dare-0.85": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/510",
+    "DopeorNope/SOLARC-M-10.7B": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/511",
+    "cloudyu/Mixtral_11Bx2_MoE_19B": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/511",
+    "DopeorNope/SOLARC-MOE-10.7Bx6 ": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/511",
+    "DopeorNope/SOLARC-MOE-10.7Bx4": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/511",
+    "gagan3012/MetaModelv2 ": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/511",
+}
+
+# Models which have been requested by orgs to not be submitted on the leaderboard
+DO_NOT_SUBMIT_MODELS = [
+    "Voicelab/trurl-2-13b", # trained on MMLU
+    "TigerResearch/tigerbot-70b-chat", # per authors request
+    "TigerResearch/tigerbot-70b-chat-v2", # per authors request
+    "TigerResearch/tigerbot-70b-chat-v4-4k", # per authors request
+]
+
+
+def flag_models(leaderboard_data: list[dict]):
+    for model_data in leaderboard_data:
+        # Merges and moes are flagged automatically
+        if model_data[AutoEvalColumn.flagged.name] == True:
+            flag_key = "merged"
+        else:
+            flag_key = model_data["model_name_for_query"]
+
+        if flag_key in FLAGGED_MODELS:
+            issue_num = FLAGGED_MODELS[flag_key].split("/")[-1]
+            issue_link = model_hyperlink(
+                FLAGGED_MODELS[flag_key],
+                f"See discussion #{issue_num}",
+            )
+            model_data[
+                AutoEvalColumn.model.name
+            ] = f"{model_data[AutoEvalColumn.model.name]} has been flagged! {issue_link}"
+            model_data[AutoEvalColumn.flagged.name] = True
+        else:
+            model_data[AutoEvalColumn.flagged.name] = False
+
+
+def remove_forbidden_models(leaderboard_data: list[dict]):
+    indices_to_remove = []
+    for ix, model in enumerate(leaderboard_data):
+        if model["model_name_for_query"] in DO_NOT_SUBMIT_MODELS:
+            indices_to_remove.append(ix)
+
+    for ix in reversed(indices_to_remove):
+        leaderboard_data.pop(ix)
+    return leaderboard_data
+
+
+def filter_models_flags(leaderboard_data: list[dict]):
+    leaderboard_data = remove_forbidden_models(leaderboard_data)
+    flag_models(leaderboard_data)
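The new module's entry point mutates the leaderboard rows in place; a rough usage sketch on invented rows (the real column names come from `AutoEvalColumn`, so the "Model"/"Flagged" spellings below are assumptions):

# Illustrative only: exercises filter_models_flags() on made-up rows.
from src.leaderboard.filter_models import filter_models_flags

rows = [
    {"model_name_for_query": "Voicelab/trurl-2-13b", "Model": "link", "Flagged": False},
    {"model_name_for_query": "some-org/clean-model", "Model": "link", "Flagged": False},
]
filter_models_flags(rows)  # returns None; rows is edited in place
# - the Voicelab row is dropped (it is listed in DO_NOT_SUBMIT_MODELS)
# - each remaining row gets its flagged column set; flagged models also get a
#   "has been flagged! See discussion #..." link appended to their model cell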
src/leaderboard/read_evals.py CHANGED
@@ -31,6 +31,10 @@ class EvalResult:
     num_params: int = 0
     date: str = "" # submission date of request file
     still_on_hub: bool = False
+    is_merge: bool = False
+    flagged: bool = False
+    status: str = "FINISHED"
+    tags: list = None

     @classmethod
     def init_from_json_file(self, json_filepath):
@@ -104,12 +108,22 @@
             self.likes = request.get("likes", 0)
             self.num_params = request.get("params", 0)
             self.date = request.get("submitted_time", "")
+            self.architecture = request.get("architectures", "Unknown")
+            self.status = request.get("status", "FAILED")
         except Exception:
+            self.status = "FAILED"
             print(f"Could not find request file for {self.org}/{self.model} with precision {self.precision.value.name}")

     def to_dict(self):
         """Converts the Eval Result to a dict compatible with our dataframe display"""
-        average =
+        average = 0
+        nums = 0
+        for v in self.results.values():
+            if v is not None and v != 0:
+                average += v
+                nums += 1
+        average = average/nums
+
         data_dict = {
             "eval_name": self.eval_name, # not a column, just a save name,
             AutoEvalColumn.precision.name: self.precision.value.name,
@@ -118,20 +132,30 @@
             AutoEvalColumn.weight_type.name: self.weight_type.value.name,
             AutoEvalColumn.architecture.name: self.architecture,
             AutoEvalColumn.model.name: make_clickable_model(self.full_model),
+            AutoEvalColumn.dummy.name: self.full_model,
             AutoEvalColumn.revision.name: self.revision,
             AutoEvalColumn.average.name: average,
             AutoEvalColumn.license.name: self.license,
             AutoEvalColumn.likes.name: self.likes,
             AutoEvalColumn.params.name: self.num_params,
             AutoEvalColumn.still_on_hub.name: self.still_on_hub,
+            AutoEvalColumn.merged.name: "merge" in self.tags if self.tags else False,
+            AutoEvalColumn.moe.name: ("moe" in self.tags if self.tags else False) or "moe" in self.full_model.lower(),
+            AutoEvalColumn.flagged.name: self.flagged
         }

         for task in Tasks:
-            data_dict[task.value.col_name] = self.results.get(task.value.benchmark, 0)
+            #data_dict[task.value.col_name] = self.results.get(task.value.benchmark, 0)
+            if task.value.col_name != "CLCC-H":
+                data_dict[task.value.col_name] = self.results.get(task.value.benchmark, 0)
+            else:
+                if self.results.get(task.value.benchmark, 0) == 0:
+                    data_dict[task.value.col_name] = "-"
+                else:
+                    data_dict[task.value.col_name] = "%.2f" % self.results.get(task.value.benchmark, 0)

         return data_dict

-
 def get_request_file_for_model(requests_path, model_name, precision):
     """Selects the correct request file for a given model. Only keeps runs tagged as FINISHED"""
     request_files = os.path.join(
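To make the reworked `to_dict()` scoring concrete, here is a small standalone illustration with invented scores: the average now skips `None` and zero entries, and the CLCC-H column is rendered as "-" when its score is 0, otherwise formatted to two decimals.

# Standalone sketch of the new averaging and CLCC-H display rules; the scores
# are invented and the dict stands in for self.results.
results = {"task_a": 0.75, "task_b": 0.0, "task_c": None, "task_d": 0.25, "CLCC-H": 0.0}

total, nums = 0, 0
for v in results.values():
    if v is not None and v != 0:  # zero and missing scores are excluded
        total += v
        nums += 1
average = total / nums            # (0.75 + 0.25) / 2 == 0.5
# Note: if every score were 0 or None, nums would be 0 and this division would
# raise ZeroDivisionError, just like the committed code.

clcc = results.get("CLCC-H", 0)
clcc_cell = "-" if clcc == 0 else "%.2f" % clcc
print(average, clcc_cell)         # 0.5 -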