Update src/leaderboard/read_evals.py
src/leaderboard/read_evals.py CHANGED
@@ -109,6 +109,7 @@ class EvalResult:
 
     def to_dict(self):
         """Converts the Eval Result to a dict compatible with our dataframe display"""
+        average = sum([v for v in self.results.values() if v is not None]) / len(Tasks)
         data_dict = {
             "eval_name": self.eval_name,  # not a column, just a save name,
             AutoEvalColumn.precision.name: self.precision.value.name,
@@ -118,15 +119,16 @@ class EvalResult:
             AutoEvalColumn.architecture.name: self.architecture,
             AutoEvalColumn.model.name: make_clickable_model(self.full_model),
             AutoEvalColumn.revision.name: self.revision,
+            AutoEvalColumn.average.name: average,
             AutoEvalColumn.license.name: self.license,
             AutoEvalColumn.likes.name: self.likes,
             AutoEvalColumn.params.name: self.num_params,
             AutoEvalColumn.still_on_hub.name: self.still_on_hub,
         }
-
+
+        print("Tasks", Tasks)
         for task in Tasks:
-
-            data_dict[task_value.col_name] = self.results[task_value.benchmark]
+            data_dict[task.value.col_name] = self.results[task.value.benchmark]
 
         return data_dict
 
@@ -185,7 +187,6 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResult]:
 
     results = []
     for v in eval_results.values():
-        print("v", v)
         try:
             v.to_dict()  # we test if the dict version is complete
             results.append(v)
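
Two details in the to_dict() hunks are worth spelling out: the new average sums only the non-None scores but divides by len(Tasks), so a missing benchmark drags the average toward zero rather than being ignored, and the loop body now goes through task.value where the old code referenced task_value, a name that is never defined and would raise NameError. Below is a minimal, runnable sketch of that logic; the Task/Tasks definitions, benchmark names, and results dict are stand-ins that only mirror the fields the diff touches, not the Space's real definitions.

# Sketch of the new to_dict() averaging and task loop (stand-in data).
from dataclasses import dataclass
from enum import Enum

@dataclass
class Task:
    benchmark: str
    metric: str
    col_name: str

class Tasks(Enum):
    task0 = Task("anli_r1", "acc", "ANLI")
    task1 = Task("logiqa", "acc_norm", "LogiQA")

# Per-benchmark scores as stored on the result; None = no score yet.
results = {"anli_r1": 0.62, "logiqa": None}

# New line 112: missing scores are skipped in the sum, but the divisor
# is the full task count, so an absent benchmark effectively scores 0.
average = sum(v for v in results.values() if v is not None) / len(Tasks)

data_dict = {"average": average}
for task in Tasks:
    # The fix: `task.value` reaches the Task dataclass; the old code's
    # undefined `task_value` raised NameError here.
    data_dict[task.value.col_name] = results[task.value.benchmark]

print(data_dict)  # {'average': 0.31, 'ANLI': 0.62, 'LogiQA': None}

Dividing by len(Tasks) instead of by the number of available scores is a deliberate-looking choice: it penalizes models with missing benchmarks rather than silently averaging over fewer tasks.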
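The last hunk only drops a debug print from get_raw_eval_results(), but the surrounding loop is what makes the to_dict() fix matter: each result is round-tripped through to_dict() as a completeness check before it is kept. The sketch below assumes the except clause, which sits outside the hunk, catches KeyError, since the lookup self.results[task.value.benchmark] fails that way when a benchmark score is absent; that handler is an assumption, not part of this diff.

# Sketch of the completeness filter around the last hunk; the KeyError
# handler is assumed, as the diff does not show the except clause.
def filter_complete(eval_results: dict) -> list:
    results = []
    for v in eval_results.values():
        try:
            v.to_dict()  # we test if the dict version is complete
            results.append(v)
        except KeyError:  # a benchmark score was missing; skip this eval
            continue
    return results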