Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Update src/leaderboard/read_evals.py
Browse files
src/leaderboard/read_evals.py
CHANGED
@@ -77,7 +77,6 @@ class EvalResult:
|
|
77 |
task_value = task.value
|
78 |
if task_value.metric in scores:
|
79 |
results[task_value.benchmark] = Decimal(scores[task_value.metric])
|
80 |
-
print(f"Debug: {task_value.benchmark} = {results[task_value.benchmark]}")
|
81 |
|
82 |
|
83 |
return self(
|
@@ -110,7 +109,6 @@ class EvalResult:
|
|
110 |
|
111 |
def to_dict(self):
|
112 |
"""Converts the Eval Result to a dict compatible with our dataframe display"""
|
113 |
-
average = sum([v for v in self.results.values() if v is not None]) / len(self.results)
|
114 |
data_dict = {
|
115 |
"eval_name": self.eval_name, # not a column, just a save name,
|
116 |
AutoEvalColumn.precision.name: self.precision.value.name,
|
@@ -120,16 +118,15 @@ class EvalResult:
|
|
120 |
AutoEvalColumn.architecture.name: self.architecture,
|
121 |
AutoEvalColumn.model.name: make_clickable_model(self.full_model),
|
122 |
AutoEvalColumn.revision.name: self.revision,
|
123 |
-
AutoEvalColumn.average.name: average,
|
124 |
AutoEvalColumn.license.name: self.license,
|
125 |
AutoEvalColumn.likes.name: self.likes,
|
126 |
AutoEvalColumn.params.name: self.num_params,
|
127 |
AutoEvalColumn.still_on_hub.name: self.still_on_hub,
|
128 |
}
|
129 |
-
|
130 |
for task in Tasks:
|
131 |
-
|
132 |
-
|
133 |
|
134 |
return data_dict
|
135 |
|
@@ -188,6 +185,7 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResu
|
|
188 |
|
189 |
results = []
|
190 |
for v in eval_results.values():
|
|
|
191 |
try:
|
192 |
v.to_dict() # we test if the dict version is complete
|
193 |
results.append(v)
|
|
|
77 |
task_value = task.value
|
78 |
if task_value.metric in scores:
|
79 |
results[task_value.benchmark] = Decimal(scores[task_value.metric])
|
|
|
80 |
|
81 |
|
82 |
return self(
|
|
|
109 |
|
110 |
def to_dict(self):
|
111 |
"""Converts the Eval Result to a dict compatible with our dataframe display"""
|
|
|
112 |
data_dict = {
|
113 |
"eval_name": self.eval_name, # not a column, just a save name,
|
114 |
AutoEvalColumn.precision.name: self.precision.value.name,
|
|
|
118 |
AutoEvalColumn.architecture.name: self.architecture,
|
119 |
AutoEvalColumn.model.name: make_clickable_model(self.full_model),
|
120 |
AutoEvalColumn.revision.name: self.revision,
|
|
|
121 |
AutoEvalColumn.license.name: self.license,
|
122 |
AutoEvalColumn.likes.name: self.likes,
|
123 |
AutoEvalColumn.params.name: self.num_params,
|
124 |
AutoEvalColumn.still_on_hub.name: self.still_on_hub,
|
125 |
}
|
126 |
+
|
127 |
for task in Tasks:
|
128 |
+
task_value = task.value
|
129 |
+
data_dict[task_value.col_name] = self.results[task_value.benchmark]
|
130 |
|
131 |
return data_dict
|
132 |
|
|
|
185 |
|
186 |
results = []
|
187 |
for v in eval_results.values():
|
188 |
+
print("v", v)
|
189 |
try:
|
190 |
v.to_dict() # we test if the dict version is complete
|
191 |
results.append(v)
|