sh1gechan committed on
Commit
6d1c2ae
·
verified ·
1 Parent(s): 167f442

Update src/leaderboard/read_evals.py

Browse files
Files changed (1) hide show
  1. src/leaderboard/read_evals.py +5 -4
src/leaderboard/read_evals.py CHANGED
@@ -109,6 +109,7 @@ class EvalResult:
109
 
110
  def to_dict(self):
111
  """Converts the Eval Result to a dict compatible with our dataframe display"""
 
112
  data_dict = {
113
  "eval_name": self.eval_name, # not a column, just a save name,
114
  AutoEvalColumn.precision.name: self.precision.value.name,
@@ -118,15 +119,16 @@ class EvalResult:
118
  AutoEvalColumn.architecture.name: self.architecture,
119
  AutoEvalColumn.model.name: make_clickable_model(self.full_model),
120
  AutoEvalColumn.revision.name: self.revision,
 
121
  AutoEvalColumn.license.name: self.license,
122
  AutoEvalColumn.likes.name: self.likes,
123
  AutoEvalColumn.params.name: self.num_params,
124
  AutoEvalColumn.still_on_hub.name: self.still_on_hub,
125
  }
126
-
 
127
  for task in Tasks:
128
- task_value = task.value
129
- data_dict[task_value.col_name] = self.results[task_value.benchmark]
130
 
131
  return data_dict
132
 
@@ -185,7 +187,6 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResu
185
 
186
  results = []
187
  for v in eval_results.values():
188
- print("v", v)
189
  try:
190
  v.to_dict() # we test if the dict version is complete
191
  results.append(v)
 
109
 
110
  def to_dict(self):
111
  """Converts the Eval Result to a dict compatible with our dataframe display"""
112
+ average = sum([v for v in self.results.values() if v is not None]) / len(Tasks)
113
  data_dict = {
114
  "eval_name": self.eval_name, # not a column, just a save name,
115
  AutoEvalColumn.precision.name: self.precision.value.name,
 
119
  AutoEvalColumn.architecture.name: self.architecture,
120
  AutoEvalColumn.model.name: make_clickable_model(self.full_model),
121
  AutoEvalColumn.revision.name: self.revision,
122
+ AutoEvalColumn.average.name: average,
123
  AutoEvalColumn.license.name: self.license,
124
  AutoEvalColumn.likes.name: self.likes,
125
  AutoEvalColumn.params.name: self.num_params,
126
  AutoEvalColumn.still_on_hub.name: self.still_on_hub,
127
  }
128
+
129
+ print("Tasks", Tasks)
130
  for task in Tasks:
131
+ data_dict[task.value.col_name] = self.results[task.value.benchmark]
 
132
 
133
  return data_dict
134
 
 
187
 
188
  results = []
189
  for v in eval_results.values():
 
190
  try:
191
  v.to_dict() # we test if the dict version is complete
192
  results.append(v)