sh1gechan committed
Commit 167f442 · verified · 1 Parent(s): 29b13b8

Update src/leaderboard/read_evals.py

Files changed (1)
  1. src/leaderboard/read_evals.py +4 -6
src/leaderboard/read_evals.py CHANGED
@@ -77,7 +77,6 @@ class EvalResult:
             task_value = task.value
             if task_value.metric in scores:
                 results[task_value.benchmark] = Decimal(scores[task_value.metric])
-                print(f"Debug: {task_value.benchmark} = {results[task_value.benchmark]}")
 
 
         return self(
@@ -110,7 +109,6 @@ class EvalResult:
 
     def to_dict(self):
         """Converts the Eval Result to a dict compatible with our dataframe display"""
-        average = sum([v for v in self.results.values() if v is not None]) / len(self.results)
         data_dict = {
             "eval_name": self.eval_name, # not a column, just a save name,
             AutoEvalColumn.precision.name: self.precision.value.name,
@@ -120,16 +118,15 @@ class EvalResult:
             AutoEvalColumn.architecture.name: self.architecture,
             AutoEvalColumn.model.name: make_clickable_model(self.full_model),
             AutoEvalColumn.revision.name: self.revision,
-            AutoEvalColumn.average.name: average,
             AutoEvalColumn.license.name: self.license,
             AutoEvalColumn.likes.name: self.likes,
             AutoEvalColumn.params.name: self.num_params,
             AutoEvalColumn.still_on_hub.name: self.still_on_hub,
         }
-
+
         for task in Tasks:
-            data_dict[task.value.col_name] = self.results.get(task.value.benchmark, None)
-            print(f"Debug: {task.value.col_name} = {self.results.get(task.value.benchmark, 'N/A')}")
+            task_value = task.value
+            data_dict[task_value.col_name] = self.results[task_value.benchmark]
 
         return data_dict
 
@@ -188,6 +185,7 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResult]:
 
     results = []
     for v in eval_results.values():
+        print("v", v)
        try:
            v.to_dict() # we test if the dict version is complete
            results.append(v)
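
Note on the to_dict change: replacing self.results.get(task.value.benchmark, None) with direct indexing means a missing benchmark score now raises KeyError inside to_dict(), so the try/except around v.to_dict() in get_raw_eval_results skips that entry instead of showing a row with blank cells. Below is a minimal sketch of that filtering behaviour; FakeResult and the benchmark names are made up for illustration (they are not the leaderboard's real EvalResult/Tasks objects), and it assumes the surrounding except clause, which is not shown in this diff, catches KeyError.

from decimal import Decimal

class FakeResult:
    """Hypothetical stand-in for EvalResult, just enough for this sketch."""
    def __init__(self, eval_name, results):
        self.eval_name = eval_name
        self.results = results  # {benchmark: Decimal}

    def to_dict(self, benchmarks):
        data_dict = {"eval_name": self.eval_name}
        for benchmark in benchmarks:
            # Direct indexing, as in the new code: raises KeyError if a score is missing.
            data_dict[benchmark] = self.results[benchmark]
        return data_dict

benchmarks = ["benchmark_a", "benchmark_b"]  # made-up benchmark names
candidates = [
    FakeResult("model-complete", {"benchmark_a": Decimal("0.81"), "benchmark_b": Decimal("0.74")}),
    FakeResult("model-incomplete", {"benchmark_a": Decimal("0.65")}),  # benchmark_b missing
]

# Same pattern as get_raw_eval_results: keep only results whose dict version is complete.
kept = []
for v in candidates:
    try:
        v.to_dict(benchmarks)  # we test if the dict version is complete
        kept.append(v)
    except KeyError:
        continue

print([r.eval_name for r in kept])  # ['model-complete'] -- the incomplete entry is dropped

With the old .get(..., None) default, to_dict() always succeeded and incomplete rows slipped through; removing the default is what turns the existing try/except into an effective completeness check.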