Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Alina Lozovskaia
committed on
Commit
·
9b133aa
1
Parent(s):
79b2cd5
more read_evals.py improvement
Browse files
src/leaderboard/filter_models.py
CHANGED
@@ -141,7 +141,7 @@ def flag_models(leaderboard_data: list[dict]):
|
|
141 |
else:
|
142 |
flag_key = model_data[AutoEvalColumn.fullname.name]
|
143 |
if flag_key in FLAGGED_MODELS:
|
144 |
-
logging.info(f"Flagged model: {flag_key}")
|
145 |
issue_num = FLAGGED_MODELS[flag_key].split("/")[-1]
|
146 |
issue_link = model_hyperlink(
|
147 |
FLAGGED_MODELS[flag_key],
|
|
|
141 |
else:
|
142 |
flag_key = model_data[AutoEvalColumn.fullname.name]
|
143 |
if flag_key in FLAGGED_MODELS:
|
144 |
+
# logging.info(f"Flagged model: {flag_key}") # Do we need to print out the list of flagged models?
|
145 |
issue_num = FLAGGED_MODELS[flag_key].split("/")[-1]
|
146 |
issue_link = model_hyperlink(
|
147 |
FLAGGED_MODELS[flag_key],
|
src/leaderboard/read_evals.py
CHANGED
@@ -4,11 +4,13 @@ from datetime import datetime
|
|
4 |
from json import JSONDecodeError
|
5 |
import logging
|
6 |
import math
|
7 |
-
|
8 |
from dataclasses import dataclass, field
|
9 |
from typing import Optional, Dict, List
|
10 |
|
11 |
-
import
|
|
|
|
|
12 |
import numpy as np
|
13 |
|
14 |
from src.display.formatting import make_clickable_model
|
@@ -213,15 +215,16 @@ def get_raw_eval_results(results_path: str, requests_path: str, dynamic_path: st
|
|
213 |
dynamic_data = json.load(f)
|
214 |
|
215 |
results_path = Path(results_path)
|
216 |
-
|
217 |
model_files = list(results_path.rglob('results_*.json'))
|
218 |
model_files.sort(key=lambda file: parse_datetime(file.stem.removeprefix("results_")))
|
219 |
|
220 |
eval_results = {}
|
221 |
-
for model_result_filepath in model_files:
|
|
|
222 |
# Creation of result
|
223 |
eval_result = EvalResult.init_from_json_file(model_result_filepath)
|
224 |
-
|
|
|
225 |
|
226 |
if eval_result.full_model in dynamic_data:
|
227 |
eval_result.update_with_dynamic_file_dict(dynamic_data[eval_result.full_model])
|
|
|
4 |
from json import JSONDecodeError
|
5 |
import logging
|
6 |
import math
|
7 |
+
|
8 |
from dataclasses import dataclass, field
|
9 |
from typing import Optional, Dict, List
|
10 |
|
11 |
+
from tqdm import tqdm
|
12 |
+
from tqdm.contrib.logging import logging_redirect_tqdm
|
13 |
+
|
14 |
import numpy as np
|
15 |
|
16 |
from src.display.formatting import make_clickable_model
|
|
|
215 |
dynamic_data = json.load(f)
|
216 |
|
217 |
results_path = Path(results_path)
|
|
|
218 |
model_files = list(results_path.rglob('results_*.json'))
|
219 |
model_files.sort(key=lambda file: parse_datetime(file.stem.removeprefix("results_")))
|
220 |
|
221 |
eval_results = {}
|
222 |
+
# Wrap model_files iteration with tqdm for progress display
|
223 |
+
for model_result_filepath in tqdm(model_files, desc="Processing model files"):
|
224 |
# Creation of result
|
225 |
eval_result = EvalResult.init_from_json_file(model_result_filepath)
|
226 |
+
with logging_redirect_tqdm():
|
227 |
+
eval_result.update_with_request_file(requests_path)
|
228 |
|
229 |
if eval_result.full_model in dynamic_data:
|
230 |
eval_result.update_with_dynamic_file_dict(dynamic_data[eval_result.full_model])
|