Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
pminervini
committed on
Commit
•
6411ad7
1
Parent(s):
dbd4d1b
update
Browse files
- backend-cli.py +1 -1
- src/backend/run_eval_suite.py +1 -1
backend-cli.py
CHANGED
@@ -74,7 +74,7 @@ def request_to_result_name(request: EvalRequest) -> str:
|
|
74 |
|
75 |
def process_evaluation(task: Task, eval_request: EvalRequest) -> dict:
|
76 |
results = run_evaluation(eval_request=eval_request, task_names=[task.benchmark], num_fewshot=task.num_fewshot,
|
77 |
-
batch_size=1, device=DEVICE, use_cache=
|
78 |
|
79 |
dumped = json.dumps(results, indent=2)
|
80 |
print(dumped)
|
|
|
74 |
|
75 |
def process_evaluation(task: Task, eval_request: EvalRequest) -> dict:
|
76 |
results = run_evaluation(eval_request=eval_request, task_names=[task.benchmark], num_fewshot=task.num_fewshot,
|
77 |
+
batch_size=1, device=DEVICE, use_cache=None, limit=LIMIT)
|
78 |
|
79 |
dumped = json.dumps(results, indent=2)
|
80 |
print(dumped)
|
src/backend/run_eval_suite.py
CHANGED
@@ -6,7 +6,7 @@ import logging
|
|
6 |
logging.getLogger("openai").setLevel(logging.WARNING)
|
7 |
|
8 |
|
9 |
-
def run_evaluation(eval_request: EvalRequest, task_names, num_fewshot, batch_size, device, use_cache=
|
10 |
if limit:
|
11 |
print("WARNING: --limit SHOULD ONLY BE USED FOR TESTING. REAL METRICS SHOULD NOT BE COMPUTED USING LIMIT.")
|
12 |
|
|
|
6 |
logging.getLogger("openai").setLevel(logging.WARNING)
|
7 |
|
8 |
|
9 |
+
def run_evaluation(eval_request: EvalRequest, task_names, num_fewshot, batch_size, device, use_cache=None, limit=None) -> dict:
|
10 |
if limit:
|
11 |
print("WARNING: --limit SHOULD ONLY BE USED FOR TESTING. REAL METRICS SHOULD NOT BE COMPUTED USING LIMIT.")
|
12 |
|