jjkim committed · Commit dc264fe
1 Parent(s): f435ec5
- code_eval.py +8 -6
code_eval.py CHANGED

@@ -20,7 +20,7 @@ import itertools
 import os
 from collections import Counter, defaultdict
 from concurrent.futures import CancelledError, ThreadPoolExecutor, as_completed
-from typing import List, Optional
+from typing import Dict, List, Optional
 import time
 from string import Template
 
@@ -145,8 +145,8 @@ class CodeEval(evaluate.Metric):
             # This defines the format of each prediction and reference
             features=datasets.Features(
                 {
-                    "predictions":
-                    "references":
+                    "predictions": List[Dict],
+                    "references": List[Dict],
                 }
             ),
             homepage="https://github.com/openai/human-eval",
@@ -178,7 +178,7 @@ class CodeEval(evaluate.Metric):
             raise NotImplementedError(
                 "This metric is currently not supported on Windows."
             )
-
+
         predictions = sorted(predictions, key=lambda x: x["id"])
         references = sorted(references, key=lambda x: x["id"])
         with ThreadPoolExecutor(max_workers=num_workers) as executor:
@@ -186,7 +186,7 @@ class CodeEval(evaluate.Metric):
             for pred_d, ref_d in zip(predictions, references):
                 assert pred_d["id"] == ref_d["id"]
                 tid = pred_d["id"]
-
+
                 results[tid] = []
                 pred = pred_d[pred_key]
                 ref = ref_d[ref_key]
@@ -204,7 +204,9 @@ class CodeEval(evaluate.Metric):
                     result.add(future)
                     results[tid].append(result)
 
-            pbar = tqdm(
+            pbar = tqdm(
+                total=sum(len(r) for r in results.values()), disable=disable_tqdm
+            )
             prev_done_count = 0
             done = False
             while not done:
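
Notes on the change (illustrative sketches, not code from this repository).

The new List[Dict] feature spec, together with the id-based sorting and pairing in _compute, suggests inputs shaped roughly as below. The actual field names behind pred_key and ref_key are not visible in this diff, so "candidates" and "test" are assumed placeholders.

# Hypothetical input shape implied by the List[Dict] features and the id-based
# pairing; "candidates" and "test" are assumed key names, not from the diff.
predictions = [
    {"id": 1, "candidates": ["def add(a, b): return a + b"]},
    {"id": 0, "candidates": ["def neg(x): return -x"]},
]
references = [
    {"id": 0, "test": "assert neg(5) == -5"},
    {"id": 1, "test": "assert add(1, 2) == 3"},
]

# Both lists are sorted by "id" and zipped, so each prediction dict is paired
# with the reference dict carrying the same id (as in the 186-192 hunk).
predictions = sorted(predictions, key=lambda x: x["id"])
references = sorted(references, key=lambda x: x["id"])
for pred_d, ref_d in zip(predictions, references):
    assert pred_d["id"] == ref_d["id"]

The three added tqdm lines size the progress bar from the per-task result lists rather than a fixed count. A minimal, self-contained sketch of that polling pattern, with a stand-in check_one worker and an assumed poll interval, could look like:

# Minimal sketch of the progress-bar pattern the added lines introduce: the
# tqdm total is the number of scheduled futures (summed over the per-task
# lists), and the bar advances by the delta of completed futures each pass.
# check_one and the 0.05 s sleep are assumptions, not the metric's code.
import time
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm

def check_one(task_id, candidate):
    time.sleep(0.01)  # stand-in for executing a candidate against its test
    return {"task_id": task_id, "passed": True}

tasks = {0: ["cand_a", "cand_b"], 1: ["cand_c"]}
results = {}
with ThreadPoolExecutor(max_workers=4) as executor:
    futures = []
    for tid, candidates in tasks.items():
        results[tid] = []
        for cand in candidates:
            future = executor.submit(check_one, tid, cand)
            results[tid].append(future)
            futures.append(future)

    pbar = tqdm(total=sum(len(r) for r in results.values()))
    prev_done_count = 0
    done = False
    while not done:
        done_count = sum(f.done() for f in futures)
        pbar.update(done_count - prev_done_count)  # advance only by new completions
        prev_done_count = done_count
        done = done_count == len(futures)
        time.sleep(0.05)
    pbar.close()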
|