jjkim committed
Commit dc264fe · Parent(s): f435ec5
Files changed (1):
  1. code_eval.py +8 -6
code_eval.py CHANGED
@@ -20,7 +20,7 @@ import itertools
 import os
 from collections import Counter, defaultdict
 from concurrent.futures import CancelledError, ThreadPoolExecutor, as_completed
-from typing import List, Optional
+from typing import Dict, List, Optional
 import time
 from string import Template

@@ -145,8 +145,8 @@ class CodeEval(evaluate.Metric):
             # This defines the format of each prediction and reference
             features=datasets.Features(
                 {
-                    "predictions": list,
-                    "references": list,
+                    "predictions": List[Dict],
+                    "references": List[Dict],
                 }
             ),
             homepage="https://github.com/openai/human-eval",
@@ -178,7 +178,7 @@ class CodeEval(evaluate.Metric):
             raise NotImplementedError(
                 "This metric is currently not supported on Windows."
             )
-
+
         predictions = sorted(predictions, key=lambda x: x["id"])
         references = sorted(references, key=lambda x: x["id"])
         with ThreadPoolExecutor(max_workers=num_workers) as executor:
@@ -186,7 +186,7 @@ class CodeEval(evaluate.Metric):
             for pred_d, ref_d in zip(predictions, references):
                 assert pred_d["id"] == ref_d["id"]
                 tid = pred_d["id"]
-
+
                 results[tid] = []
                 pred = pred_d[pred_key]
                 ref = ref_d[ref_key]
@@ -204,7 +204,9 @@ class CodeEval(evaluate.Metric):
                     result.add(future)
                 results[tid].append(result)

-            pbar = tqdm(total=sum(len(r) for r in results.values()), disable=disable_tqdm)
+            pbar = tqdm(
+                total=sum(len(r) for r in results.values()), disable=disable_tqdm
+            )
             prev_done_count = 0
             done = False
             while not done:
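
For context, a minimal usage sketch of the input format implied by this diff. Only the dict-per-item shape and the shared "id" key (by which _compute sorts and pairs both lists) are confirmed by the change; the metric path, the "prediction"/"reference" field names behind pred_key/ref_key, and the k argument are assumptions carried over from the upstream openai/human-eval code_eval metric.

# Hedged sketch; names not visible in the diff are assumptions.
import os
import evaluate

os.environ["HF_ALLOW_CODE_EVAL"] = "1"  # required by the upstream code_eval metric

code_eval = evaluate.load("jjkim/code_eval")  # illustrative path, not confirmed

# Each prediction/reference is a dict carrying a shared "id"; _compute sorts
# both lists by "id" and asserts the pairs line up before scheduling work.
predictions = [
    {"id": "task_0", "prediction": ["def add(a, b):\n    return a + b"]},
]
references = [
    {"id": "task_0", "reference": "assert add(1, 2) == 3"},
]

pass_at_k = code_eval.compute(
    predictions=predictions,
    references=references,
    k=[1],  # assumed to pass through unchanged from the upstream metric
)
print(pass_at_k)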