code_eval

Runtime error

App Files Files Community

jjkim committed on Oct 10, 2023

Commit

346d7a2

1 Parent(s): 9cbcfb8

change inputs

Browse files

Files changed (1) hide show

code_eval.py +23 -10

code_eval.py CHANGED Viewed

@@ -22,6 +22,7 @@ from collections import Counter, defaultdict
 from concurrent.futures import CancelledError, ThreadPoolExecutor, as_completed
 from typing import List, Optional
 import time
 import datasets
 import evaluate
@@ -156,9 +157,12 @@ class CodeEval(evaluate.Metric):
     def _compute(
         self,
-        predictions,
         references,
-        task_ids=None,
         k=[1, 10, 100],
         num_workers=4,
         timeout=3.0,
@@ -174,18 +178,27 @@ class CodeEval(evaluate.Metric):
             raise NotImplementedError(
                 "This metric is currently not supported on Windows."
             )
-        task_ids = task_ids or list(range(len(predictions)))
         with ThreadPoolExecutor(max_workers=num_workers) as executor:
             results = {}
-            for tid, pred, ref in zip(task_ids, predictions, references):
                 results[tid] = []
-                for cid, candidate in enumerate(pred):
                     result = Result(task_id=tid, completion_id=cid)
-                    for test_case in ref:
-                        assert isinstance(test_case, str)
-                        test_program = candidate + "\n" + test_case
                         args = (test_program, timeout, tid, cid)
                         future = executor.submit(check_correctness, *args)
                         result.add(future)

 from concurrent.futures import CancelledError, ThreadPoolExecutor, as_completed
 from typing import List, Optional
 import time
+from string import Template
 import datasets
 import evaluate
     def _compute(
         self,
+        candidates,
+        cand_key,
+        cand_template,
         references,
+        ref_key,
+        ref_template,
         k=[1, 10, 100],
         num_workers=4,
         timeout=3.0,
             raise NotImplementedError(
                 "This metric is currently not supported on Windows."
             )
+        candidates = sorted(candidates, key=lambda x: x["id"])
+        references = sorted(references, key=lambda x: x["id"])
         with ThreadPoolExecutor(max_workers=num_workers) as executor:
             results = {}
+            for cand_d, ref_d in zip(candidates, references):
+                assert cand_d["id"] == ref_d["id"]
+                tid = cand_d["id"]
                 results[tid] = []
+                cand = cand_d[cand_key]
+                ref = ref_d[ref_key]
+                for cid, c in enumerate(cand):
                     result = Result(task_id=tid, completion_id=cid)
+                    body = Template(cand_template).safe_substitute(candidate=c)
+                    for r in ref:
+                        assert isinstance(r, str)
+                        test = Template(ref_template).safe_substitute(ref_key=r)
+                        test = Template(test).safe_substitute(reference=c)
+                        test_program = body + "\n" + test
                         args = (test_program, timeout, tid, cid)
                         future = executor.submit(check_correctness, *args)
                         result.add(future)