bcb_evaluator_testing / api /bigcodebench_data.py
jjyang77
update samples input from file to data list
da384b4
raw
history blame
850 Bytes
import os
import json
import gzip
from typing import Dict, Iterable
def stream_jsonl(filename: str) -> Iterable[Dict]:
    """
    Parses each jsonl line and yields it as a dictionary

    Args:
        filename: Path to a JSON Lines file. A path ending in ``.gz`` is
            transparently decompressed with gzip.

    Yields:
        The object decoded by ``json.loads`` from each non-blank line, in
        file order. Lines that are empty or whitespace-only are skipped.

    Raises:
        OSError: If the file cannot be opened or read.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Pick the opener once so the plain and gzip paths share one read loop.
    opener = gzip.open if filename.endswith(".gz") else open
    # JSON text is UTF-8; decode explicitly rather than relying on the
    # platform's locale encoding, which is not UTF-8 everywhere.
    with opener(filename, "rt", encoding="utf-8") as fp:
        for line in fp:
            # strip() leaves an empty (falsy) string for blank lines.
            if line.strip():
                yield json.loads(line)
def load_solutions(samples) -> Iterable[Dict]:
    """
    Tags each sample with an ``_identifier`` key and yields it

    The identifier is the sample's ``task_id`` followed by its 1-based
    position in ``samples``. Each sample is modified in place and the very
    same object is yielded, lazily, in input order.
    """
    for position, entry in enumerate(samples, start=1):
        # The position is labelled "line" in the identifier text.
        suffix = f" (line {position} )"
        entry["_identifier"] = entry["task_id"] + suffix
        yield entry