Spaces:
Sleeping
Sleeping
import os | |
import json | |
import gzip | |
from typing import Dict, Iterable | |
def stream_jsonl(filename: str) -> Iterable[Dict]:
    """
    Lazily parse a JSON Lines file, yielding one decoded object per line.

    Files whose name ends in ``.gz`` are transparently decompressed; any
    other file is read as plain text. Lines that are empty or contain only
    whitespace are skipped.

    Args:
        filename: Path to a ``.jsonl`` or ``.jsonl.gz`` file.

    Yields:
        The result of ``json.loads`` for every non-blank line, in file order.

    Raises:
        OSError: If the file cannot be opened or read.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # gzip.open accepts a path and a text mode directly, so both cases share
    # a single read loop; only the opener differs.
    opener = gzip.open if filename.endswith(".gz") else open
    # Decode explicitly as UTF-8 rather than relying on the platform's
    # locale-dependent default, which corrupts or rejects non-ASCII JSON.
    with opener(filename, "rt", encoding="utf-8") as fp:
        for line in fp:
            # str.strip() is empty exactly when every character is whitespace.
            if line.strip():
                yield json.loads(line)
def load_solutions(samples) -> Iterable[Dict]:
    """
    Tag each sample with an ``_identifier`` and yield it.

    The identifier is the sample's ``task_id`` followed by its 1-based
    position in ``samples``. Each sample is modified in place and the same
    object is yielded back, in input order.
    """
    for line_no, entry in enumerate(samples, start=1):
        suffix = f" (line {line_no} )"
        entry["_identifier"] = entry["task_id"] + suffix
        yield entry