import json
import os
import re
import tempfile

from huggingface_hub import CommitOperationAdd, HfApi

from src.evaluation import evaluate
from src.logging import setup_logger

logger = setup_logger(__name__)

API = HfApi(token=os.environ.get("TOKEN"))
RESULTS_REPO = "open-rl-leaderboard/results"
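# Assumption based on the create_commit call below: TOKEN must hold a
# Hugging Face token with write access to RESULTS_REPO.
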
def _backend_routine():
    # List only the reinforcement learning models
    rl_models = list(API.list_models(filter="reinforcement-learning"))
    logger.info(f"Found {len(rl_models)} RL models")
    compatible_models = []
    for model in rl_models:
        filenames = [sib.rfilename for sib in model.siblings]
        if "agent.pt" in filenames:
            compatible_models.append((model.modelId, model.sha))
    logger.info(f"Found {len(compatible_models)} compatible models")

    # Get the results
    pattern = re.compile(r"^[^/]*/[^/]*/[^/]*results_[a-f0-9]+\.json$")
    filenames = API.list_repo_files(RESULTS_REPO, repo_type="dataset")
    filenames = [filename for filename in filenames if pattern.match(filename)]
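    # A matching path looks like "org/model-name/results_<model_sha>.json",
    # mirroring the path_in_repo written by the commit step below.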
    evaluated_models = set()
    for filename in filenames:
        path = API.hf_hub_download(repo_id=RESULTS_REPO, filename=filename, repo_type="dataset")
        with open(path) as fp:
            report = json.load(fp)
        evaluated_models.add((report["config"]["model_id"], report["config"]["model_sha"]))

    # Find the models that are not associated with any results
    pending_models = set(compatible_models) - evaluated_models
    logger.info(f"Found {len(pending_models)} pending models")

    # Run an evaluation on the models
    with tempfile.TemporaryDirectory() as tmp_dir:
        commits = []
        for model_id, sha in pending_models:
            logger.info(f"Running evaluation on {model_id}")
            report = {"config": {"model_id": model_id, "model_sha": sha}}
            try:
                evaluations = evaluate(model_id, revision=sha)
            except Exception as e:
                logger.error(f"Error evaluating {model_id}: {e}")
                evaluations = None

            if evaluations is not None:
                report["results"] = evaluations
                report["status"] = "DONE"
            else:
                report["status"] = "FAILED"
            # Update the results
            dumped = json.dumps(report, indent=2)
            path_in_repo = f"{model_id}/results_{sha}.json"
            local_path = os.path.join(tmp_dir, path_in_repo)
            os.makedirs(os.path.dirname(local_path), exist_ok=True)
            with open(local_path, "w") as f:
                f.write(dumped)
            commits.append(CommitOperationAdd(path_in_repo=path_in_repo, path_or_fileobj=local_path))

        if len(commits) > 0:
            API.create_commit(
                repo_id=RESULTS_REPO, commit_message="Add evaluation results", operations=commits, repo_type="dataset"
            )


def backend_routine():
    try:
        _backend_routine()
    except Exception as e:
        logger.error(f"{e.__class__.__name__}: {str(e)}")