import glob
import json
import logging
import os

import gradio as gr
import numpy as np
import pandas as pd
from apscheduler.schedulers.background import BackgroundScheduler
from huggingface_hub import HfApi
from rliable import library as rly
from rliable import metrics

from src.backend import backend_routine
from src.logging import configure_root_logger, setup_logger

configure_root_logger()
logger = setup_logger(__name__)

logging.getLogger("absl").setLevel(logging.WARNING)

API = HfApi(token=os.environ.get("TOKEN"))
RESULTS_REPO = "open-rl-leaderboard/results"
REFRESH_RATE = 5 * 60  # 5 minutes
ALL_ENV_IDS = {
    "Atari": [
        "AdventureNoFrameskip-v4",
        "AirRaidNoFrameskip-v4",
        "AlienNoFrameskip-v4",
        "AmidarNoFrameskip-v4",
        "AssaultNoFrameskip-v4",
        "AsterixNoFrameskip-v4",
        "AsteroidsNoFrameskip-v4",
        "AtlantisNoFrameskip-v4",
        "BankHeistNoFrameskip-v4",
        "BattleZoneNoFrameskip-v4",
        "BeamRiderNoFrameskip-v4",
        "BerzerkNoFrameskip-v4",
        "BowlingNoFrameskip-v4",
        "BoxingNoFrameskip-v4",
        "BreakoutNoFrameskip-v4",
        "CarnivalNoFrameskip-v4",
        "CentipedeNoFrameskip-v4",
        "ChopperCommandNoFrameskip-v4",
        "CrazyClimberNoFrameskip-v4",
        "DefenderNoFrameskip-v4",
        "DemonAttackNoFrameskip-v4",
        "DoubleDunkNoFrameskip-v4",
        "ElevatorActionNoFrameskip-v4",
        "EnduroNoFrameskip-v4",
        "FishingDerbyNoFrameskip-v4",
        "FreewayNoFrameskip-v4",
        "FrostbiteNoFrameskip-v4",
        "GopherNoFrameskip-v4",
        "GravitarNoFrameskip-v4",
        "HeroNoFrameskip-v4",
        "IceHockeyNoFrameskip-v4",
        "JamesbondNoFrameskip-v4",
        "JourneyEscapeNoFrameskip-v4",
        "KangarooNoFrameskip-v4",
        "KrullNoFrameskip-v4",
        "KungFuMasterNoFrameskip-v4",
        "MontezumaRevengeNoFrameskip-v4",
        "MsPacmanNoFrameskip-v4",
        "NameThisGameNoFrameskip-v4",
        "PhoenixNoFrameskip-v4",
        "PitfallNoFrameskip-v4",
        "PongNoFrameskip-v4",
        "PooyanNoFrameskip-v4",
        "PrivateEyeNoFrameskip-v4",
        "QbertNoFrameskip-v4",
        "RiverraidNoFrameskip-v4",
        "RoadRunnerNoFrameskip-v4",
        "RobotankNoFrameskip-v4",
        "SeaquestNoFrameskip-v4",
        "SkiingNoFrameskip-v4",
        "SolarisNoFrameskip-v4",
        "SpaceInvadersNoFrameskip-v4",
        "StarGunnerNoFrameskip-v4",
        "TennisNoFrameskip-v4",
        "TimePilotNoFrameskip-v4",
        "TutankhamNoFrameskip-v4",
        "UpNDownNoFrameskip-v4",
        "VentureNoFrameskip-v4",
        "VideoPinballNoFrameskip-v4",
        "WizardOfWorNoFrameskip-v4",
        "YarsRevengeNoFrameskip-v4",
        "ZaxxonNoFrameskip-v4",
    ],
    "Box2D": [
        "BipedalWalker-v3",
        "BipedalWalkerHardcore-v3",
        "CarRacing-v2",
        "LunarLander-v2",
        "LunarLanderContinuous-v2",
    ],
    "Toy text": [
        "Blackjack-v1",
        "CliffWalking-v0",
        "FrozenLake-v1",
        "FrozenLake8x8-v1",
    ],
    "Classic control": [
        "Acrobot-v1",
        "CartPole-v1",
        "MountainCar-v0",
        "MountainCarContinuous-v0",
        "Pendulum-v1",
    ],
    "MuJoCo": [
        "Ant-v4",
        "HalfCheetah-v4",
        "Hopper-v4",
        "Humanoid-v4",
        "HumanoidStandup-v4",
        "InvertedDoublePendulum-v4",
        "InvertedPendulum-v4",
        "Pusher-v4",
        "Reacher-v4",
        "Swimmer-v4",
        "Walker2d-v4",
    ],
}


def iqm(x):
    """Return the interquartile mean (rliable's ``aggregate_iqm``) of ``x``.

    Args:
        x: Sequence of scores (here: the episodic returns of one run).

    Returns:
        The IQM as a scalar.
    """
    # rliable's aggregate functions are applied to a 2-D score matrix; the
    # original code used one column, so keep that layout.
    scores = np.expand_dims(np.array(x), 1)
    # Previously this went through rly.get_interval_estimates(..., reps=1000)
    # and threw the confidence intervals away. That function's point estimate
    # is the aggregate function applied to the scores, so call it directly
    # and skip the bootstrap resampling.
    return np.array([metrics.aggregate_iqm(scores)])[0]


def get_leaderboard_df():
    """Download the results dataset and build one row per result file.

    Every ``results_*.json`` found (recursively) in a snapshot of
    ``RESULTS_REPO`` contributes a row with ``user_id``, ``model_id`` and
    ``model_sha``. Reports whose status is ``"DONE"`` and that carry results
    additionally get ``env_id`` and ``iqm_episodic_return``.

    Returns:
        A DataFrame that always has the columns ``user_id``, ``model_id``,
        ``model_sha``, ``env_id`` and ``iqm_episodic_return``; missing values
        are replaced by empty strings.

    Raises:
        AssertionError: If a finished report contains results for more than
            one environment.
    """
    dir_path = API.snapshot_download(repo_id=RESULTS_REPO, repo_type="dataset")
    pattern = os.path.join(dir_path, "**", "results_*.json")

    rows = []
    for filename in glob.glob(pattern, recursive=True):
        with open(filename, encoding="utf-8") as fp:
            report = json.load(fp)

        user_id, model_id = report["config"]["model_id"].split("/")
        row = {"user_id": user_id, "model_id": model_id, "model_sha": report["config"]["model_sha"]}

        results = report["results"] if report["status"] == "DONE" else {}
        if len(results) > 0:
            env_ids = list(results.keys())
            # Explicit raise instead of `assert` so the check survives `python -O`.
            if len(env_ids) != 1:
                raise AssertionError("Only one environment supported for the moment")
            env_id = env_ids[0]
            row["env_id"] = env_id
            row["iqm_episodic_return"] = iqm(results[env_id]["episodic_returns"])
        rows.append(row)

    # Fix the column set up front: with no result files, or no finished
    # report, the frame would otherwise lack "env_id" /
    # "iqm_episodic_return" and select_env() would fail with a KeyError.
    columns = ["user_id", "model_id", "model_sha", "env_id", "iqm_episodic_return"]
    df = pd.DataFrame(rows, columns=columns)  # create DataFrame
    df = df.fillna("")  # replace NaN values with empty strings
    return df


def select_env(df: pd.DataFrame, env_id: str) -> pd.DataFrame:
    """Return the rows of ``df`` for ``env_id``, best first, with a ranking.

    Rows are sorted by ``iqm_episodic_return`` in descending order and get a
    1-based ``ranking`` column.
    """
    df = df[df["env_id"] == env_id]
    df = df.sort_values("iqm_episodic_return", ascending=False)
    df["ranking"] = np.arange(1, len(df) + 1)
    return df


def format_df(df: pd.DataFrame):
    """Turn a ranked per-environment frame into rows for display.

    ``user_id`` and ``model_id`` are replaced by Markdown links to the
    corresponding Hugging Face pages. The input frame is not modified.

    Returns:
        A list of ``[ranking, user_id, model_id, iqm_episodic_return]`` lists.
    """
    df = df.copy()
    # Add hyperlinks. Both link columns are built from the raw ids before
    # either column is overwritten, since the model link needs the plain
    # user id.
    user_ids = df["user_id"].tolist()
    model_ids = df["model_id"].tolist()
    df["user_id"] = [f"[{user_id}](https://huggingface.co/{user_id})" for user_id in user_ids]
    df["model_id"] = [
        f"[{model_id}](https://huggingface.co/{user_id}/{model_id})"
        for user_id, model_id in zip(user_ids, model_ids)
    ]
    # Keep only the relevant columns
    df = df[["ranking", "user_id", "model_id", "iqm_episodic_return"]]
    return df.values.tolist()


def refresh_dataframes():
    """Rebuild the formatted leaderboard table of every environment.

    Returns:
        One ``format_df`` result per entry of ``all_env_ids``, in that order.
    """
    # NOTE(review): `all_env_ids` is not defined in the visible part of this
    # file (only ALL_ENV_IDS is); presumably it is assigned further down.
    df = get_leaderboard_df()
    return [format_df(select_env(df, env_id)) for env_id in all_env_ids]


def refresh_videos():
    """Fetch the replay video of the top-ranked model of every environment.

    Returns:
        One entry per entry of ``all_env_ids``: the local path of the
        downloaded ``replay.mp4`` of the best model, or ``None`` when the
        environment has no ranked model.
    """
    df = get_leaderboard_df()
    outputs = []
    for env_id in all_env_ids:
        env_df = select_env(df, env_id)
        if env_df.empty:
            outputs.append(None)
            continue
        best = env_df.iloc[0]  # select_env sorts best-first
        repo_id = f"{best['user_id']}/{best['model_id']}"
        video_path = API.hf_hub_download(
            repo_id=repo_id, filename="replay.mp4", revision=best["model_sha"], repo_type="model"
        )
        outputs.append(video_path)
    return outputs


def refresh_winners():
    df = get_leaderboard_df()
    outputs = []
    for env_id in all_env_ids:
        env_df = select_env(df, env_id)
        if not env_df.empty:
            winner = f'{env_df.iloc[0]["user_id"]}/{env_df.iloc[0]["model_id"]}'
            outputs.append(
                f"""## {env_id} ### π [{winner}](https://huggingface.co/{winner}) π"""
            )
        # # Or in HTML:
        # outputs.append(f'