---
tags:
- deep-reinforcement-learning
- reinforcement-learning
- stable-baselines3
---

# PPO Agent playing PongNoFrameskip-v4

This is a trained model of a PPO agent playing PongNoFrameskip-v4 using the stable-baselines3 library (our agent is the 🟢 one).

## Evaluation Results

Mean reward: 21.00 +/- 0.00

## Usage (with Stable-baselines3)

### Watch your agent interact (in Google Colab)

- You need to use `gym==0.19` since it **includes the Atari ROMs**.
- The action space is 6 since we use only the **legal actions**.

```python
# Install these libraries in one cell (don't forget to restart the runtime after installing them)
!pip install stable-baselines3[extra]
!pip install huggingface_sb3
!pip install huggingface_hub
!pip install pickle5
```

Don't forget to restart the runtime before running the code below:

```python
# Import the libraries
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_atari_env
from stable_baselines3.common.vec_env import VecFrameStack, VecVideoRecorder
from stable_baselines3.common.evaluation import evaluate_policy
from huggingface_sb3 import load_from_hub

# Download the checkpoint from the Hugging Face Hub
checkpoint = load_from_hub("ThomasSimonini/ppo-PongNoFrameskip-v4", "ppo-PongNoFrameskip-v4.zip")

# Colab runs Python 3.7 but this agent was trained with Python 3.8,
# so we override these objects to avoid pickle errors when loading:
custom_objects = {
    "learning_rate": 0.0,
    "lr_schedule": lambda _: 0.0,
    "clip_range": lambda _: 0.0,
}

model = PPO.load(checkpoint, custom_objects=custom_objects)
```

### Evaluate the agent

```python
# Recreate the training setup: Atari wrappers + 4 stacked frames
env = make_atari_env("PongNoFrameskip-v4", n_envs=1)
env = VecFrameStack(env, n_stack=4)

mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=10)
print(f"mean_reward={mean_reward:.2f} +/- {std_reward:.2f}")
```

### Generate a video of your agent performing with Colab

We record one episode with Stable-Baselines3's `VecVideoRecorder`, which works directly on the vectorized environment; the video is saved in the `videos/` folder.

```python
!pip install gym pyvirtualdisplay > /dev/null 2>&1
!apt-get install -y xvfb python-opengl ffmpeg > /dev/null 2>&1
```

```python
from pyvirtualdisplay import Display

# Start a virtual display so the environment can render without a screen
virtual_display = Display(visible=0, size=(1400, 900))
virtual_display.start()

# Wrap the evaluation environment with a video recorder
env = VecVideoRecorder(env, "videos",
                       record_video_trigger=lambda step: step == 0,
                       video_length=2000,
                       name_prefix="ppo-PongNoFrameskip-v4")

observation = env.reset()
terminal = False
while not terminal:
    action, _state = model.predict(observation)
    observation, reward, done, info = env.step(action)
    terminal = done[0]  # vectorized envs return an array of dones
env.close()  # flush the video to disk
```

## Training Code

- You need to use `gym==0.19` since it **includes the Atari ROMs**.
- The action space is 6 since we use only the **legal actions**.

```python
import wandb

from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_atari_env
from stable_baselines3.common.vec_env import VecFrameStack, VecVideoRecorder
from stable_baselines3.common.callbacks import CheckpointCallback
from wandb.integration.sb3 import WandbCallback
from huggingface_sb3 import push_to_hub

config = {
    "env_name": "PongNoFrameskip-v4",
    "num_envs": 8,
    "total_timesteps": int(10e6),
    "seed": 4089164106,
}

run = wandb.init(
    project="HFxSB3",
    config=config,
    sync_tensorboard=True,  # Auto-upload sb3's tensorboard metrics
    monitor_gym=True,  # Auto-upload the videos of agents playing the game
    save_code=True,  # Save the code to W&B
)

# There already exists an environment generator
# that will make and wrap atari environments correctly.
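# (make_atari_env applies SB3's standard AtariWrapper preprocessing:
# noop reset, a frame skip of 4, 84x84 grayscale observations,
# episodic life, and reward clipping to {-1, 0, +1}.)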
# Here we also use multi-worker training (n_envs=8 => 8 environments)
env = make_atari_env(config["env_name"], n_envs=config["num_envs"], seed=config["seed"])  # PongNoFrameskip-v4
print("ENV ACTION SPACE: ", env.action_space.n)

# Frame-stacking with 4 frames
env = VecFrameStack(env, n_stack=4)

# Video recorder
env = VecVideoRecorder(env, "videos",
                       record_video_trigger=lambda x: x % 100000 == 0,
                       video_length=2000)

# Hyperparameters from the RL Zoo:
# https://github.com/DLR-RM/rl-trained-agents/blob/10a9c31e806820d59b20d8b85ca67090338ea912/ppo/PongNoFrameskip-v4_1/PongNoFrameskip-v4/config.yml
model = PPO(
    policy="CnnPolicy",
    env=env,
    batch_size=256,
    clip_range=0.1,
    ent_coef=0.01,
    gae_lambda=0.9,
    gamma=0.99,
    learning_rate=2.5e-4,
    max_grad_norm=0.5,
    n_epochs=4,
    n_steps=128,
    vf_coef=0.5,
    tensorboard_log="runs",
    verbose=1,
)

model.learn(
    total_timesteps=config["total_timesteps"],
    callback=[
        WandbCallback(
            gradient_save_freq=1000,
            model_save_path=f"models/{run.id}",
        ),
        # save_freq is counted per call to env.step(), so with 8 envs
        # a checkpoint is written every 80,000 timesteps
        CheckpointCallback(save_freq=10000, save_path="./pong",
                           name_prefix=config["env_name"]),
    ],
)

model.save("ppo-PongNoFrameskip-v4.zip")
push_to_hub(
    repo_id="ThomasSimonini/ppo-PongNoFrameskip-v4",
    filename="ppo-PongNoFrameskip-v4.zip",
    commit_message="Added Pong trained agent",
)
```
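
If training is interrupted, you can resume from one of the checkpoints written by `CheckpointCallback` above. A minimal sketch, assuming a checkpoint file named `./pong/PongNoFrameskip-v4_1000000_steps.zip` (the exact filename depends on when the checkpoint was written):

```python
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_atari_env
from stable_baselines3.common.vec_env import VecFrameStack

# Rebuild the training environment with the same wrappers
env = make_atari_env("PongNoFrameskip-v4", n_envs=8, seed=4089164106)
env = VecFrameStack(env, n_stack=4)

# The checkpoint path is illustrative: CheckpointCallback names files
# {name_prefix}_{num_timesteps}_steps.zip
model = PPO.load("./pong/PongNoFrameskip-v4_1000000_steps.zip", env=env)

# Continue training without resetting the timestep counter
model.learn(total_timesteps=int(10e6), reset_num_timesteps=False)
```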