diff --git a/flake.nix b/flake.nix
index efc5189..90e490a 100644
--- a/flake.nix
+++ b/flake.nix
@@ -14,7 +14,7 @@
             cudaSupport = true;
           };
         };
-        py-pkgs = pkgs.python310Packages;
+        py-pkgs = pkgs.python38Packages;
         setup-script = pkgs.writeScriptBin "run-me-first" ''
           #micromamba install --yes -f conda-requirements.txt -c conda-forge -c pytorch -c nvidia
           pip install -r requirements.txt
@@ -65,20 +65,39 @@
         ];
 
         libraries = with pkgs; [
+          boost
+          ffmpeg
+          fluidsynth
+          game-music-emu
+          glib
+          gtk2
+          libGL
+          libjpeg
+          libstdcxx5
+          lua51Packages.lua
+          nasm
+          openal
+          SDL2
+          stdenv.cc.cc.lib
+          timidity
+          wildmidi
+          zlib
         ];
 
         packages = with pkgs; [
-          (python310.withPackages pylibs)
+          (python38.withPackages pylibs)
           cmake
           curl
+          gnutar
           jq
           stgit
           swig
+          unzip
         ];
       in
       {
         devShell = pkgs.mkShell {
-          buildInputs = packages;
+          buildInputs = packages ++ libraries;
           packages = with pkgs; [
             setup-script
           ];
diff --git a/notebooks/unit8/unit8_part2.ipynb b/notebooks/unit8/unit8_part2.ipynb
index 4a41921..99f9a65 100644
--- a/notebooks/unit8/unit8_part2.ipynb
+++ b/notebooks/unit8/unit8_part2.ipynb
@@ -1,690 +1,1651 @@
 {
-  "cells": [
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "view-in-github",
-        "colab_type": "text"
-      },
-      "source": [
-        "<a href=\"https://colab.research.google.com/github/huggingface/deep-rl-class/blob/main/notebooks/unit8/unit8_part2.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "OVx1gdg9wt9t"
-      },
-      "source": [
-        "# Unit 8 Part 2: Advanced Deep Reinforcement Learning. Using Sample Factory to play Doom from pixels\n",
-        "\n",
-        "<img src=\"https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/unit9/thumbnail2.png\" alt=\"Thumbnail\"/>\n",
-        "\n",
-        "In this notebook, we will learn how to train a Deep Neural Network to collect objects in a 3D environment based on the game of Doom, a video of the resulting policy is shown below. We train this policy using [Sample Factory](https://www.samplefactory.dev/), an asynchronous implementation of the PPO algorithm.\n",
-        "\n",
-        "Please note the following points:\n",
-        "\n",
-        "*   [Sample Factory](https://www.samplefactory.dev/) is an advanced RL framework and **only functions on Linux and Mac** (not Windows).\n",
-        "\n",
-        "*  The framework performs best on a **GPU machine with many CPU cores**, where it can achieve speeds of 100k interactions per second. The resources available on a standard Colab notebook **limit the performance of this library**. So the speed in this setting **does not reflect the real-world performance**.\n",
-        "* Benchmarks for Sample Factory are available in a number of settings, check out the [examples](https://github.com/alex-petrenko/sample-factory/tree/master/sf_examples) if you want to find out more.\n"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "id": "I6_67HfI1CKg"
-      },
-      "outputs": [],
-      "source": [
-        "from IPython.display import HTML\n",
-        "\n",
-        "HTML('''<video width=\"640\" height=\"480\" controls>\n",
-        "  <source src=\"https://huggingface.co/edbeeching/doom_health_gathering_supreme_3333/resolve/main/replay.mp4\"\n",
-        "  type=\"video/mp4\">Your browser does not support the video tag.</video>'''\n",
-        ")"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "DgHRAsYEXdyw"
-      },
-      "source": [
-        "To validate this hands-on for the [certification process](https://huggingface.co/deep-rl-course/en/unit0/introduction#certification-process), you need to push one model:\n",
-        "\n",
-        "- `doom_health_gathering_supreme` get a result of >= 5.\n",
-        "\n",
-        "To find your result, go to the [leaderboard](https://huggingface.co/spaces/huggingface-projects/Deep-Reinforcement-Learning-Leaderboard) and find your model, **the result = mean_reward - std of reward**\n",
-        "\n",
-        "If you don't find your model, **go to the bottom of the page and click on the refresh button**\n",
-        "\n",
-        "For more information about the certification process, check this section 👉 https://huggingface.co/deep-rl-course/en/unit0/introduction#certification-process"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "PU4FVzaoM6fC"
-      },
-      "source": [
-        "## Set the GPU 💪\n",
-        "- To **accelerate the agent's training, we'll use a GPU**. To do that, go to `Runtime > Change Runtime type`\n",
-        "\n",
-        "<img src=\"https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/notebooks/gpu-step1.jpg\" alt=\"GPU Step 1\">"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "KV0NyFdQM9ZG"
-      },
-      "source": [
-        "- `Hardware Accelerator > GPU`\n",
-        "\n",
-        "<img src=\"https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/notebooks/gpu-step2.jpg\" alt=\"GPU Step 2\">"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "-fSy5HzUcMWB"
-      },
-      "source": [
-        "Before starting to train our agent, let's **study the library and environments we're going to use**.\n",
-        "\n",
-        "## Sample Factory\n",
-        "\n",
-        "[Sample Factory](https://www.samplefactory.dev/) is one of the **fastest RL libraries focused on very efficient synchronous and asynchronous implementations of policy gradients (PPO)**.\n",
-        "\n",
-        "Sample Factory is thoroughly **tested, used by many researchers and practitioners**, and is actively maintained. Our implementation is known to **reach SOTA performance in a variety of domains while minimizing RL experiment training time and hardware requirements**.\n",
-        "\n",
-        "<img src=\"https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/unit9/samplefactoryenvs.png\" alt=\"Sample factory\"/>\n",
-        "\n",
-        "\n",
-        "\n",
-        "### Key features\n",
-        "\n",
-        "- Highly optimized algorithm [architecture](https://www.samplefactory.dev/06-architecture/overview/) for maximum learning throughput\n",
-        "- [Synchronous and asynchronous](https://www.samplefactory.dev/07-advanced-topics/sync-async/) training regimes\n",
-        "- [Serial (single-process) mode](https://www.samplefactory.dev/07-advanced-topics/serial-mode/) for easy debugging\n",
-        "- Optimal performance in both CPU-based and [GPU-accelerated environments](https://www.samplefactory.dev/09-environment-integrations/isaacgym/)\n",
-        "- Single- & multi-agent training, self-play, supports [training multiple policies](https://www.samplefactory.dev/07-advanced-topics/multi-policy-training/) at once on one or many GPUs\n",
-        "- Population-Based Training ([PBT](https://www.samplefactory.dev/07-advanced-topics/pbt/))\n",
-        "- Discrete, continuous, hybrid action spaces\n",
-        "- Vector-based, image-based, dictionary observation spaces\n",
-        "- Automatically creates a model architecture by parsing action/observation space specification. Supports [custom model architectures](https://www.samplefactory.dev/03-customization/custom-models/)\n",
-        "- Designed to be imported into other projects, [custom environments](https://www.samplefactory.dev/03-customization/custom-environments/) are first-class citizens\n",
-        "- Detailed [WandB and Tensorboard summaries](https://www.samplefactory.dev/05-monitoring/metrics-reference/), [custom metrics](https://www.samplefactory.dev/05-monitoring/custom-metrics/)\n",
-        "- [HuggingFace 🤗 integration](https://www.samplefactory.dev/10-huggingface/huggingface/) (upload trained models and metrics to the Hub)\n",
-        "- [Multiple](https://www.samplefactory.dev/09-environment-integrations/mujoco/) [example](https://www.samplefactory.dev/09-environment-integrations/atari/) [environment](https://www.samplefactory.dev/09-environment-integrations/vizdoom/) [integrations](https://www.samplefactory.dev/09-environment-integrations/dmlab/) with tuned parameters and trained models\n",
-        "\n",
-        "All of the above policies are available on the 🤗 hub. Search for the tag [sample-factory](https://huggingface.co/models?library=sample-factory&sort=downloads)\n",
-        "\n",
-        "### How sample-factory works\n",
-        "\n",
-        "Sample-factory is one of the **most highly optimized RL implementations available to the community**.\n",
-        "\n",
-        "It works by **spawning multiple processes that run rollout workers, inference workers and a learner worker**.\n",
-        "\n",
-        "The *workers* **communicate through shared memory, which lowers the communication cost between processes**.\n",
-        "\n",
-        "The *rollout workers* interact with the environment and send observations to the *inference workers*.\n",
-        "\n",
-        "The *inferences workers* query a fixed version of the policy and **send actions back to the rollout worker**.\n",
-        "\n",
-        "After *k* steps the rollout works send a trajectory of experience to the learner worker, **which it uses to update the agent’s policy network**.\n",
-        "\n",
-        "<img src=\"https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/unit9/samplefactory.png\" alt=\"Sample factory\"/>"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "nB68Eb9UgC94"
-      },
-      "source": [
-        "### Actor Critic models in Sample-factory\n",
-        "\n",
-        "Actor Critic models in Sample Factory are composed of three components:\n",
-        "\n",
-        "- **Encoder** - Process input observations (images, vectors) and map them to a vector. This is the part of the model you will most likely want to customize.\n",
-        "- **Core** - Intergrate vectors from one or more encoders, can optionally include a single- or multi-layer LSTM/GRU in a memory-based agent.\n",
-        "- **Decoder** - Apply additional layers to the output of the model core before computing the policy and value outputs.\n",
-        "\n",
-        "The library has been designed to automatically support any observation and action spaces. Users can easily add their custom models. You can find out more in the [documentation](https://www.samplefactory.dev/03-customization/custom-models/#actor-critic-models-in-sample-factory)."
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "ez5UhUtYcWXF"
-      },
-      "source": [
-        "## ViZDoom\n",
-        "\n",
-        "[ViZDoom](https://vizdoom.cs.put.edu.pl/) is an **open-source python interface for the Doom Engine**.\n",
-        "\n",
-        "The library was created in 2016 by Marek Wydmuch, Michal Kempka  at the Institute of Computing Science, Poznan University of Technology, Poland.\n",
-        "\n",
-        "The library enables the **training of agents directly from the screen pixels in a number of scenarios**, including team deathmatch, shown in the video below. Because the ViZDoom environment is based on a game the was created in the 90s, it can be run on modern hardware at accelerated speeds, **allowing us to learn complex AI behaviors fairly quickly**.\n",
-        "\n",
-        "The library includes feature such as:\n",
-        "\n",
-        "- Multi-platform (Linux, macOS, Windows),\n",
-        "- API for Python and C++,\n",
-        "- [OpenAI Gym](https://www.gymlibrary.dev/) environment wrappers\n",
-        "- Easy-to-create custom scenarios (visual editors, scripting language, and examples available),\n",
-        "- Async and sync single-player and multiplayer modes,\n",
-        "- Lightweight (few MBs) and fast (up to 7000 fps in sync mode, single-threaded),\n",
-        "- Customizable resolution and rendering parameters,\n",
-        "- Access to the depth buffer (3D vision),\n",
-        "- Automatic labeling of game objects visible in the frame,\n",
-        "- Access to the audio buffer\n",
-        "- Access to the list of actors/objects and map geometry,\n",
-        "- Off-screen rendering and episode recording,\n",
-        "- Time scaling in async mode."
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "wAMwza0d5QVj"
-      },
-      "source": [
-        "## We first need to install some dependencies that are required for the ViZDoom environment\n",
-        "\n",
-        "Now that our Colab runtime is set up, we can start by installing the dependencies required to run ViZDoom on linux.\n",
-        "\n",
-        "If you are following on your machine on Mac, you will want to follow the installation instructions on the [github page](https://github.com/Farama-Foundation/ViZDoom/blob/master/doc/Quickstart.md#-quickstart-for-macos-and-anaconda3-python-36)."
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "id": "RJMxkaldwIVx"
-      },
-      "outputs": [],
-      "source": [
-        "%%capture\n",
-        "%%bash\n",
-        "# Install ViZDoom deps from\n",
-        "# https://github.com/mwydmuch/ViZDoom/blob/master/doc/Building.md#-linux\n",
-        "\n",
-        "apt-get install build-essential zlib1g-dev libsdl2-dev libjpeg-dev \\\n",
-        "nasm tar libbz2-dev libgtk2.0-dev cmake git libfluidsynth-dev libgme-dev \\\n",
-        "libopenal-dev timidity libwildmidi-dev unzip ffmpeg\n",
-        "\n",
-        "# Boost libraries\n",
-        "apt-get install libboost-all-dev\n",
-        "\n",
-        "# Lua binding dependencies\n",
-        "apt-get install liblua5.1-dev"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "JT4att2c57MW"
-      },
-      "source": [
-        "## Then we can install Sample Factory and ViZDoom\n",
-        "- This can take 7min"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "id": "bbqfPZnIsvA6"
-      },
-      "outputs": [],
-      "source": [
-        "# install python libraries\n",
-        "# thanks toinsson\n",
-        "!pip install faster-fifo==1.4.2\n",
-        "!pip install vizdoom"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        "!pip install sample-factory==2.0.2"
-      ],
-      "metadata": {
-        "id": "alxUt7Au-O8e"
-      },
-      "execution_count": null,
-      "outputs": []
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "1jizouGpghUZ"
-      },
-      "source": [
-        "## Setting up the Doom Environment in sample-factory"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "id": "bCgZbeiavcDU"
-      },
-      "outputs": [],
-      "source": [
-        "import functools\n",
-        "\n",
-        "from sample_factory.algo.utils.context import global_model_factory\n",
-        "from sample_factory.cfg.arguments import parse_full_cfg, parse_sf_args\n",
-        "from sample_factory.envs.env_utils import register_env\n",
-        "from sample_factory.train import run_rl\n",
-        "\n",
-        "from sf_examples.vizdoom.doom.doom_model import make_vizdoom_encoder\n",
-        "from sf_examples.vizdoom.doom.doom_params import add_doom_env_args, doom_override_defaults\n",
-        "from sf_examples.vizdoom.doom.doom_utils import DOOM_ENVS, make_doom_env_from_spec\n",
-        "\n",
-        "\n",
-        "# Registers all the ViZDoom environments\n",
-        "def register_vizdoom_envs():\n",
-        "    for env_spec in DOOM_ENVS:\n",
-        "        make_env_func = functools.partial(make_doom_env_from_spec, env_spec)\n",
-        "        register_env(env_spec.name, make_env_func)\n",
-        "\n",
-        "# Sample Factory allows the registration of a custom Neural Network architecture\n",
-        "# See https://github.com/alex-petrenko/sample-factory/blob/master/sf_examples/vizdoom/doom/doom_model.py for more details\n",
-        "def register_vizdoom_models():\n",
-        "    global_model_factory().register_encoder_factory(make_vizdoom_encoder)\n",
-        "\n",
-        "\n",
-        "def register_vizdoom_components():\n",
-        "    register_vizdoom_envs()\n",
-        "    register_vizdoom_models()\n",
-        "\n",
-        "# parse the command line args and create a config\n",
-        "def parse_vizdoom_cfg(argv=None, evaluation=False):\n",
-        "    parser, _ = parse_sf_args(argv=argv, evaluation=evaluation)\n",
-        "    # parameters specific to Doom envs\n",
-        "    add_doom_env_args(parser)\n",
-        "    # override Doom default values for algo parameters\n",
-        "    doom_override_defaults(parser)\n",
-        "    # second parsing pass yields the final configuration\n",
-        "    final_cfg = parse_full_cfg(parser, argv)\n",
-        "    return final_cfg"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "sgRy6wnrgnij"
-      },
-      "source": [
-        "Now that the setup if complete, we can train the agent. We have chosen here to learn a ViZDoom task called `Health Gathering Supreme`.\n",
-        "\n",
-        "### The scenario: Health Gathering Supreme\n",
-        "\n",
-        "<img src=\"https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/unit9/Health-Gathering-Supreme.png\" alt=\"Health-Gathering-Supreme\"/>\n",
-        "\n",
-        "\n",
-        "\n",
-        "The objective of this scenario is to **teach the agent how to survive without knowing what makes him survive**. Agent know only that **life is precious** and death is bad so **it must learn what prolongs his existence and that his health is connected with it**.\n",
-        "\n",
-        "Map is a rectangle containing walls and with a green, acidic floor which **hurts the player periodically**. Initially there are some medkits spread uniformly over the map. A new medkit falls from the skies every now and then. **Medkits heal some portions of player's health** - to survive agent needs to pick them up. Episode finishes after player's death or on timeout.\n",
-        "\n",
-        "Further configuration:\n",
-        "- Living_reward = 1\n",
-        "- 3 available buttons: turn left, turn right, move forward\n",
-        "- 1 available game variable: HEALTH\n",
-        "- death penalty = 100\n",
-        "\n",
-        "You can find out more about the scenarios available in ViZDoom [here](https://github.com/Farama-Foundation/ViZDoom/tree/master/scenarios).\n",
-        "\n",
-        "There are also a number of more complex scenarios that have been create for ViZDoom, such as the ones detailed on [this github page](https://github.com/edbeeching/3d_control_deep_rl).\n",
-        "\n"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "siHZZ34DiZEp"
-      },
-      "source": [
-        "## Training the agent\n",
-        "- We're going to train the agent for 4000000 steps it will take approximately 20min"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "id": "y_TeicMvyKHP"
-      },
-      "outputs": [],
-      "source": [
-        "## Start the training, this should take around 15 minutes\n",
-        "register_vizdoom_components()\n",
-        "\n",
-        "# The scenario we train on today is health gathering\n",
-        "# other scenarios include \"doom_basic\", \"doom_two_colors_easy\", \"doom_dm\", \"doom_dwango5\", \"doom_my_way_home\", \"doom_deadly_corridor\", \"doom_defend_the_center\", \"doom_defend_the_line\"\n",
-        "env = \"doom_health_gathering_supreme\"\n",
-        "cfg = parse_vizdoom_cfg(argv=[f\"--env={env}\", \"--num_workers=8\", \"--num_envs_per_worker=4\", \"--train_for_env_steps=4000000\"])\n",
-        "\n",
-        "status = run_rl(cfg)"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "5L0nBS9e_jqC"
-      },
-      "source": [
-        "## Let's take a look at the performance of the trained policy and output a video of the agent."
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "id": "MGSA4Kg5_i0j"
-      },
-      "outputs": [],
-      "source": [
-        "from sample_factory.enjoy import enjoy\n",
-        "cfg = parse_vizdoom_cfg(argv=[f\"--env={env}\", \"--num_workers=1\", \"--save_video\", \"--no_render\", \"--max_num_episodes=10\"], evaluation=True)\n",
-        "status = enjoy(cfg)"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "Lj5L1x0WLxwB"
-      },
-      "source": [
-        "## Now lets visualize the performance of the agent"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "id": "WsXhBY7JNOdJ"
-      },
-      "outputs": [],
-      "source": [
-        "from base64 import b64encode\n",
-        "from IPython.display import HTML\n",
-        "\n",
-        "mp4 = open('/content/train_dir/default_experiment/replay.mp4','rb').read()\n",
-        "data_url = \"data:video/mp4;base64,\" + b64encode(mp4).decode()\n",
-        "HTML(\"\"\"\n",
-        "<video width=640 controls>\n",
-        "      <source src=\"%s\" type=\"video/mp4\">\n",
-        "</video>\n",
-        "\"\"\" % data_url)"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "source": [
-        "The agent has learned something, but its performance could be better. We would clearly need to train for longer. But let's upload this model to the Hub."
-      ],
-      "metadata": {
-        "id": "2A4pf_1VwPqR"
-      }
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "CSQVWF0kNuy9"
-      },
-      "source": [
-        "## Now lets upload your checkpoint and video to the Hugging Face Hub\n",
-        "\n",
-        "\n"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "JquRrWytA6eo"
-      },
-      "source": [
-        "To be able to share your model with the community there are three more steps to follow:\n",
-        "\n",
-        "1️⃣ (If it's not already done) create an account to HF ➡ https://huggingface.co/join\n",
-        "\n",
-        "2️⃣ Sign in and then, you need to store your authentication token from the Hugging Face website.\n",
-        "- Create a new token (https://huggingface.co/settings/tokens) **with write role**\n",
-        "\n",
-        "<img src=\"https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/notebooks/create-token.jpg\" alt=\"Create HF Token\">\n",
-        "\n",
-        "- Copy the token\n",
-        "- Run the cell below and paste the token"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "_tsf2uv0g_4p"
-      },
-      "source": [
-        "If you don't want to use a Google Colab or a Jupyter Notebook, you need to use this command instead: `huggingface-cli login`"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "id": "GoQm_jYSOts0"
-      },
-      "outputs": [],
-      "source": [
-        "from huggingface_hub import notebook_login\n",
-        "notebook_login()\n",
-        "!git config --global credential.helper store"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "id": "sEawW_i0OvJV"
-      },
-      "outputs": [],
-      "source": [
-        "from sample_factory.enjoy import enjoy\n",
-        "\n",
-        "hf_username = \"ThomasSimonini\" # insert your HuggingFace username here\n",
-        "\n",
-        "cfg = parse_vizdoom_cfg(argv=[f\"--env={env}\", \"--num_workers=1\", \"--save_video\", \"--no_render\", \"--max_num_episodes=10\", \"--max_num_frames=100000\", \"--push_to_hub\", f\"--hf_repository={hf_username}/rl_course_vizdoom_health_gathering_supreme\"], evaluation=True)\n",
-        "status = enjoy(cfg)"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "source": [
-        "## Let's load another model\n",
-        "\n",
-        "\n"
-      ],
-      "metadata": {
-        "id": "9PzeXx-qxVvw"
-      }
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "mHZAWSgL5F7P"
-      },
-      "source": [
-        "This agent's performance was good, but can do better! Let's download and visualize an agent trained for 10B timesteps from the hub."
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "id": "Ud6DwAUl5S-l"
-      },
-      "outputs": [],
-      "source": [
-        "#download the agent from the hub\n",
-        "!python -m sample_factory.huggingface.load_from_hub -r edbeeching/doom_health_gathering_supreme_2222 -d ./train_dir\n"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "id": "qoUJhL6x6sY5"
-      },
-      "outputs": [],
-      "source": [
-        "!ls train_dir/doom_health_gathering_supreme_2222"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "id": "lZskc8LG8qr8"
-      },
-      "outputs": [],
-      "source": [
-        "env = \"doom_health_gathering_supreme\"\n",
-        "cfg = parse_vizdoom_cfg(argv=[f\"--env={env}\", \"--num_workers=1\", \"--save_video\", \"--no_render\", \"--max_num_episodes=10\", \"--experiment=doom_health_gathering_supreme_2222\", \"--train_dir=train_dir\"], evaluation=True)\n",
-        "status = enjoy(cfg)"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "id": "BtzXBoj65Wmq"
-      },
-      "outputs": [],
-      "source": [
-        "mp4 = open('/content/train_dir/doom_health_gathering_supreme_2222/replay.mp4','rb').read()\n",
-        "data_url = \"data:video/mp4;base64,\" + b64encode(mp4).decode()\n",
-        "HTML(\"\"\"\n",
-        "<video width=640 controls>\n",
-        "      <source src=\"%s\" type=\"video/mp4\">\n",
-        "</video>\n",
-        "\"\"\" % data_url)"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "source": [
-        "## Some additional challenges 🏆: Doom Deathmatch\n",
-        "\n",
-        "Training an agent to play a Doom deathmatch **takes many hours on a more beefy machine than is available in Colab**.\n",
-        "\n",
-        "Fortunately, we have have **already trained an agent in this scenario and it is available in the 🤗 Hub!** Let’s download the model and visualize the agent’s performance."
-      ],
-      "metadata": {
-        "id": "ie5YWC3NyKO8"
-      }
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "id": "fq3WFeus81iI"
-      },
-      "outputs": [],
-      "source": [
-        "# Download the agent from the hub\n",
-        "!python -m sample_factory.huggingface.load_from_hub -r edbeeching/doom_deathmatch_bots_2222 -d ./train_dir"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "source": [
-        "Given the agent plays for a long time the video generation can take **10 minutes**."
-      ],
-      "metadata": {
-        "id": "7AX_LwxR2FQ0"
-      }
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "id": "0hq6XL__85Bv"
-      },
-      "outputs": [],
-      "source": [
-        "\n",
-        "from sample_factory.enjoy import enjoy\n",
-        "register_vizdoom_components()\n",
-        "env = \"doom_deathmatch_bots\"\n",
-        "cfg = parse_vizdoom_cfg(argv=[f\"--env={env}\", \"--num_workers=1\", \"--save_video\", \"--no_render\", \"--max_num_episodes=1\", \"--experiment=doom_deathmatch_bots_2222\", \"--train_dir=train_dir\"], evaluation=True)\n",
-        "status = enjoy(cfg)\n",
-        "mp4 = open('/content/train_dir/doom_deathmatch_bots_2222/replay.mp4','rb').read()\n",
-        "data_url = \"data:video/mp4;base64,\" + b64encode(mp4).decode()\n",
-        "HTML(\"\"\"\n",
-        "<video width=640 controls>\n",
-        "      <source src=\"%s\" type=\"video/mp4\">\n",
-        "</video>\n",
-        "\"\"\" % data_url)"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "source": [
-        "\n",
-        "You **can try to train your agent in this environment** using the code above, but not on colab.\n",
-        "**Good luck 🤞**"
-      ],
-      "metadata": {
-        "id": "N6mEC-4zyihx"
-      }
-    },
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "view-in-github"
+   },
+   "source": [
+    "<a href=\"https://colab.research.google.com/github/huggingface/deep-rl-class/blob/main/notebooks/unit8/unit8_part2.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "OVx1gdg9wt9t"
+   },
+   "source": [
+    "# Unit 8 Part 2: Advanced Deep Reinforcement Learning. Using Sample Factory to play Doom from pixels\n",
+    "\n",
+    "<img src=\"https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/unit9/thumbnail2.png\" alt=\"Thumbnail\"/>\n",
+    "\n",
+    "In this notebook, we will learn how to train a Deep Neural Network to collect objects in a 3D environment based on the game of Doom. A video of the resulting policy is shown below. We train this policy using [Sample Factory](https://www.samplefactory.dev/), an asynchronous implementation of the PPO algorithm.\n",
+    "\n",
+    "Please note the following points:\n",
+    "\n",
+    "*   [Sample Factory](https://www.samplefactory.dev/) is an advanced RL framework and **only functions on Linux and Mac** (not Windows).\n",
+    "\n",
+    "*  The framework performs best on a **GPU machine with many CPU cores**, where it can achieve speeds of 100k interactions per second. The resources available on a standard Colab notebook **limit the performance of this library**. So the speed in this setting **does not reflect the real-world performance**.\n",
+    "* Benchmarks for Sample Factory are available in a number of settings; check out the [examples](https://github.com/alex-petrenko/sample-factory/tree/master/sf_examples) if you want to find out more.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "I6_67HfI1CKg"
+   },
+   "outputs": [],
+   "source": [
+    "from IPython.display import HTML\n",
+    "\n",
+    "HTML('''<video width=\"640\" height=\"480\" controls>\n",
+    "  <source src=\"https://huggingface.co/edbeeching/doom_health_gathering_supreme_3333/resolve/main/replay.mp4\"\n",
+    "  type=\"video/mp4\">Your browser does not support the video tag.</video>'''\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "DgHRAsYEXdyw"
+   },
+   "source": [
+    "To validate this hands-on for the [certification process](https://huggingface.co/deep-rl-course/en/unit0/introduction#certification-process), you need to push one model:\n",
+    "\n",
+    "- `doom_health_gathering_supreme`: get a result of >= 5.\n",
+    "\n",
+    "To find your result, go to the [leaderboard](https://huggingface.co/spaces/huggingface-projects/Deep-Reinforcement-Learning-Leaderboard) and find your model, **the result = mean_reward - std of reward**\n",
+    "\n",
+    "If you don't find your model, **go to the bottom of the page and click on the refresh button**\n",
+    "\n",
+    "For more information about the certification process, check this section 👉 https://huggingface.co/deep-rl-course/en/unit0/introduction#certification-process"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "PU4FVzaoM6fC"
+   },
+   "source": [
+    "## Set the GPU 💪\n",
+    "- To **accelerate the agent's training, we'll use a GPU**. To do that, go to `Runtime > Change Runtime type`\n",
+    "\n",
+    "<img src=\"https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/notebooks/gpu-step1.jpg\" alt=\"GPU Step 1\">"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "KV0NyFdQM9ZG"
+   },
+   "source": [
+    "- `Hardware Accelerator > GPU`\n",
+    "\n",
+    "<img src=\"https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/notebooks/gpu-step2.jpg\" alt=\"GPU Step 2\">"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "-fSy5HzUcMWB"
+   },
+   "source": [
+    "Before starting to train our agent, let's **study the library and environments we're going to use**.\n",
+    "\n",
+    "## Sample Factory\n",
+    "\n",
+    "[Sample Factory](https://www.samplefactory.dev/) is one of the **fastest RL libraries focused on very efficient synchronous and asynchronous implementations of policy gradients (PPO)**.\n",
+    "\n",
+    "Sample Factory is thoroughly **tested, used by many researchers and practitioners**, and is actively maintained. Our implementation is known to **reach SOTA performance in a variety of domains while minimizing RL experiment training time and hardware requirements**.\n",
+    "\n",
+    "<img src=\"https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/unit9/samplefactoryenvs.png\" alt=\"Sample factory\"/>\n",
+    "\n",
+    "\n",
+    "\n",
+    "### Key features\n",
+    "\n",
+    "- Highly optimized algorithm [architecture](https://www.samplefactory.dev/06-architecture/overview/) for maximum learning throughput\n",
+    "- [Synchronous and asynchronous](https://www.samplefactory.dev/07-advanced-topics/sync-async/) training regimes\n",
+    "- [Serial (single-process) mode](https://www.samplefactory.dev/07-advanced-topics/serial-mode/) for easy debugging\n",
+    "- Optimal performance in both CPU-based and [GPU-accelerated environments](https://www.samplefactory.dev/09-environment-integrations/isaacgym/)\n",
+    "- Single- & multi-agent training, self-play, supports [training multiple policies](https://www.samplefactory.dev/07-advanced-topics/multi-policy-training/) at once on one or many GPUs\n",
+    "- Population-Based Training ([PBT](https://www.samplefactory.dev/07-advanced-topics/pbt/))\n",
+    "- Discrete, continuous, hybrid action spaces\n",
+    "- Vector-based, image-based, dictionary observation spaces\n",
+    "- Automatically creates a model architecture by parsing action/observation space specification. Supports [custom model architectures](https://www.samplefactory.dev/03-customization/custom-models/)\n",
+    "- Designed to be imported into other projects, [custom environments](https://www.samplefactory.dev/03-customization/custom-environments/) are first-class citizens\n",
+    "- Detailed [WandB and Tensorboard summaries](https://www.samplefactory.dev/05-monitoring/metrics-reference/), [custom metrics](https://www.samplefactory.dev/05-monitoring/custom-metrics/)\n",
+    "- [HuggingFace 🤗 integration](https://www.samplefactory.dev/10-huggingface/huggingface/) (upload trained models and metrics to the Hub)\n",
+    "- [Multiple](https://www.samplefactory.dev/09-environment-integrations/mujoco/) [example](https://www.samplefactory.dev/09-environment-integrations/atari/) [environment](https://www.samplefactory.dev/09-environment-integrations/vizdoom/) [integrations](https://www.samplefactory.dev/09-environment-integrations/dmlab/) with tuned parameters and trained models\n",
+    "\n",
+    "All of the above policies are available on the 🤗 hub. Search for the tag [sample-factory](https://huggingface.co/models?library=sample-factory&sort=downloads)\n",
+    "\n",
+    "### How sample-factory works\n",
+    "\n",
+    "Sample-factory is one of the **most highly optimized RL implementations available to the community**.\n",
+    "\n",
+    "It works by **spawning multiple processes that run rollout workers, inference workers and a learner worker**.\n",
+    "\n",
+    "The *workers* **communicate through shared memory, which lowers the communication cost between processes**.\n",
+    "\n",
+    "The *rollout workers* interact with the environment and send observations to the *inference workers*.\n",
+    "\n",
+    "The *inference workers* query a fixed version of the policy and **send actions back to the rollout worker**.\n",
+    "\n",
+    "After *k* steps the rollout workers send a trajectory of experience to the learner worker, **which it uses to update the agent’s policy network**.\n",
+    "\n",
+    "<img src=\"https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/unit9/samplefactory.png\" alt=\"Sample factory\"/>"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "nB68Eb9UgC94"
+   },
+   "source": [
+    "### Actor Critic models in Sample-factory\n",
+    "\n",
+    "Actor Critic models in Sample Factory are composed of three components:\n",
+    "\n",
+    "- **Encoder** - Process input observations (images, vectors) and map them to a vector. This is the part of the model you will most likely want to customize.\n",
+    "- **Core** - Integrate vectors from one or more encoders, can optionally include a single- or multi-layer LSTM/GRU in a memory-based agent.\n",
+    "- **Decoder** - Apply additional layers to the output of the model core before computing the policy and value outputs.\n",
+    "\n",
+    "The library has been designed to automatically support any observation and action spaces. Users can easily add their custom models. You can find out more in the [documentation](https://www.samplefactory.dev/03-customization/custom-models/#actor-critic-models-in-sample-factory)."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "ez5UhUtYcWXF"
+   },
+   "source": [
+    "## ViZDoom\n",
+    "\n",
+    "[ViZDoom](https://vizdoom.cs.put.edu.pl/) is an **open-source python interface for the Doom Engine**.\n",
+    "\n",
+    "The library was created in 2016 by Marek Wydmuch and Michal Kempka at the Institute of Computing Science, Poznan University of Technology, Poland.\n",
+    "\n",
+    "The library enables the **training of agents directly from the screen pixels in a number of scenarios**, including team deathmatch, shown in the video below. Because the ViZDoom environment is based on a game that was created in the 90s, it can be run on modern hardware at accelerated speeds, **allowing us to learn complex AI behaviors fairly quickly**.\n",
+    "\n",
+    "The library includes features such as:\n",
+    "\n",
+    "- Multi-platform (Linux, macOS, Windows),\n",
+    "- API for Python and C++,\n",
+    "- [OpenAI Gym](https://www.gymlibrary.dev/) environment wrappers\n",
+    "- Easy-to-create custom scenarios (visual editors, scripting language, and examples available),\n",
+    "- Async and sync single-player and multiplayer modes,\n",
+    "- Lightweight (few MBs) and fast (up to 7000 fps in sync mode, single-threaded),\n",
+    "- Customizable resolution and rendering parameters,\n",
+    "- Access to the depth buffer (3D vision),\n",
+    "- Automatic labeling of game objects visible in the frame,\n",
+    "- Access to the audio buffer\n",
+    "- Access to the list of actors/objects and map geometry,\n",
+    "- Off-screen rendering and episode recording,\n",
+    "- Time scaling in async mode."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "wAMwza0d5QVj"
+   },
+   "source": [
+    "## We first need to install some dependencies that are required for the ViZDoom environment\n",
+    "\n",
+    "Now that our Colab runtime is set up, we can start by installing the dependencies required to run ViZDoom on Linux.\n",
+    "\n",
+    "If you are following along on your own Mac, you will want to follow the installation instructions on the [GitHub page](https://github.com/Farama-Foundation/ViZDoom/blob/master/doc/Quickstart.md#-quickstart-for-macos-and-anaconda3-python-36)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "RJMxkaldwIVx"
+   },
+   "outputs": [],
+   "source": [
+    "%%capture\n",
+    "%%bash\n",
+    "# Install ViZDoom deps from\n",
+    "# https://github.com/mwydmuch/ViZDoom/blob/master/doc/Building.md#-linux\n",
+    "\n",
+    "apt-get install build-essential zlib1g-dev libsdl2-dev libjpeg-dev \\\n",
+    "nasm tar libbz2-dev libgtk2.0-dev cmake git libfluidsynth-dev libgme-dev \\\n",
+    "libopenal-dev timidity libwildmidi-dev unzip ffmpeg\n",
+    "\n",
+    "# Boost libraries\n",
+    "apt-get install libboost-all-dev\n",
+    "\n",
+    "# Lua binding dependencies\n",
+    "apt-get install liblua5.1-dev"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "JT4att2c57MW"
+   },
+   "source": [
+    "## Then we can install Sample Factory and ViZDoom\n",
+    "- This can take 7min"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "bbqfPZnIsvA6"
+   },
+   "outputs": [],
+   "source": [
+    "# install python libraries\n",
+    "# thanks toinsson\n",
+    "!pip install faster-fifo==1.4.2\n",
+    "!pip install vizdoom"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "alxUt7Au-O8e"
+   },
+   "outputs": [],
+   "source": [
+    "!pip install sample-factory==2.0.2"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "1jizouGpghUZ"
+   },
+   "source": [
+    "## Setting up the Doom Environment in sample-factory"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "id": "bCgZbeiavcDU",
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "import functools\n",
+    "\n",
+    "from sample_factory.algo.utils.context import global_model_factory\n",
+    "from sample_factory.cfg.arguments import parse_full_cfg, parse_sf_args\n",
+    "from sample_factory.envs.env_utils import register_env\n",
+    "from sample_factory.train import run_rl\n",
+    "\n",
+    "from sf_examples.vizdoom.doom.doom_model import make_vizdoom_encoder\n",
+    "from sf_examples.vizdoom.doom.doom_params import add_doom_env_args, doom_override_defaults\n",
+    "from sf_examples.vizdoom.doom.doom_utils import DOOM_ENVS, make_doom_env_from_spec\n",
+    "\n",
+    "\n",
+    "# Registers all the ViZDoom environments\n",
+    "def register_vizdoom_envs():\n",
+    "    for env_spec in DOOM_ENVS:\n",
+    "        make_env_func = functools.partial(make_doom_env_from_spec, env_spec)\n",
+    "        register_env(env_spec.name, make_env_func)\n",
+    "\n",
+    "# Sample Factory allows the registration of a custom Neural Network architecture\n",
+    "# See https://github.com/alex-petrenko/sample-factory/blob/master/sf_examples/vizdoom/doom/doom_model.py for more details\n",
+    "def register_vizdoom_models():\n",
+    "    global_model_factory().register_encoder_factory(make_vizdoom_encoder)\n",
+    "\n",
+    "\n",
+    "def register_vizdoom_components():\n",
+    "    register_vizdoom_envs()\n",
+    "    register_vizdoom_models()\n",
+    "\n",
+    "# parse the command line args and create a config\n",
+    "def parse_vizdoom_cfg(argv=None, evaluation=False):\n",
+    "    parser, _ = parse_sf_args(argv=argv, evaluation=evaluation)\n",
+    "    # parameters specific to Doom envs\n",
+    "    add_doom_env_args(parser)\n",
+    "    # override Doom default values for algo parameters\n",
+    "    doom_override_defaults(parser)\n",
+    "    # second parsing pass yields the final configuration\n",
+    "    final_cfg = parse_full_cfg(parser, argv)\n",
+    "    return final_cfg"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "sgRy6wnrgnij"
+   },
+   "source": [
+    "Now that the setup is complete, we can train the agent. We have chosen here to learn a ViZDoom task called `Health Gathering Supreme`.\n",
+    "\n",
+    "### The scenario: Health Gathering Supreme\n",
+    "\n",
+    "<img src=\"https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/unit9/Health-Gathering-Supreme.png\" alt=\"Health-Gathering-Supreme\"/>\n",
+    "\n",
+    "\n",
+    "\n",
+    "The objective of this scenario is to **teach the agent how to survive without knowing what makes him survive**. The agent knows only that **life is precious** and death is bad so **it must learn what prolongs his existence and that his health is connected with it**.\n",
+    "\n",
+    "Map is a rectangle containing walls and with a green, acidic floor which **hurts the player periodically**. Initially there are some medkits spread uniformly over the map. A new medkit falls from the skies every now and then. **Medkits heal some portions of player's health** - to survive agent needs to pick them up. Episode finishes after player's death or on timeout.\n",
+    "\n",
+    "Further configuration:\n",
+    "- Living_reward = 1\n",
+    "- 3 available buttons: turn left, turn right, move forward\n",
+    "- 1 available game variable: HEALTH\n",
+    "- death penalty = 100\n",
+    "\n",
+    "You can find out more about the scenarios available in ViZDoom [here](https://github.com/Farama-Foundation/ViZDoom/tree/master/scenarios).\n",
+    "\n",
+    "There are also a number of more complex scenarios that have been created for ViZDoom, such as the ones detailed on [this GitHub page](https://github.com/edbeeching/3d_control_deep_rl).\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "siHZZ34DiZEp"
+   },
+   "source": [
+    "## Training the agent\n",
+    "- We're going to train the agent for 4,000,000 steps; it will take approximately 20 minutes."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {
+    "id": "y_TeicMvyKHP",
+    "scrolled": true,
+    "tags": []
+   },
+   "outputs": [
     {
-      "cell_type": "markdown",
-      "source": [
-        "If you prefer an easier scenario, **why not try training in another ViZDoom scenario such as `doom_deadly_corridor` or `doom_defend_the_center`.**\n",
-        "\n",
-        "\n",
-        "\n",
-        "---\n",
-        "\n",
-        "\n",
-        "This concludes the last unit. But we are not finished yet! 🤗 The following **bonus section include some of the most interesting, advanced and cutting edge work in Deep Reinforcement Learning**.\n",
-        "\n",
-        "## Keep learning, stay awesome 🤗"
-      ],
-      "metadata": {
-        "id": "YnDAngN6zeeI"
-      }
-    }
-  ],
-  "metadata": {
-    "accelerator": "GPU",
-    "colab": {
-      "provenance": [],
-      "collapsed_sections": [
-        "PU4FVzaoM6fC",
-        "nB68Eb9UgC94",
-        "ez5UhUtYcWXF",
-        "sgRy6wnrgnij"
-      ],
-      "private_outputs": true,
-      "include_colab_link": true
-    },
-    "gpuClass": "standard",
-    "kernelspec": {
-      "display_name": "Python 3",
-      "name": "python3"
-    },
-    "language_info": {
-      "name": "python"
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\u001b[36m[2023-08-17 11:34:48,430][121125] register_encoder_factory: <function make_vizdoom_encoder at 0x7efc38f46b90>\u001b[0m\n",
+      "\u001b[33m[2023-08-17 11:34:48,446][121125] Saved parameter configuration for experiment default_experiment not found!\u001b[0m\n",
+      "\u001b[33m[2023-08-17 11:34:48,446][121125] Starting experiment from scratch!\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:34:48,451][121125] Experiment dir /home/patonw/code/learn/deep-rl-class/notebooks/unit8/train_dir/default_experiment already exists!\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:34:48,452][121125] Resuming existing experiment from /home/patonw/code/learn/deep-rl-class/notebooks/unit8/train_dir/default_experiment...\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:34:48,452][121125] Weights and Biases integration disabled\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:34:49,349][121125] Queried available GPUs: 0\n",
+      "\u001b[0m\n",
+      "\u001b[37m\u001b[1m[2023-08-17 11:34:49,349][121125] Environment var CUDA_VISIBLE_DEVICES is 0\n",
+      "\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:34:50,216][121197] Doom resolution: 160x120, resize resolution: (128, 72)\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:34:50,217][121197] Env info: EnvInfo(obs_space=Dict('obs': Box(0, 255, (3, 72, 128), uint8)), action_space=Discrete(5), num_agents=1, gpu_actions=False, gpu_observations=True, action_splits=None, all_discrete=None, frameskip=4, reward_shaping_scheme=None, env_info_protocol_version=1)\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:34:50,383][121125] Automatically setting recurrence to 32\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:34:50,383][121125] Starting experiment with the following configuration:\n",
+      "help=False\n",
+      "algo=APPO\n",
+      "env=doom_health_gathering_supreme\n",
+      "experiment=default_experiment\n",
+      "train_dir=/home/patonw/code/learn/deep-rl-class/notebooks/unit8/train_dir\n",
+      "restart_behavior=resume\n",
+      "device=gpu\n",
+      "seed=None\n",
+      "num_policies=1\n",
+      "async_rl=True\n",
+      "serial_mode=False\n",
+      "batched_sampling=False\n",
+      "num_batches_to_accumulate=2\n",
+      "worker_num_splits=2\n",
+      "policy_workers_per_policy=1\n",
+      "max_policy_lag=1000\n",
+      "num_workers=8\n",
+      "num_envs_per_worker=4\n",
+      "batch_size=1024\n",
+      "num_batches_per_epoch=1\n",
+      "num_epochs=1\n",
+      "rollout=32\n",
+      "recurrence=32\n",
+      "shuffle_minibatches=False\n",
+      "gamma=0.99\n",
+      "reward_scale=1.0\n",
+      "reward_clip=1000.0\n",
+      "value_bootstrap=False\n",
+      "normalize_returns=True\n",
+      "exploration_loss_coeff=0.001\n",
+      "value_loss_coeff=0.5\n",
+      "kl_loss_coeff=0.0\n",
+      "exploration_loss=symmetric_kl\n",
+      "gae_lambda=0.95\n",
+      "ppo_clip_ratio=0.1\n",
+      "ppo_clip_value=0.2\n",
+      "with_vtrace=False\n",
+      "vtrace_rho=1.0\n",
+      "vtrace_c=1.0\n",
+      "optimizer=adam\n",
+      "adam_eps=1e-06\n",
+      "adam_beta1=0.9\n",
+      "adam_beta2=0.999\n",
+      "max_grad_norm=4.0\n",
+      "learning_rate=0.0001\n",
+      "lr_schedule=constant\n",
+      "lr_schedule_kl_threshold=0.008\n",
+      "lr_adaptive_min=1e-06\n",
+      "lr_adaptive_max=0.01\n",
+      "obs_subtract_mean=0.0\n",
+      "obs_scale=255.0\n",
+      "normalize_input=True\n",
+      "normalize_input_keys=None\n",
+      "decorrelate_experience_max_seconds=0\n",
+      "decorrelate_envs_on_one_worker=True\n",
+      "actor_worker_gpus=[]\n",
+      "set_workers_cpu_affinity=True\n",
+      "force_envs_single_thread=False\n",
+      "default_niceness=0\n",
+      "log_to_file=True\n",
+      "experiment_summaries_interval=10\n",
+      "flush_summaries_interval=30\n",
+      "stats_avg=100\n",
+      "summaries_use_frameskip=True\n",
+      "heartbeat_interval=20\n",
+      "heartbeat_reporting_interval=600\n",
+      "train_for_env_steps=4000000\n",
+      "train_for_seconds=10000000000\n",
+      "save_every_sec=120\n",
+      "keep_checkpoints=2\n",
+      "load_checkpoint_kind=latest\n",
+      "save_milestones_sec=-1\n",
+      "save_best_every_sec=5\n",
+      "save_best_metric=reward\n",
+      "save_best_after=100000\n",
+      "benchmark=False\n",
+      "encoder_mlp_layers=[512, 512]\n",
+      "encoder_conv_architecture=convnet_simple\n",
+      "encoder_conv_mlp_layers=[512]\n",
+      "use_rnn=True\n",
+      "rnn_size=512\n",
+      "rnn_type=gru\n",
+      "rnn_num_layers=1\n",
+      "decoder_mlp_layers=[]\n",
+      "nonlinearity=elu\n",
+      "policy_initialization=orthogonal\n",
+      "policy_init_gain=1.0\n",
+      "actor_critic_share_weights=True\n",
+      "adaptive_stddev=True\n",
+      "continuous_tanh_scale=0.0\n",
+      "initial_stddev=1.0\n",
+      "use_env_info_cache=False\n",
+      "env_gpu_actions=False\n",
+      "env_gpu_observations=True\n",
+      "env_frameskip=4\n",
+      "env_framestack=1\n",
+      "pixel_format=CHW\n",
+      "use_record_episode_statistics=False\n",
+      "with_wandb=False\n",
+      "wandb_user=None\n",
+      "wandb_project=sample_factory\n",
+      "wandb_group=None\n",
+      "wandb_job_type=SF\n",
+      "wandb_tags=[]\n",
+      "with_pbt=False\n",
+      "pbt_mix_policies_in_one_env=True\n",
+      "pbt_period_env_steps=5000000\n",
+      "pbt_start_mutation=20000000\n",
+      "pbt_replace_fraction=0.3\n",
+      "pbt_mutation_rate=0.15\n",
+      "pbt_replace_reward_gap=0.1\n",
+      "pbt_replace_reward_gap_absolute=1e-06\n",
+      "pbt_optimize_gamma=False\n",
+      "pbt_target_objective=true_objective\n",
+      "pbt_perturb_min=1.1\n",
+      "pbt_perturb_max=1.5\n",
+      "num_agents=-1\n",
+      "num_humans=0\n",
+      "num_bots=-1\n",
+      "start_bot_difficulty=None\n",
+      "timelimit=None\n",
+      "res_w=128\n",
+      "res_h=72\n",
+      "wide_aspect_ratio=False\n",
+      "eval_env_frameskip=1\n",
+      "fps=35\n",
+      "command_line=--env=doom_health_gathering_supreme --num_workers=8 --num_envs_per_worker=4 --train_for_env_steps=4000000\n",
+      "cli_args={'env': 'doom_health_gathering_supreme', 'num_workers': 8, 'num_envs_per_worker': 4, 'train_for_env_steps': 4000000}\n",
+      "git_hash=336df5a551fea3a2cf40925bf3083db6b4518c91\n",
+      "git_repo_name=https://github.com/huggingface/deep-rl-class\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:34:50,384][121125] Saving configuration to /home/patonw/code/learn/deep-rl-class/notebooks/unit8/train_dir/default_experiment/config.json...\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:34:50,404][121125] Rollout worker 0 uses device cpu\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:34:50,404][121125] Rollout worker 1 uses device cpu\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:34:50,405][121125] Rollout worker 2 uses device cpu\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:34:50,405][121125] Rollout worker 3 uses device cpu\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:34:50,406][121125] Rollout worker 4 uses device cpu\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:34:50,406][121125] Rollout worker 5 uses device cpu\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:34:50,406][121125] Rollout worker 6 uses device cpu\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:34:50,406][121125] Rollout worker 7 uses device cpu\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:34:50,440][121125] Using GPUs [0] for process 0 (actually maps to GPUs [0])\u001b[0m\n",
+      "\u001b[37m\u001b[1m[2023-08-17 11:34:50,441][121125] InferenceWorker_p0-w0: min num requests: 2\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:34:50,458][121125] Starting all processes...\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:34:50,458][121125] Starting process learner_proc0\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:34:50,508][121125] Starting all processes...\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:34:50,512][121125] Starting process inference_proc0-0\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:34:50,512][121125] Starting process rollout_proc0\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:34:50,513][121125] Starting process rollout_proc1\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:34:50,514][121125] Starting process rollout_proc2\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:34:50,514][121125] Starting process rollout_proc3\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:34:50,514][121125] Starting process rollout_proc4\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:34:50,514][121125] Starting process rollout_proc5\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:34:50,514][121125] Starting process rollout_proc6\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:34:50,515][121125] Starting process rollout_proc7\u001b[0m\n",
+      "\u001b[37m\u001b[1m[2023-08-17 11:34:51,414][121211] LearnerWorker_p0\tpid 121211\tparent 121125\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:34:51,414][121211] Using GPUs [0] for process 0 (actually maps to GPUs [0])\u001b[0m\n",
+      "\u001b[37m\u001b[1m[2023-08-17 11:34:51,414][121211] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:34:51,424][121211] Num visible devices: 1\u001b[0m\n",
+      "\u001b[37m\u001b[1m[2023-08-17 11:34:51,443][121211] Starting seed is not provided\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:34:51,444][121211] Using GPUs [0] for process 0 (actually maps to GPUs [0])\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:34:51,444][121211] Initializing actor-critic model on device cuda:0\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:34:51,444][121211] RunningMeanStd input shape: (3, 72, 128)\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:34:51,445][121211] RunningMeanStd input shape: (1,)\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:34:51,456][121211] ConvEncoder: input_channels=3\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:34:51,528][121211] Conv encoder output size: 512\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:34:51,528][121211] Policy head output size: 512\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:34:51,541][121211] Created Actor Critic model with architecture:\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:34:51,541][121211] ActorCriticSharedWeights(\n",
+      "  (obs_normalizer): ObservationNormalizer(\n",
+      "    (running_mean_std): RunningMeanStdDictInPlace(\n",
+      "      (running_mean_std): ModuleDict(\n",
+      "        (obs): RunningMeanStdInPlace()\n",
+      "      )\n",
+      "    )\n",
+      "  )\n",
+      "  (returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace)\n",
+      "  (encoder): VizdoomEncoder(\n",
+      "    (basic_encoder): ConvEncoder(\n",
+      "      (enc): RecursiveScriptModule(\n",
+      "        original_name=ConvEncoderImpl\n",
+      "        (conv_head): RecursiveScriptModule(\n",
+      "          original_name=Sequential\n",
+      "          (0): RecursiveScriptModule(original_name=Conv2d)\n",
+      "          (1): RecursiveScriptModule(original_name=ELU)\n",
+      "          (2): RecursiveScriptModule(original_name=Conv2d)\n",
+      "          (3): RecursiveScriptModule(original_name=ELU)\n",
+      "          (4): RecursiveScriptModule(original_name=Conv2d)\n",
+      "          (5): RecursiveScriptModule(original_name=ELU)\n",
+      "        )\n",
+      "        (mlp_layers): RecursiveScriptModule(\n",
+      "          original_name=Sequential\n",
+      "          (0): RecursiveScriptModule(original_name=Linear)\n",
+      "          (1): RecursiveScriptModule(original_name=ELU)\n",
+      "        )\n",
+      "      )\n",
+      "    )\n",
+      "  )\n",
+      "  (core): ModelCoreRNN(\n",
+      "    (core): GRU(512, 512)\n",
+      "  )\n",
+      "  (decoder): MlpDecoder(\n",
+      "    (mlp): Identity()\n",
+      "  )\n",
+      "  (critic_linear): Linear(in_features=512, out_features=1, bias=True)\n",
+      "  (action_parameterization): ActionParameterizationDefault(\n",
+      "    (distribution_linear): Linear(in_features=512, out_features=5, bias=True)\n",
+      "  )\n",
+      ")\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:34:51,558][121232] Rollout worker 6 starting...\u001b[0m\n",
+      "\u001b[37m\u001b[1m[2023-08-17 11:34:51,558][121232] ROLLOUT worker 6\tpid 121232\tparent 121125\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:34:51,559][121232] Worker 6 uses CPU cores [18, 19, 20]\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:34:51,561][121228] Rollout worker 3 starting...\u001b[0m\n",
+      "\u001b[37m\u001b[1m[2023-08-17 11:34:51,561][121228] ROLLOUT worker 3\tpid 121228\tparent 121125\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:34:51,561][121228] Worker 3 uses CPU cores [9, 10, 11]\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:34:51,567][121226] Rollout worker 0 starting...\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:34:51,567][121230] Rollout worker 5 starting...\u001b[0m\n",
+      "\u001b[37m\u001b[1m[2023-08-17 11:34:51,567][121230] ROLLOUT worker 5\tpid 121230\tparent 121125\u001b[0m\n",
+      "\u001b[37m\u001b[1m[2023-08-17 11:34:51,567][121226] ROLLOUT worker 0\tpid 121226\tparent 121125\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:34:51,567][121230] Worker 5 uses CPU cores [15, 16, 17]\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:34:51,567][121226] Worker 0 uses CPU cores [0, 1, 2]\u001b[0m\n",
+      "\u001b[37m\u001b[1m[2023-08-17 11:34:51,573][121224] InferenceWorker_p0-w0\tpid 121224\tparent 121125\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:34:51,573][121224] Using GPUs [0] for process 0 (actually maps to GPUs [0])\u001b[0m\n",
+      "\u001b[37m\u001b[1m[2023-08-17 11:34:51,573][121224] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:34:51,581][121224] Num visible devices: 1\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:34:51,585][121225] Rollout worker 1 starting...\u001b[0m\n",
+      "\u001b[37m\u001b[1m[2023-08-17 11:34:51,585][121225] ROLLOUT worker 1\tpid 121225\tparent 121125\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:34:51,590][121229] Rollout worker 4 starting...\u001b[0m\n",
+      "\u001b[37m\u001b[1m[2023-08-17 11:34:51,590][121229] ROLLOUT worker 4\tpid 121229\tparent 121125\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:34:51,590][121225] Worker 1 uses CPU cores [3, 4, 5]\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:34:51,593][121229] Worker 4 uses CPU cores [12, 13, 14]\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:34:51,613][121231] Rollout worker 7 starting...\u001b[0m\n",
+      "\u001b[37m\u001b[1m[2023-08-17 11:34:51,613][121231] ROLLOUT worker 7\tpid 121231\tparent 121125\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:34:51,613][121231] Worker 7 uses CPU cores [21, 22, 23]\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:34:51,627][121227] Rollout worker 2 starting...\u001b[0m\n",
+      "\u001b[37m\u001b[1m[2023-08-17 11:34:51,627][121227] ROLLOUT worker 2\tpid 121227\tparent 121125\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:34:51,628][121227] Worker 2 uses CPU cores [6, 7, 8]\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:34:53,156][121211] Using optimizer <class 'torch.optim.adam.Adam'>\u001b[0m\n",
+      "\u001b[33m[2023-08-17 11:34:53,157][121211] No checkpoints found\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:34:53,157][121211] Did not load from checkpoint, starting from scratch!\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:34:53,157][121211] Initialized policy 0 weights for model version 0\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:34:53,158][121211] LearnerWorker_p0 finished initialization!\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:34:53,158][121211] Using GPUs [0] for process 0 (actually maps to GPUs [0])\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:34:53,455][121125] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 0. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:34:53,703][121224] RunningMeanStd input shape: (3, 72, 128)\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:34:53,703][121224] RunningMeanStd input shape: (1,)\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:34:53,710][121224] ConvEncoder: input_channels=3\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:34:53,760][121224] Conv encoder output size: 512\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:34:53,760][121224] Policy head output size: 512\u001b[0m\n",
+      "\u001b[37m\u001b[1m[2023-08-17 11:34:54,313][121125] Inference worker 0-0 is ready!\u001b[0m\n",
+      "\u001b[37m\u001b[1m[2023-08-17 11:34:54,314][121125] All inference workers are ready! Signal rollout workers to start!\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:34:54,329][121226] Doom resolution: 160x120, resize resolution: (128, 72)\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:34:54,329][121229] Doom resolution: 160x120, resize resolution: (128, 72)\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:34:54,329][121231] Doom resolution: 160x120, resize resolution: (128, 72)\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:34:54,329][121232] Doom resolution: 160x120, resize resolution: (128, 72)\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:34:54,330][121227] Doom resolution: 160x120, resize resolution: (128, 72)\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:34:54,330][121228] Doom resolution: 160x120, resize resolution: (128, 72)\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:34:54,333][121225] Doom resolution: 160x120, resize resolution: (128, 72)\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:34:54,333][121230] Doom resolution: 160x120, resize resolution: (128, 72)\u001b[0m\n",
+      "Failed to create ./_vizdoom/ directory:\n",
+      "File exists\n",
+      "\u001b[33m[2023-08-17 11:34:54,460][121227] VizDoom game.init() threw an exception ViZDoomUnexpectedExitException('Controlled ViZDoom instance exited unexpectedly.'). Terminate process...\u001b[0m\n",
+      "\u001b[31m\u001b[1m[2023-08-17 11:34:54,461][121227] EvtLoop [rollout_proc2_evt_loop, process=rollout_proc2] unhandled exception in slot='init' connected to emitter=Emitter(object_id='Sampler', signal_name='_inference_workers_initialized'), args=()\u001b[0m\n",
+      "Traceback (most recent call last):\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sf_examples/vizdoom/doom/doom_gym.py\", line 228, in _game_init\n",
+      "    self.game.init()\n",
+      "vizdoom.vizdoom.ViZDoomUnexpectedExitException: Controlled ViZDoom instance exited unexpectedly.\n",
+      "\n",
+      "During handling of the above exception, another exception occurred:\n",
+      "\n",
+      "Traceback (most recent call last):\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/signal_slot/signal_slot.py\", line 355, in _process_signal\n",
+      "    slot_callable(*args)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sample_factory/algo/sampling/rollout_worker.py\", line 150, in init\n",
+      "    env_runner.init(self.timing)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sample_factory/algo/sampling/non_batched_sampling.py\", line 418, in init\n",
+      "    self._reset()\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sample_factory/algo/sampling/non_batched_sampling.py\", line 430, in _reset\n",
+      "    observations, info = e.reset(seed=seed)  # new way of doing seeding since Gym 0.26.0\n",
+      "  File \"/nix/store/b84h28azn9cg3h9940zb3b3x2569sykl-python3-3.10.12-env/lib/python3.10/site-packages/gymnasium/core.py\", line 414, in reset\n",
+      "    return self.env.reset(seed=seed, options=options)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sample_factory/algo/utils/make_env.py\", line 125, in reset\n",
+      "    obs, info = self.env.reset(**kwargs)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sample_factory/algo/utils/make_env.py\", line 110, in reset\n",
+      "    obs, info = self.env.reset(**kwargs)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sf_examples/vizdoom/doom/wrappers/scenario_wrappers/gathering_reward_shaping.py\", line 30, in reset\n",
+      "    return self.env.reset(**kwargs)\n",
+      "  File \"/nix/store/b84h28azn9cg3h9940zb3b3x2569sykl-python3-3.10.12-env/lib/python3.10/site-packages/gymnasium/core.py\", line 462, in reset\n",
+      "    obs, info = self.env.reset(seed=seed, options=options)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sample_factory/envs/env_wrappers.py\", line 82, in reset\n",
+      "    obs, info = self.env.reset(**kwargs)\n",
+      "  File \"/nix/store/b84h28azn9cg3h9940zb3b3x2569sykl-python3-3.10.12-env/lib/python3.10/site-packages/gymnasium/core.py\", line 414, in reset\n",
+      "    return self.env.reset(seed=seed, options=options)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sf_examples/vizdoom/doom/wrappers/multiplayer_stats.py\", line 51, in reset\n",
+      "    return self.env.reset(**kwargs)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sf_examples/vizdoom/doom/doom_gym.py\", line 323, in reset\n",
+      "    self._ensure_initialized()\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sf_examples/vizdoom/doom/doom_gym.py\", line 274, in _ensure_initialized\n",
+      "    self.initialize()\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sf_examples/vizdoom/doom/doom_gym.py\", line 269, in initialize\n",
+      "    self._game_init()\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sf_examples/vizdoom/doom/doom_gym.py\", line 244, in _game_init\n",
+      "    raise EnvCriticalError()\n",
+      "sample_factory.envs.env_utils.EnvCriticalError\n",
+      "\u001b[33m[2023-08-17 11:34:54,462][121227] Unhandled exception  in evt loop rollout_proc2_evt_loop\u001b[0m\n",
+      "Process rollout_proc2:\n",
+      "Traceback (most recent call last):\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sf_examples/vizdoom/doom/doom_gym.py\", line 228, in _game_init\n",
+      "    self.game.init()\n",
+      "vizdoom.vizdoom.ViZDoomUnexpectedExitException: Controlled ViZDoom instance exited unexpectedly.\n",
+      "\n",
+      "During handling of the above exception, another exception occurred:\n",
+      "\n",
+      "Traceback (most recent call last):\n",
+      "  File \"/nix/store/1r6n7v2wam7gkr18gxccpg7p5ywgw551-python3-3.10.12/lib/python3.10/multiprocessing/process.py\", line 314, in _bootstrap\n",
+      "    self.run()\n",
+      "  File \"/nix/store/1r6n7v2wam7gkr18gxccpg7p5ywgw551-python3-3.10.12/lib/python3.10/multiprocessing/process.py\", line 108, in run\n",
+      "    self._target(*self._args, **self._kwargs)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/signal_slot/signal_slot.py\", line 511, in _target\n",
+      "    self.event_loop.exec()\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/signal_slot/signal_slot.py\", line 403, in exec\n",
+      "    raise exc\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/signal_slot/signal_slot.py\", line 399, in exec\n",
+      "    while self._loop_iteration():\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/signal_slot/signal_slot.py\", line 383, in _loop_iteration\n",
+      "    self._process_signal(s)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/signal_slot/signal_slot.py\", line 358, in _process_signal\n",
+      "    raise exc\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/signal_slot/signal_slot.py\", line 355, in _process_signal\n",
+      "    slot_callable(*args)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sample_factory/algo/sampling/rollout_worker.py\", line 150, in init\n",
+      "    env_runner.init(self.timing)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sample_factory/algo/sampling/non_batched_sampling.py\", line 418, in init\n",
+      "    self._reset()\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sample_factory/algo/sampling/non_batched_sampling.py\", line 430, in _reset\n",
+      "    observations, info = e.reset(seed=seed)  # new way of doing seeding since Gym 0.26.0\n",
+      "  File \"/nix/store/b84h28azn9cg3h9940zb3b3x2569sykl-python3-3.10.12-env/lib/python3.10/site-packages/gymnasium/core.py\", line 414, in reset\n",
+      "    return self.env.reset(seed=seed, options=options)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sample_factory/algo/utils/make_env.py\", line 125, in reset\n",
+      "    obs, info = self.env.reset(**kwargs)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sample_factory/algo/utils/make_env.py\", line 110, in reset\n",
+      "    obs, info = self.env.reset(**kwargs)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sf_examples/vizdoom/doom/wrappers/scenario_wrappers/gathering_reward_shaping.py\", line 30, in reset\n",
+      "    return self.env.reset(**kwargs)\n",
+      "  File \"/nix/store/b84h28azn9cg3h9940zb3b3x2569sykl-python3-3.10.12-env/lib/python3.10/site-packages/gymnasium/core.py\", line 462, in reset\n",
+      "    obs, info = self.env.reset(seed=seed, options=options)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sample_factory/envs/env_wrappers.py\", line 82, in reset\n",
+      "    obs, info = self.env.reset(**kwargs)\n",
+      "  File \"/nix/store/b84h28azn9cg3h9940zb3b3x2569sykl-python3-3.10.12-env/lib/python3.10/site-packages/gymnasium/core.py\", line 414, in reset\n",
+      "    return self.env.reset(seed=seed, options=options)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sf_examples/vizdoom/doom/wrappers/multiplayer_stats.py\", line 51, in reset\n",
+      "    return self.env.reset(**kwargs)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sf_examples/vizdoom/doom/doom_gym.py\", line 323, in reset\n",
+      "    self._ensure_initialized()\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sf_examples/vizdoom/doom/doom_gym.py\", line 274, in _ensure_initialized\n",
+      "    self.initialize()\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sf_examples/vizdoom/doom/doom_gym.py\", line 269, in initialize\n",
+      "    self._game_init()\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sf_examples/vizdoom/doom/doom_gym.py\", line 244, in _game_init\n",
+      "    raise EnvCriticalError()\n",
+      "sample_factory.envs.env_utils.EnvCriticalError\n",
+      "\u001b[37m\u001b[1m[2023-08-17 11:34:54,535][121231] Decorrelating experience for 0 frames...\u001b[0m\n",
+      "\u001b[37m\u001b[1m[2023-08-17 11:34:54,544][121226] Decorrelating experience for 0 frames...\u001b[0m\n",
+      "\u001b[37m\u001b[1m[2023-08-17 11:34:54,548][121232] Decorrelating experience for 0 frames...\u001b[0m\n",
+      "\u001b[37m\u001b[1m[2023-08-17 11:34:54,550][121228] Decorrelating experience for 0 frames...\u001b[0m\n",
+      "\u001b[37m\u001b[1m[2023-08-17 11:34:54,550][121230] Decorrelating experience for 0 frames...\u001b[0m\n",
+      "\u001b[37m\u001b[1m[2023-08-17 11:34:54,728][121231] Decorrelating experience for 32 frames...\u001b[0m\n",
+      "\u001b[37m\u001b[1m[2023-08-17 11:34:54,746][121229] Decorrelating experience for 0 frames...\u001b[0m\n",
+      "\u001b[37m\u001b[1m[2023-08-17 11:34:54,747][121228] Decorrelating experience for 32 frames...\u001b[0m\n",
+      "\u001b[37m\u001b[1m[2023-08-17 11:34:54,747][121230] Decorrelating experience for 32 frames...\u001b[0m\n",
+      "\u001b[37m\u001b[1m[2023-08-17 11:34:54,748][121232] Decorrelating experience for 32 frames...\u001b[0m\n",
+      "\u001b[37m\u001b[1m[2023-08-17 11:34:54,934][121229] Decorrelating experience for 32 frames...\u001b[0m\n",
+      "\u001b[37m\u001b[1m[2023-08-17 11:34:54,934][121226] Decorrelating experience for 32 frames...\u001b[0m\n",
+      "\u001b[37m\u001b[1m[2023-08-17 11:34:54,954][121230] Decorrelating experience for 64 frames...\u001b[0m\n",
+      "\u001b[37m\u001b[1m[2023-08-17 11:34:54,954][121228] Decorrelating experience for 64 frames...\u001b[0m\n",
+      "\u001b[37m\u001b[1m[2023-08-17 11:34:54,955][121232] Decorrelating experience for 64 frames...\u001b[0m\n",
+      "\u001b[37m\u001b[1m[2023-08-17 11:34:55,137][121226] Decorrelating experience for 64 frames...\u001b[0m\n",
+      "\u001b[37m\u001b[1m[2023-08-17 11:34:55,138][121229] Decorrelating experience for 64 frames...\u001b[0m\n",
+      "\u001b[37m\u001b[1m[2023-08-17 11:34:55,138][121231] Decorrelating experience for 64 frames...\u001b[0m\n",
+      "\u001b[37m\u001b[1m[2023-08-17 11:34:55,138][121225] Decorrelating experience for 0 frames...\u001b[0m\n",
+      "\u001b[37m\u001b[1m[2023-08-17 11:34:55,144][121232] Decorrelating experience for 96 frames...\u001b[0m\n",
+      "\u001b[37m\u001b[1m[2023-08-17 11:34:55,336][121225] Decorrelating experience for 32 frames...\u001b[0m\n",
+      "\u001b[37m\u001b[1m[2023-08-17 11:34:55,338][121231] Decorrelating experience for 96 frames...\u001b[0m\n",
+      "\u001b[37m\u001b[1m[2023-08-17 11:34:55,367][121228] Decorrelating experience for 96 frames...\u001b[0m\n",
+      "\u001b[37m\u001b[1m[2023-08-17 11:34:55,519][121225] Decorrelating experience for 64 frames...\u001b[0m\n",
+      "\u001b[37m\u001b[1m[2023-08-17 11:34:55,524][121226] Decorrelating experience for 96 frames...\u001b[0m\n",
+      "\u001b[37m\u001b[1m[2023-08-17 11:34:55,734][121230] Decorrelating experience for 96 frames...\u001b[0m\n",
+      "\u001b[37m\u001b[1m[2023-08-17 11:34:55,737][121229] Decorrelating experience for 96 frames...\u001b[0m\n",
+      "\u001b[37m\u001b[1m[2023-08-17 11:34:55,742][121225] Decorrelating experience for 96 frames...\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:34:56,232][121211] Signal inference workers to stop experience collection...\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:34:56,234][121224] InferenceWorker_p0-w0: stopping experience collection\u001b[0m\n",
+      "/nix/store/b84h28azn9cg3h9940zb3b3x2569sykl-python3-3.10.12-env/lib/python3.10/site-packages/torch/nn/modules/module.py:1501: UserWarning: operator() profile_node %43 : int[] = prim::profile_ivalue(%axis.1)\n",
+      " does not have profile information (Triggered internally at /build/source/third_party/nvfuser/csrc/graph_fuser.cpp:104.)\n",
+      "  return forward_call(*args, **kwargs)\n",
+      "\u001b[36m[2023-08-17 11:34:57,320][121211] Signal inference workers to resume experience collection...\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:34:57,320][121224] InferenceWorker_p0-w0: resuming experience collection\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:34:58,455][121125] Fps is (10 sec: 6553.7, 60 sec: 6553.7, 300 sec: 6553.7). Total num frames: 32768. Throughput: 0: 566.8. Samples: 2834. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:34:58,456][121125] Avg episode reward: [(0, '3.897')]\u001b[0m\n",
+      "\u001b[37m\u001b[1m[2023-08-17 11:34:58,583][121224] Updated weights for policy 0, policy_version 10 (0.0188)\u001b[0m\n",
+      "\u001b[37m\u001b[1m[2023-08-17 11:34:59,640][121224] Updated weights for policy 0, policy_version 20 (0.0006)\u001b[0m\n",
+      "\u001b[37m\u001b[1m[2023-08-17 11:35:00,607][121224] Updated weights for policy 0, policy_version 30 (0.0005)\u001b[0m\n",
+      "\u001b[37m\u001b[1m[2023-08-17 11:35:01,567][121224] Updated weights for policy 0, policy_version 40 (0.0005)\u001b[0m\n",
+      "\u001b[37m\u001b[1m[2023-08-17 11:35:02,584][121224] Updated weights for policy 0, policy_version 50 (0.0006)\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:35:03,455][121125] Fps is (10 sec: 23347.3, 60 sec: 23347.3, 300 sec: 23347.3). Total num frames: 233472. Throughput: 0: 5847.0. Samples: 58470. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:35:03,456][121125] Avg episode reward: [(0, '4.655')]\u001b[0m\n",
+      "\u001b[37m\u001b[1m[2023-08-17 11:35:03,457][121211] Saving new best policy, reward=4.655!\u001b[0m\n",
+      "\u001b[37m\u001b[1m[2023-08-17 11:35:03,725][121224] Updated weights for policy 0, policy_version 60 (0.0006)\u001b[0m\n",
+      "\u001b[37m\u001b[1m[2023-08-17 11:35:04,740][121224] Updated weights for policy 0, policy_version 70 (0.0006)\u001b[0m\n",
+      "\u001b[37m\u001b[1m[2023-08-17 11:35:05,714][121224] Updated weights for policy 0, policy_version 80 (0.0005)\u001b[0m\n",
+      "\u001b[37m\u001b[1m[2023-08-17 11:35:06,667][121224] Updated weights for policy 0, policy_version 90 (0.0005)\u001b[0m\n",
+      "\u001b[37m\u001b[1m[2023-08-17 11:35:07,660][121224] Updated weights for policy 0, policy_version 100 (0.0005)\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:35:08,455][121125] Fps is (10 sec: 40959.9, 60 sec: 29491.3, 300 sec: 29491.3). Total num frames: 442368. Throughput: 0: 5926.0. Samples: 88890. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:35:08,456][121125] Avg episode reward: [(0, '4.598')]\u001b[0m\n",
+      "\u001b[37m\u001b[1m[2023-08-17 11:35:08,628][121224] Updated weights for policy 0, policy_version 110 (0.0005)\u001b[0m\n",
+      "\u001b[37m\u001b[1m[2023-08-17 11:35:09,699][121224] Updated weights for policy 0, policy_version 120 (0.0006)\u001b[0m\n",
+      "\u001b[37m\u001b[1m[2023-08-17 11:35:10,436][121125] Heartbeat connected on Batcher_0\u001b[0m\n",
+      "\u001b[37m\u001b[1m[2023-08-17 11:35:10,438][121125] Heartbeat connected on LearnerWorker_p0\u001b[0m\n",
+      "\u001b[37m\u001b[1m[2023-08-17 11:35:10,442][121125] Heartbeat connected on InferenceWorker_p0-w0\u001b[0m\n",
+      "\u001b[37m\u001b[1m[2023-08-17 11:35:10,444][121125] Heartbeat connected on RolloutWorker_w0\u001b[0m\n",
+      "\u001b[37m\u001b[1m[2023-08-17 11:35:10,446][121125] Heartbeat connected on RolloutWorker_w1\u001b[0m\n",
+      "\u001b[37m\u001b[1m[2023-08-17 11:35:10,450][121125] Heartbeat connected on RolloutWorker_w3\u001b[0m\n",
+      "\u001b[37m\u001b[1m[2023-08-17 11:35:10,453][121125] Heartbeat connected on RolloutWorker_w4\u001b[0m\n",
+      "\u001b[37m\u001b[1m[2023-08-17 11:35:10,455][121125] Heartbeat connected on RolloutWorker_w5\u001b[0m\n",
+      "\u001b[37m\u001b[1m[2023-08-17 11:35:10,456][121125] Heartbeat connected on RolloutWorker_w6\u001b[0m\n",
+      "\u001b[37m\u001b[1m[2023-08-17 11:35:10,459][121125] Heartbeat connected on RolloutWorker_w7\u001b[0m\n",
+      "\u001b[37m\u001b[1m[2023-08-17 11:35:10,742][121224] Updated weights for policy 0, policy_version 130 (0.0005)\u001b[0m\n",
+      "\u001b[37m\u001b[1m[2023-08-17 11:35:11,789][121224] Updated weights for policy 0, policy_version 140 (0.0005)\u001b[0m\n",
+      "\u001b[37m\u001b[1m[2023-08-17 11:35:12,785][121224] Updated weights for policy 0, policy_version 150 (0.0005)\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:35:13,455][121125] Fps is (10 sec: 40550.2, 60 sec: 31948.8, 300 sec: 31948.8). Total num frames: 638976. Throughput: 0: 7468.3. Samples: 149366. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:35:13,456][121125] Avg episode reward: [(0, '4.646')]\u001b[0m\n",
+      "\u001b[37m\u001b[1m[2023-08-17 11:35:13,856][121224] Updated weights for policy 0, policy_version 160 (0.0007)\u001b[0m\n",
+      "\u001b[37m\u001b[1m[2023-08-17 11:35:14,953][121224] Updated weights for policy 0, policy_version 170 (0.0007)\u001b[0m\n",
+      "\u001b[31m\u001b[1m[2023-08-17 11:35:15,042][121230] EvtLoop [rollout_proc5_evt_loop, process=rollout_proc5] unhandled exception in slot='advance_rollouts' connected to emitter=Emitter(object_id='InferenceWorker_p0-w0', signal_name='advance5'), args=(0, 0)\u001b[0m\n",
+      "Traceback (most recent call last):\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/signal_slot/signal_slot.py\", line 355, in _process_signal\n",
+      "    slot_callable(*args)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sample_factory/algo/sampling/rollout_worker.py\", line 241, in advance_rollouts\n",
+      "    complete_rollouts, episodic_stats = runner.advance_rollouts(policy_id, self.timing)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sample_factory/algo/sampling/non_batched_sampling.py\", line 634, in advance_rollouts\n",
+      "    new_obs, rewards, terminated, truncated, infos = e.step(actions)\n",
+      "  File \"/nix/store/b84h28azn9cg3h9940zb3b3x2569sykl-python3-3.10.12-env/lib/python3.10/site-packages/gymnasium/core.py\", line 408, in step\n",
+      "    return self.env.step(action)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sample_factory/algo/utils/make_env.py\", line 129, in step\n",
+      "    obs, rew, terminated, truncated, info = self.env.step(action)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sample_factory/algo/utils/make_env.py\", line 115, in step\n",
+      "    obs, rew, terminated, truncated, info = self.env.step(action)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sf_examples/vizdoom/doom/wrappers/scenario_wrappers/gathering_reward_shaping.py\", line 33, in step\n",
+      "    observation, reward, terminated, truncated, info = self.env.step(action)\n",
+      "  File \"/nix/store/b84h28azn9cg3h9940zb3b3x2569sykl-python3-3.10.12-env/lib/python3.10/site-packages/gymnasium/core.py\", line 469, in step\n",
+      "    observation, reward, terminated, truncated, info = self.env.step(action)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sample_factory/envs/env_wrappers.py\", line 86, in step\n",
+      "    obs, reward, terminated, truncated, info = self.env.step(action)\n",
+      "  File \"/nix/store/b84h28azn9cg3h9940zb3b3x2569sykl-python3-3.10.12-env/lib/python3.10/site-packages/gymnasium/core.py\", line 408, in step\n",
+      "    return self.env.step(action)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sf_examples/vizdoom/doom/wrappers/multiplayer_stats.py\", line 54, in step\n",
+      "    obs, reward, terminated, truncated, info = self.env.step(action)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sf_examples/vizdoom/doom/doom_gym.py\", line 452, in step\n",
+      "    reward = self.game.make_action(actions_flattened, self.skip_frames)\n",
+      "vizdoom.vizdoom.SignalException: Signal SIGINT received. ViZDoom instance has been closed.\n",
+      "\u001b[31m\u001b[1m[2023-08-17 11:35:15,042][121229] EvtLoop [rollout_proc4_evt_loop, process=rollout_proc4] unhandled exception in slot='advance_rollouts' connected to emitter=Emitter(object_id='InferenceWorker_p0-w0', signal_name='advance4'), args=(1, 0)\u001b[0m\n",
+      "Traceback (most recent call last):\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/signal_slot/signal_slot.py\", line 355, in _process_signal\n",
+      "    slot_callable(*args)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sample_factory/algo/sampling/rollout_worker.py\", line 241, in advance_rollouts\n",
+      "    complete_rollouts, episodic_stats = runner.advance_rollouts(policy_id, self.timing)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sample_factory/algo/sampling/non_batched_sampling.py\", line 634, in advance_rollouts\n",
+      "    new_obs, rewards, terminated, truncated, infos = e.step(actions)\n",
+      "  File \"/nix/store/b84h28azn9cg3h9940zb3b3x2569sykl-python3-3.10.12-env/lib/python3.10/site-packages/gymnasium/core.py\", line 408, in step\n",
+      "    return self.env.step(action)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sample_factory/algo/utils/make_env.py\", line 129, in step\n",
+      "    obs, rew, terminated, truncated, info = self.env.step(action)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sample_factory/algo/utils/make_env.py\", line 115, in step\n",
+      "    obs, rew, terminated, truncated, info = self.env.step(action)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sf_examples/vizdoom/doom/wrappers/scenario_wrappers/gathering_reward_shaping.py\", line 33, in step\n",
+      "    observation, reward, terminated, truncated, info = self.env.step(action)\n",
+      "  File \"/nix/store/b84h28azn9cg3h9940zb3b3x2569sykl-python3-3.10.12-env/lib/python3.10/site-packages/gymnasium/core.py\", line 469, in step\n",
+      "    observation, reward, terminated, truncated, info = self.env.step(action)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sample_factory/envs/env_wrappers.py\", line 86, in step\n",
+      "    obs, reward, terminated, truncated, info = self.env.step(action)\n",
+      "  File \"/nix/store/b84h28azn9cg3h9940zb3b3x2569sykl-python3-3.10.12-env/lib/python3.10/site-packages/gymnasium/core.py\", line 408, in step\n",
+      "    return self.env.step(action)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sf_examples/vizdoom/doom/wrappers/multiplayer_stats.py\", line 54, in step\n",
+      "    obs, reward, terminated, truncated, info = self.env.step(action)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sf_examples/vizdoom/doom/doom_gym.py\", line 452, in step\n",
+      "    reward = self.game.make_action(actions_flattened, self.skip_frames)\n",
+      "vizdoom.vizdoom.SignalException: Signal SIGINT received. ViZDoom instance has been closed.\n",
+      "\u001b[33m[2023-08-17 11:35:15,043][121230] Unhandled exception Signal SIGINT received. ViZDoom instance has been closed. in evt loop rollout_proc5_evt_loop\u001b[0m\n",
+      "\u001b[33m[2023-08-17 11:35:15,043][121229] Unhandled exception Signal SIGINT received. ViZDoom instance has been closed. in evt loop rollout_proc4_evt_loop\u001b[0m\n",
+      "Process rollout_proc5:\n",
+      "Process rollout_proc4:\n",
+      "Traceback (most recent call last):\n",
+      "Traceback (most recent call last):\n",
+      "  File \"/nix/store/1r6n7v2wam7gkr18gxccpg7p5ywgw551-python3-3.10.12/lib/python3.10/multiprocessing/process.py\", line 314, in _bootstrap\n",
+      "    self.run()\n",
+      "  File \"/nix/store/1r6n7v2wam7gkr18gxccpg7p5ywgw551-python3-3.10.12/lib/python3.10/multiprocessing/process.py\", line 108, in run\n",
+      "    self._target(*self._args, **self._kwargs)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/signal_slot/signal_slot.py\", line 511, in _target\n",
+      "    self.event_loop.exec()\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/signal_slot/signal_slot.py\", line 403, in exec\n",
+      "    raise exc\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/signal_slot/signal_slot.py\", line 399, in exec\n",
+      "    while self._loop_iteration():\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/signal_slot/signal_slot.py\", line 383, in _loop_iteration\n",
+      "    self._process_signal(s)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/signal_slot/signal_slot.py\", line 358, in _process_signal\n",
+      "    raise exc\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/signal_slot/signal_slot.py\", line 355, in _process_signal\n",
+      "    slot_callable(*args)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sample_factory/algo/sampling/rollout_worker.py\", line 241, in advance_rollouts\n",
+      "    complete_rollouts, episodic_stats = runner.advance_rollouts(policy_id, self.timing)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sample_factory/algo/sampling/non_batched_sampling.py\", line 634, in advance_rollouts\n",
+      "    new_obs, rewards, terminated, truncated, infos = e.step(actions)\n",
+      "  File \"/nix/store/b84h28azn9cg3h9940zb3b3x2569sykl-python3-3.10.12-env/lib/python3.10/site-packages/gymnasium/core.py\", line 408, in step\n",
+      "    return self.env.step(action)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sample_factory/algo/utils/make_env.py\", line 129, in step\n",
+      "    obs, rew, terminated, truncated, info = self.env.step(action)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sample_factory/algo/utils/make_env.py\", line 115, in step\n",
+      "    obs, rew, terminated, truncated, info = self.env.step(action)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sf_examples/vizdoom/doom/wrappers/scenario_wrappers/gathering_reward_shaping.py\", line 33, in step\n",
+      "    observation, reward, terminated, truncated, info = self.env.step(action)\n",
+      "  File \"/nix/store/b84h28azn9cg3h9940zb3b3x2569sykl-python3-3.10.12-env/lib/python3.10/site-packages/gymnasium/core.py\", line 469, in step\n",
+      "    observation, reward, terminated, truncated, info = self.env.step(action)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sample_factory/envs/env_wrappers.py\", line 86, in step\n",
+      "    obs, reward, terminated, truncated, info = self.env.step(action)\n",
+      "  File \"/nix/store/b84h28azn9cg3h9940zb3b3x2569sykl-python3-3.10.12-env/lib/python3.10/site-packages/gymnasium/core.py\", line 408, in step\n",
+      "    return self.env.step(action)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sf_examples/vizdoom/doom/wrappers/multiplayer_stats.py\", line 54, in step\n",
+      "    obs, reward, terminated, truncated, info = self.env.step(action)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sf_examples/vizdoom/doom/doom_gym.py\", line 452, in step\n",
+      "    reward = self.game.make_action(actions_flattened, self.skip_frames)\n",
+      "vizdoom.vizdoom.SignalException: Signal SIGINT received. ViZDoom instance has been closed.\n",
+      "\u001b[31m\u001b[1m[2023-08-17 11:35:15,042][121228] EvtLoop [rollout_proc3_evt_loop, process=rollout_proc3] unhandled exception in slot='advance_rollouts' connected to emitter=Emitter(object_id='InferenceWorker_p0-w0', signal_name='advance3'), args=(0, 0)\u001b[0m\n",
+      "Traceback (most recent call last):\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/signal_slot/signal_slot.py\", line 355, in _process_signal\n",
+      "    slot_callable(*args)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sample_factory/algo/sampling/rollout_worker.py\", line 241, in advance_rollouts\n",
+      "    complete_rollouts, episodic_stats = runner.advance_rollouts(policy_id, self.timing)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sample_factory/algo/sampling/non_batched_sampling.py\", line 634, in advance_rollouts\n",
+      "    new_obs, rewards, terminated, truncated, infos = e.step(actions)\n",
+      "  File \"/nix/store/b84h28azn9cg3h9940zb3b3x2569sykl-python3-3.10.12-env/lib/python3.10/site-packages/gymnasium/core.py\", line 408, in step\n",
+      "    return self.env.step(action)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sample_factory/algo/utils/make_env.py\", line 129, in step\n",
+      "    obs, rew, terminated, truncated, info = self.env.step(action)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sample_factory/algo/utils/make_env.py\", line 115, in step\n",
+      "    obs, rew, terminated, truncated, info = self.env.step(action)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sf_examples/vizdoom/doom/wrappers/scenario_wrappers/gathering_reward_shaping.py\", line 33, in step\n",
+      "    observation, reward, terminated, truncated, info = self.env.step(action)\n",
+      "  File \"/nix/store/b84h28azn9cg3h9940zb3b3x2569sykl-python3-3.10.12-env/lib/python3.10/site-packages/gymnasium/core.py\", line 469, in step\n",
+      "    observation, reward, terminated, truncated, info = self.env.step(action)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sample_factory/envs/env_wrappers.py\", line 86, in step\n",
+      "    obs, reward, terminated, truncated, info = self.env.step(action)\n",
+      "  File \"/nix/store/b84h28azn9cg3h9940zb3b3x2569sykl-python3-3.10.12-env/lib/python3.10/site-packages/gymnasium/core.py\", line 408, in step\n",
+      "    return self.env.step(action)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sf_examples/vizdoom/doom/wrappers/multiplayer_stats.py\", line 54, in step\n",
+      "    obs, reward, terminated, truncated, info = self.env.step(action)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sf_examples/vizdoom/doom/doom_gym.py\", line 452, in step\n",
+      "    reward = self.game.make_action(actions_flattened, self.skip_frames)\n",
+      "vizdoom.vizdoom.SignalException: Signal SIGINT received. ViZDoom instance has been closed.\n",
+      "  File \"/nix/store/1r6n7v2wam7gkr18gxccpg7p5ywgw551-python3-3.10.12/lib/python3.10/multiprocessing/process.py\", line 314, in _bootstrap\n",
+      "    self.run()\n",
+      "  File \"/nix/store/1r6n7v2wam7gkr18gxccpg7p5ywgw551-python3-3.10.12/lib/python3.10/multiprocessing/process.py\", line 108, in run\n",
+      "    self._target(*self._args, **self._kwargs)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/signal_slot/signal_slot.py\", line 511, in _target\n",
+      "    self.event_loop.exec()\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/signal_slot/signal_slot.py\", line 403, in exec\n",
+      "    raise exc\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/signal_slot/signal_slot.py\", line 399, in exec\n",
+      "    while self._loop_iteration():\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/signal_slot/signal_slot.py\", line 383, in _loop_iteration\n",
+      "    self._process_signal(s)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/signal_slot/signal_slot.py\", line 358, in _process_signal\n",
+      "    raise exc\n",
+      "\u001b[33m[2023-08-17 11:35:15,044][121228] Unhandled exception Signal SIGINT received. ViZDoom instance has been closed. in evt loop rollout_proc3_evt_loop\u001b[0m\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/signal_slot/signal_slot.py\", line 355, in _process_signal\n",
+      "    slot_callable(*args)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sample_factory/algo/sampling/rollout_worker.py\", line 241, in advance_rollouts\n",
+      "    complete_rollouts, episodic_stats = runner.advance_rollouts(policy_id, self.timing)\n",
+      "Process rollout_proc3:\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sample_factory/algo/sampling/non_batched_sampling.py\", line 634, in advance_rollouts\n",
+      "    new_obs, rewards, terminated, truncated, infos = e.step(actions)\n",
+      "  File \"/nix/store/b84h28azn9cg3h9940zb3b3x2569sykl-python3-3.10.12-env/lib/python3.10/site-packages/gymnasium/core.py\", line 408, in step\n",
+      "    return self.env.step(action)\n",
+      "\u001b[31m\u001b[1m[2023-08-17 11:35:15,043][121231] EvtLoop [rollout_proc7_evt_loop, process=rollout_proc7] unhandled exception in slot='advance_rollouts' connected to emitter=Emitter(object_id='InferenceWorker_p0-w0', signal_name='advance7'), args=(0, 0)\u001b[0m\n",
+      "Traceback (most recent call last):\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/signal_slot/signal_slot.py\", line 355, in _process_signal\n",
+      "    slot_callable(*args)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sample_factory/algo/sampling/rollout_worker.py\", line 241, in advance_rollouts\n",
+      "    complete_rollouts, episodic_stats = runner.advance_rollouts(policy_id, self.timing)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sample_factory/algo/sampling/non_batched_sampling.py\", line 634, in advance_rollouts\n",
+      "    new_obs, rewards, terminated, truncated, infos = e.step(actions)\n",
+      "  File \"/nix/store/b84h28azn9cg3h9940zb3b3x2569sykl-python3-3.10.12-env/lib/python3.10/site-packages/gymnasium/core.py\", line 408, in step\n",
+      "    return self.env.step(action)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sample_factory/algo/utils/make_env.py\", line 129, in step\n",
+      "    obs, rew, terminated, truncated, info = self.env.step(action)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sample_factory/algo/utils/make_env.py\", line 115, in step\n",
+      "    obs, rew, terminated, truncated, info = self.env.step(action)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sf_examples/vizdoom/doom/wrappers/scenario_wrappers/gathering_reward_shaping.py\", line 33, in step\n",
+      "    observation, reward, terminated, truncated, info = self.env.step(action)\n",
+      "  File \"/nix/store/b84h28azn9cg3h9940zb3b3x2569sykl-python3-3.10.12-env/lib/python3.10/site-packages/gymnasium/core.py\", line 469, in step\n",
+      "    observation, reward, terminated, truncated, info = self.env.step(action)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sample_factory/envs/env_wrappers.py\", line 86, in step\n",
+      "    obs, reward, terminated, truncated, info = self.env.step(action)\n",
+      "  File \"/nix/store/b84h28azn9cg3h9940zb3b3x2569sykl-python3-3.10.12-env/lib/python3.10/site-packages/gymnasium/core.py\", line 408, in step\n",
+      "    return self.env.step(action)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sf_examples/vizdoom/doom/wrappers/multiplayer_stats.py\", line 54, in step\n",
+      "    obs, reward, terminated, truncated, info = self.env.step(action)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sf_examples/vizdoom/doom/doom_gym.py\", line 452, in step\n",
+      "    reward = self.game.make_action(actions_flattened, self.skip_frames)\n",
+      "vizdoom.vizdoom.SignalException: Signal SIGINT received. ViZDoom instance has been closed.\n",
+      "Traceback (most recent call last):\n",
+      "\u001b[33m[2023-08-17 11:35:15,044][121231] Unhandled exception Signal SIGINT received. ViZDoom instance has been closed. in evt loop rollout_proc7_evt_loop\u001b[0m\n",
+      "  File \"/nix/store/1r6n7v2wam7gkr18gxccpg7p5ywgw551-python3-3.10.12/lib/python3.10/multiprocessing/process.py\", line 314, in _bootstrap\n",
+      "    self.run()\n",
+      "  File \"/nix/store/1r6n7v2wam7gkr18gxccpg7p5ywgw551-python3-3.10.12/lib/python3.10/multiprocessing/process.py\", line 108, in run\n",
+      "    self._target(*self._args, **self._kwargs)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/signal_slot/signal_slot.py\", line 511, in _target\n",
+      "    self.event_loop.exec()\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/signal_slot/signal_slot.py\", line 403, in exec\n",
+      "    raise exc\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/signal_slot/signal_slot.py\", line 399, in exec\n",
+      "    while self._loop_iteration():\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/signal_slot/signal_slot.py\", line 383, in _loop_iteration\n",
+      "    self._process_signal(s)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/signal_slot/signal_slot.py\", line 358, in _process_signal\n",
+      "    raise exc\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/signal_slot/signal_slot.py\", line 355, in _process_signal\n",
+      "    slot_callable(*args)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sample_factory/algo/sampling/rollout_worker.py\", line 241, in advance_rollouts\n",
+      "    complete_rollouts, episodic_stats = runner.advance_rollouts(policy_id, self.timing)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sample_factory/algo/sampling/non_batched_sampling.py\", line 634, in advance_rollouts\n",
+      "    new_obs, rewards, terminated, truncated, infos = e.step(actions)\n",
+      "  File \"/nix/store/b84h28azn9cg3h9940zb3b3x2569sykl-python3-3.10.12-env/lib/python3.10/site-packages/gymnasium/core.py\", line 408, in step\n",
+      "    return self.env.step(action)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sample_factory/algo/utils/make_env.py\", line 129, in step\n",
+      "    obs, rew, terminated, truncated, info = self.env.step(action)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sample_factory/algo/utils/make_env.py\", line 115, in step\n",
+      "    obs, rew, terminated, truncated, info = self.env.step(action)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sf_examples/vizdoom/doom/wrappers/scenario_wrappers/gathering_reward_shaping.py\", line 33, in step\n",
+      "    observation, reward, terminated, truncated, info = self.env.step(action)\n",
+      "  File \"/nix/store/b84h28azn9cg3h9940zb3b3x2569sykl-python3-3.10.12-env/lib/python3.10/site-packages/gymnasium/core.py\", line 469, in step\n",
+      "    observation, reward, terminated, truncated, info = self.env.step(action)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sample_factory/envs/env_wrappers.py\", line 86, in step\n",
+      "    obs, reward, terminated, truncated, info = self.env.step(action)\n",
+      "  File \"/nix/store/b84h28azn9cg3h9940zb3b3x2569sykl-python3-3.10.12-env/lib/python3.10/site-packages/gymnasium/core.py\", line 408, in step\n",
+      "    return self.env.step(action)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sf_examples/vizdoom/doom/wrappers/multiplayer_stats.py\", line 54, in step\n",
+      "    obs, reward, terminated, truncated, info = self.env.step(action)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sf_examples/vizdoom/doom/doom_gym.py\", line 452, in step\n",
+      "    reward = self.game.make_action(actions_flattened, self.skip_frames)\n",
+      "vizdoom.vizdoom.SignalException: Signal SIGINT received. ViZDoom instance has been closed.\n",
+      "Process rollout_proc7:\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sample_factory/algo/utils/make_env.py\", line 129, in step\n",
+      "    obs, rew, terminated, truncated, info = self.env.step(action)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sample_factory/algo/utils/make_env.py\", line 115, in step\n",
+      "    obs, rew, terminated, truncated, info = self.env.step(action)\n",
+      "Traceback (most recent call last):\n",
+      "  File \"/nix/store/1r6n7v2wam7gkr18gxccpg7p5ywgw551-python3-3.10.12/lib/python3.10/multiprocessing/process.py\", line 314, in _bootstrap\n",
+      "    self.run()\n",
+      "  File \"/nix/store/1r6n7v2wam7gkr18gxccpg7p5ywgw551-python3-3.10.12/lib/python3.10/multiprocessing/process.py\", line 108, in run\n",
+      "    self._target(*self._args, **self._kwargs)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/signal_slot/signal_slot.py\", line 511, in _target\n",
+      "    self.event_loop.exec()\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/signal_slot/signal_slot.py\", line 403, in exec\n",
+      "    raise exc\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/signal_slot/signal_slot.py\", line 399, in exec\n",
+      "    while self._loop_iteration():\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/signal_slot/signal_slot.py\", line 383, in _loop_iteration\n",
+      "    self._process_signal(s)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/signal_slot/signal_slot.py\", line 358, in _process_signal\n",
+      "    raise exc\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/signal_slot/signal_slot.py\", line 355, in _process_signal\n",
+      "    slot_callable(*args)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sample_factory/algo/sampling/rollout_worker.py\", line 241, in advance_rollouts\n",
+      "    complete_rollouts, episodic_stats = runner.advance_rollouts(policy_id, self.timing)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sample_factory/algo/sampling/non_batched_sampling.py\", line 634, in advance_rollouts\n",
+      "    new_obs, rewards, terminated, truncated, infos = e.step(actions)\n",
+      "  File \"/nix/store/b84h28azn9cg3h9940zb3b3x2569sykl-python3-3.10.12-env/lib/python3.10/site-packages/gymnasium/core.py\", line 408, in step\n",
+      "    return self.env.step(action)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sample_factory/algo/utils/make_env.py\", line 129, in step\n",
+      "    obs, rew, terminated, truncated, info = self.env.step(action)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sample_factory/algo/utils/make_env.py\", line 115, in step\n",
+      "    obs, rew, terminated, truncated, info = self.env.step(action)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sf_examples/vizdoom/doom/wrappers/scenario_wrappers/gathering_reward_shaping.py\", line 33, in step\n",
+      "    observation, reward, terminated, truncated, info = self.env.step(action)\n",
+      "  File \"/nix/store/b84h28azn9cg3h9940zb3b3x2569sykl-python3-3.10.12-env/lib/python3.10/site-packages/gymnasium/core.py\", line 469, in step\n",
+      "    observation, reward, terminated, truncated, info = self.env.step(action)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sample_factory/envs/env_wrappers.py\", line 86, in step\n",
+      "    obs, reward, terminated, truncated, info = self.env.step(action)\n",
+      "  File \"/nix/store/b84h28azn9cg3h9940zb3b3x2569sykl-python3-3.10.12-env/lib/python3.10/site-packages/gymnasium/core.py\", line 408, in step\n",
+      "    return self.env.step(action)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sf_examples/vizdoom/doom/wrappers/multiplayer_stats.py\", line 54, in step\n",
+      "    obs, reward, terminated, truncated, info = self.env.step(action)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sf_examples/vizdoom/doom/doom_gym.py\", line 452, in step\n",
+      "    reward = self.game.make_action(actions_flattened, self.skip_frames)\n",
+      "vizdoom.vizdoom.SignalException: Signal SIGINT received. ViZDoom instance has been closed.\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sf_examples/vizdoom/doom/wrappers/scenario_wrappers/gathering_reward_shaping.py\", line 33, in step\n",
+      "    observation, reward, terminated, truncated, info = self.env.step(action)\n",
+      "  File \"/nix/store/b84h28azn9cg3h9940zb3b3x2569sykl-python3-3.10.12-env/lib/python3.10/site-packages/gymnasium/core.py\", line 469, in step\n",
+      "    observation, reward, terminated, truncated, info = self.env.step(action)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sample_factory/envs/env_wrappers.py\", line 86, in step\n",
+      "    obs, reward, terminated, truncated, info = self.env.step(action)\n",
+      "  File \"/nix/store/b84h28azn9cg3h9940zb3b3x2569sykl-python3-3.10.12-env/lib/python3.10/site-packages/gymnasium/core.py\", line 408, in step\n",
+      "    return self.env.step(action)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sf_examples/vizdoom/doom/wrappers/multiplayer_stats.py\", line 54, in step\n",
+      "    obs, reward, terminated, truncated, info = self.env.step(action)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sf_examples/vizdoom/doom/doom_gym.py\", line 452, in step\n",
+      "    reward = self.game.make_action(actions_flattened, self.skip_frames)\n",
+      "vizdoom.vizdoom.SignalException: Signal SIGINT received. ViZDoom instance has been closed.\n",
+      "\u001b[31m\u001b[1m[2023-08-17 11:35:15,046][121232] EvtLoop [rollout_proc6_evt_loop, process=rollout_proc6] unhandled exception in slot='advance_rollouts' connected to emitter=Emitter(object_id='InferenceWorker_p0-w0', signal_name='advance6'), args=(0, 0)\u001b[0m\n",
+      "Traceback (most recent call last):\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/signal_slot/signal_slot.py\", line 355, in _process_signal\n",
+      "    slot_callable(*args)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sample_factory/algo/sampling/rollout_worker.py\", line 241, in advance_rollouts\n",
+      "    complete_rollouts, episodic_stats = runner.advance_rollouts(policy_id, self.timing)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sample_factory/algo/sampling/non_batched_sampling.py\", line 634, in advance_rollouts\n",
+      "    new_obs, rewards, terminated, truncated, infos = e.step(actions)\n",
+      "  File \"/nix/store/b84h28azn9cg3h9940zb3b3x2569sykl-python3-3.10.12-env/lib/python3.10/site-packages/gymnasium/core.py\", line 408, in step\n",
+      "    return self.env.step(action)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sample_factory/algo/utils/make_env.py\", line 129, in step\n",
+      "    obs, rew, terminated, truncated, info = self.env.step(action)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sample_factory/algo/utils/make_env.py\", line 115, in step\n",
+      "    obs, rew, terminated, truncated, info = self.env.step(action)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sf_examples/vizdoom/doom/wrappers/scenario_wrappers/gathering_reward_shaping.py\", line 33, in step\n",
+      "    observation, reward, terminated, truncated, info = self.env.step(action)\n",
+      "  File \"/nix/store/b84h28azn9cg3h9940zb3b3x2569sykl-python3-3.10.12-env/lib/python3.10/site-packages/gymnasium/core.py\", line 469, in step\n",
+      "    observation, reward, terminated, truncated, info = self.env.step(action)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sample_factory/envs/env_wrappers.py\", line 86, in step\n",
+      "    obs, reward, terminated, truncated, info = self.env.step(action)\n",
+      "  File \"/nix/store/b84h28azn9cg3h9940zb3b3x2569sykl-python3-3.10.12-env/lib/python3.10/site-packages/gymnasium/core.py\", line 408, in step\n",
+      "    return self.env.step(action)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sf_examples/vizdoom/doom/wrappers/multiplayer_stats.py\", line 54, in step\n",
+      "    obs, reward, terminated, truncated, info = self.env.step(action)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sf_examples/vizdoom/doom/doom_gym.py\", line 452, in step\n",
+      "    reward = self.game.make_action(actions_flattened, self.skip_frames)\n",
+      "vizdoom.vizdoom.SignalException: Signal SIGINT received. ViZDoom instance has been closed.\n",
+      "\u001b[33m[2023-08-17 11:35:15,047][121232] Unhandled exception Signal SIGINT received. ViZDoom instance has been closed. in evt loop rollout_proc6_evt_loop\u001b[0m\n",
+      "Process rollout_proc6:\n",
+      "Traceback (most recent call last):\n",
+      "  File \"/nix/store/1r6n7v2wam7gkr18gxccpg7p5ywgw551-python3-3.10.12/lib/python3.10/multiprocessing/process.py\", line 314, in _bootstrap\n",
+      "    self.run()\n",
+      "  File \"/nix/store/1r6n7v2wam7gkr18gxccpg7p5ywgw551-python3-3.10.12/lib/python3.10/multiprocessing/process.py\", line 108, in run\n",
+      "    self._target(*self._args, **self._kwargs)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/signal_slot/signal_slot.py\", line 511, in _target\n",
+      "    self.event_loop.exec()\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/signal_slot/signal_slot.py\", line 403, in exec\n",
+      "    raise exc\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/signal_slot/signal_slot.py\", line 399, in exec\n",
+      "    while self._loop_iteration():\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/signal_slot/signal_slot.py\", line 383, in _loop_iteration\n",
+      "    self._process_signal(s)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/signal_slot/signal_slot.py\", line 358, in _process_signal\n",
+      "    raise exc\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/signal_slot/signal_slot.py\", line 355, in _process_signal\n",
+      "    slot_callable(*args)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sample_factory/algo/sampling/rollout_worker.py\", line 241, in advance_rollouts\n",
+      "    complete_rollouts, episodic_stats = runner.advance_rollouts(policy_id, self.timing)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sample_factory/algo/sampling/non_batched_sampling.py\", line 634, in advance_rollouts\n",
+      "    new_obs, rewards, terminated, truncated, infos = e.step(actions)\n",
+      "  File \"/nix/store/b84h28azn9cg3h9940zb3b3x2569sykl-python3-3.10.12-env/lib/python3.10/site-packages/gymnasium/core.py\", line 408, in step\n",
+      "    return self.env.step(action)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sample_factory/algo/utils/make_env.py\", line 129, in step\n",
+      "    obs, rew, terminated, truncated, info = self.env.step(action)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sample_factory/algo/utils/make_env.py\", line 115, in step\n",
+      "    obs, rew, terminated, truncated, info = self.env.step(action)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sf_examples/vizdoom/doom/wrappers/scenario_wrappers/gathering_reward_shaping.py\", line 33, in step\n",
+      "    observation, reward, terminated, truncated, info = self.env.step(action)\n",
+      "  File \"/nix/store/b84h28azn9cg3h9940zb3b3x2569sykl-python3-3.10.12-env/lib/python3.10/site-packages/gymnasium/core.py\", line 469, in step\n",
+      "    observation, reward, terminated, truncated, info = self.env.step(action)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sample_factory/envs/env_wrappers.py\", line 86, in step\n",
+      "    obs, reward, terminated, truncated, info = self.env.step(action)\n",
+      "  File \"/nix/store/b84h28azn9cg3h9940zb3b3x2569sykl-python3-3.10.12-env/lib/python3.10/site-packages/gymnasium/core.py\", line 408, in step\n",
+      "    return self.env.step(action)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sf_examples/vizdoom/doom/wrappers/multiplayer_stats.py\", line 54, in step\n",
+      "    obs, reward, terminated, truncated, info = self.env.step(action)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sf_examples/vizdoom/doom/doom_gym.py\", line 452, in step\n",
+      "    reward = self.game.make_action(actions_flattened, self.skip_frames)\n",
+      "vizdoom.vizdoom.SignalException: Signal SIGINT received. ViZDoom instance has been closed.\n",
+      "\u001b[31m\u001b[1m[2023-08-17 11:35:15,046][121225] EvtLoop [rollout_proc1_evt_loop, process=rollout_proc1] unhandled exception in slot='advance_rollouts' connected to emitter=Emitter(object_id='InferenceWorker_p0-w0', signal_name='advance1'), args=(1, 0)\u001b[0m\n",
+      "Traceback (most recent call last):\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/signal_slot/signal_slot.py\", line 355, in _process_signal\n",
+      "    slot_callable(*args)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sample_factory/algo/sampling/rollout_worker.py\", line 241, in advance_rollouts\n",
+      "    complete_rollouts, episodic_stats = runner.advance_rollouts(policy_id, self.timing)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sample_factory/algo/sampling/non_batched_sampling.py\", line 634, in advance_rollouts\n",
+      "    new_obs, rewards, terminated, truncated, infos = e.step(actions)\n",
+      "  File \"/nix/store/b84h28azn9cg3h9940zb3b3x2569sykl-python3-3.10.12-env/lib/python3.10/site-packages/gymnasium/core.py\", line 408, in step\n",
+      "    return self.env.step(action)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sample_factory/algo/utils/make_env.py\", line 129, in step\n",
+      "    obs, rew, terminated, truncated, info = self.env.step(action)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sample_factory/algo/utils/make_env.py\", line 115, in step\n",
+      "    obs, rew, terminated, truncated, info = self.env.step(action)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sf_examples/vizdoom/doom/wrappers/scenario_wrappers/gathering_reward_shaping.py\", line 33, in step\n",
+      "    observation, reward, terminated, truncated, info = self.env.step(action)\n",
+      "  File \"/nix/store/b84h28azn9cg3h9940zb3b3x2569sykl-python3-3.10.12-env/lib/python3.10/site-packages/gymnasium/core.py\", line 469, in step\n",
+      "    observation, reward, terminated, truncated, info = self.env.step(action)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sample_factory/envs/env_wrappers.py\", line 86, in step\n",
+      "    obs, reward, terminated, truncated, info = self.env.step(action)\n",
+      "  File \"/nix/store/b84h28azn9cg3h9940zb3b3x2569sykl-python3-3.10.12-env/lib/python3.10/site-packages/gymnasium/core.py\", line 408, in step\n",
+      "    return self.env.step(action)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sf_examples/vizdoom/doom/wrappers/multiplayer_stats.py\", line 54, in step\n",
+      "    obs, reward, terminated, truncated, info = self.env.step(action)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sf_examples/vizdoom/doom/doom_gym.py\", line 452, in step\n",
+      "    reward = self.game.make_action(actions_flattened, self.skip_frames)\n",
+      "vizdoom.vizdoom.SignalException: Signal SIGINT received. ViZDoom instance has been closed.\n",
+      "\u001b[33m[2023-08-17 11:35:15,047][121225] Unhandled exception Signal SIGINT received. ViZDoom instance has been closed. in evt loop rollout_proc1_evt_loop\u001b[0m\n",
+      "Process rollout_proc1:\n",
+      "Traceback (most recent call last):\n",
+      "  File \"/nix/store/1r6n7v2wam7gkr18gxccpg7p5ywgw551-python3-3.10.12/lib/python3.10/multiprocessing/process.py\", line 314, in _bootstrap\n",
+      "    self.run()\n",
+      "  File \"/nix/store/1r6n7v2wam7gkr18gxccpg7p5ywgw551-python3-3.10.12/lib/python3.10/multiprocessing/process.py\", line 108, in run\n",
+      "    self._target(*self._args, **self._kwargs)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/signal_slot/signal_slot.py\", line 511, in _target\n",
+      "    self.event_loop.exec()\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/signal_slot/signal_slot.py\", line 403, in exec\n",
+      "    raise exc\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/signal_slot/signal_slot.py\", line 399, in exec\n",
+      "    while self._loop_iteration():\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/signal_slot/signal_slot.py\", line 383, in _loop_iteration\n",
+      "    self._process_signal(s)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/signal_slot/signal_slot.py\", line 358, in _process_signal\n",
+      "    raise exc\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/signal_slot/signal_slot.py\", line 355, in _process_signal\n",
+      "    slot_callable(*args)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sample_factory/algo/sampling/rollout_worker.py\", line 241, in advance_rollouts\n",
+      "    complete_rollouts, episodic_stats = runner.advance_rollouts(policy_id, self.timing)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sample_factory/algo/sampling/non_batched_sampling.py\", line 634, in advance_rollouts\n",
+      "    new_obs, rewards, terminated, truncated, infos = e.step(actions)\n",
+      "  File \"/nix/store/b84h28azn9cg3h9940zb3b3x2569sykl-python3-3.10.12-env/lib/python3.10/site-packages/gymnasium/core.py\", line 408, in step\n",
+      "    return self.env.step(action)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sample_factory/algo/utils/make_env.py\", line 129, in step\n",
+      "    obs, rew, terminated, truncated, info = self.env.step(action)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sample_factory/algo/utils/make_env.py\", line 115, in step\n",
+      "    obs, rew, terminated, truncated, info = self.env.step(action)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sf_examples/vizdoom/doom/wrappers/scenario_wrappers/gathering_reward_shaping.py\", line 33, in step\n",
+      "    observation, reward, terminated, truncated, info = self.env.step(action)\n",
+      "  File \"/nix/store/b84h28azn9cg3h9940zb3b3x2569sykl-python3-3.10.12-env/lib/python3.10/site-packages/gymnasium/core.py\", line 469, in step\n",
+      "    observation, reward, terminated, truncated, info = self.env.step(action)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sample_factory/envs/env_wrappers.py\", line 86, in step\n",
+      "    obs, reward, terminated, truncated, info = self.env.step(action)\n",
+      "  File \"/nix/store/b84h28azn9cg3h9940zb3b3x2569sykl-python3-3.10.12-env/lib/python3.10/site-packages/gymnasium/core.py\", line 408, in step\n",
+      "    return self.env.step(action)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sf_examples/vizdoom/doom/wrappers/multiplayer_stats.py\", line 54, in step\n",
+      "    obs, reward, terminated, truncated, info = self.env.step(action)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sf_examples/vizdoom/doom/doom_gym.py\", line 452, in step\n",
+      "    reward = self.game.make_action(actions_flattened, self.skip_frames)\n",
+      "vizdoom.vizdoom.SignalException: Signal SIGINT received. ViZDoom instance has been closed.\n",
+      "\u001b[31m\u001b[1m[2023-08-17 11:35:15,042][121226] EvtLoop [rollout_proc0_evt_loop, process=rollout_proc0] unhandled exception in slot='advance_rollouts' connected to emitter=Emitter(object_id='InferenceWorker_p0-w0', signal_name='advance0'), args=(0, 0)\u001b[0m\n",
+      "Traceback (most recent call last):\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/signal_slot/signal_slot.py\", line 355, in _process_signal\n",
+      "    slot_callable(*args)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sample_factory/algo/sampling/rollout_worker.py\", line 241, in advance_rollouts\n",
+      "    complete_rollouts, episodic_stats = runner.advance_rollouts(policy_id, self.timing)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sample_factory/algo/sampling/non_batched_sampling.py\", line 634, in advance_rollouts\n",
+      "    new_obs, rewards, terminated, truncated, infos = e.step(actions)\n",
+      "  File \"/nix/store/b84h28azn9cg3h9940zb3b3x2569sykl-python3-3.10.12-env/lib/python3.10/site-packages/gymnasium/core.py\", line 408, in step\n",
+      "    return self.env.step(action)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sample_factory/algo/utils/make_env.py\", line 129, in step\n",
+      "    obs, rew, terminated, truncated, info = self.env.step(action)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sample_factory/algo/utils/make_env.py\", line 115, in step\n",
+      "    obs, rew, terminated, truncated, info = self.env.step(action)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sf_examples/vizdoom/doom/wrappers/scenario_wrappers/gathering_reward_shaping.py\", line 33, in step\n",
+      "    observation, reward, terminated, truncated, info = self.env.step(action)\n",
+      "  File \"/nix/store/b84h28azn9cg3h9940zb3b3x2569sykl-python3-3.10.12-env/lib/python3.10/site-packages/gymnasium/core.py\", line 469, in step\n",
+      "    observation, reward, terminated, truncated, info = self.env.step(action)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sample_factory/envs/env_wrappers.py\", line 86, in step\n",
+      "    obs, reward, terminated, truncated, info = self.env.step(action)\n",
+      "  File \"/nix/store/b84h28azn9cg3h9940zb3b3x2569sykl-python3-3.10.12-env/lib/python3.10/site-packages/gymnasium/core.py\", line 408, in step\n",
+      "    return self.env.step(action)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sf_examples/vizdoom/doom/wrappers/multiplayer_stats.py\", line 54, in step\n",
+      "    obs, reward, terminated, truncated, info = self.env.step(action)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sf_examples/vizdoom/doom/doom_gym.py\", line 452, in step\n",
+      "    reward = self.game.make_action(actions_flattened, self.skip_frames)\n",
+      "vizdoom.vizdoom.SignalException: Signal SIGINT received. ViZDoom instance has been closed.\n",
+      "\u001b[33m[2023-08-17 11:35:15,048][121226] Unhandled exception Signal SIGINT received. ViZDoom instance has been closed. in evt loop rollout_proc0_evt_loop\u001b[0m\n",
+      "Process rollout_proc0:\n",
+      "Traceback (most recent call last):\n",
+      "  File \"/nix/store/1r6n7v2wam7gkr18gxccpg7p5ywgw551-python3-3.10.12/lib/python3.10/multiprocessing/process.py\", line 314, in _bootstrap\n",
+      "    self.run()\n",
+      "  File \"/nix/store/1r6n7v2wam7gkr18gxccpg7p5ywgw551-python3-3.10.12/lib/python3.10/multiprocessing/process.py\", line 108, in run\n",
+      "    self._target(*self._args, **self._kwargs)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/signal_slot/signal_slot.py\", line 511, in _target\n",
+      "    self.event_loop.exec()\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/signal_slot/signal_slot.py\", line 403, in exec\n",
+      "    raise exc\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/signal_slot/signal_slot.py\", line 399, in exec\n",
+      "    while self._loop_iteration():\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/signal_slot/signal_slot.py\", line 383, in _loop_iteration\n",
+      "    self._process_signal(s)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/signal_slot/signal_slot.py\", line 358, in _process_signal\n",
+      "    raise exc\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/signal_slot/signal_slot.py\", line 355, in _process_signal\n",
+      "    slot_callable(*args)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sample_factory/algo/sampling/rollout_worker.py\", line 241, in advance_rollouts\n",
+      "    complete_rollouts, episodic_stats = runner.advance_rollouts(policy_id, self.timing)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sample_factory/algo/sampling/non_batched_sampling.py\", line 634, in advance_rollouts\n",
+      "    new_obs, rewards, terminated, truncated, infos = e.step(actions)\n",
+      "  File \"/nix/store/b84h28azn9cg3h9940zb3b3x2569sykl-python3-3.10.12-env/lib/python3.10/site-packages/gymnasium/core.py\", line 408, in step\n",
+      "    return self.env.step(action)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sample_factory/algo/utils/make_env.py\", line 129, in step\n",
+      "    obs, rew, terminated, truncated, info = self.env.step(action)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sample_factory/algo/utils/make_env.py\", line 115, in step\n",
+      "    obs, rew, terminated, truncated, info = self.env.step(action)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sf_examples/vizdoom/doom/wrappers/scenario_wrappers/gathering_reward_shaping.py\", line 33, in step\n",
+      "    observation, reward, terminated, truncated, info = self.env.step(action)\n",
+      "  File \"/nix/store/b84h28azn9cg3h9940zb3b3x2569sykl-python3-3.10.12-env/lib/python3.10/site-packages/gymnasium/core.py\", line 469, in step\n",
+      "    observation, reward, terminated, truncated, info = self.env.step(action)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sample_factory/envs/env_wrappers.py\", line 86, in step\n",
+      "    obs, reward, terminated, truncated, info = self.env.step(action)\n",
+      "  File \"/nix/store/b84h28azn9cg3h9940zb3b3x2569sykl-python3-3.10.12-env/lib/python3.10/site-packages/gymnasium/core.py\", line 408, in step\n",
+      "    return self.env.step(action)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sf_examples/vizdoom/doom/wrappers/multiplayer_stats.py\", line 54, in step\n",
+      "    obs, reward, terminated, truncated, info = self.env.step(action)\n",
+      "  File \"/home/patonw/code/learn/deep-rl-class/.mypy/lib/python3.10/site-packages/sf_examples/vizdoom/doom/doom_gym.py\", line 452, in step\n",
+      "    reward = self.game.make_action(actions_flattened, self.skip_frames)\n",
+      "vizdoom.vizdoom.SignalException: Signal SIGINT received. ViZDoom instance has been closed.\n",
+      "\u001b[37m\u001b[1m[2023-08-17 11:35:15,058][121125] Keyboard interrupt detected in the event loop EvtLoop [Runner_EvtLoop, process=main process 121125], exiting...\u001b[0m\n",
+      "\u001b[37m\u001b[1m[2023-08-17 11:35:15,059][121125] Runner profile tree view:\n",
+      "main_loop: 24.6015\u001b[0m\n",
+      "\u001b[37m\u001b[1m[2023-08-17 11:35:15,060][121125] Collected {0: 696320}, FPS: 28303.9\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:35:15,060][121211] Stopping Batcher_0...\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:35:15,060][121211] Loop batcher_evt_loop terminating...\u001b[0m\n",
+      "\u001b[37m\u001b[1m[2023-08-17 11:35:15,061][121211] Saving /home/patonw/code/learn/deep-rl-class/notebooks/unit8/train_dir/default_experiment/checkpoint_p0/checkpoint_000000170_696320.pth...\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:35:15,107][121211] Stopping LearnerWorker_p0...\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:35:15,108][121211] Loop learner_proc0_evt_loop terminating...\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:35:15,121][121224] Weights refcount: 2 0\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:35:15,123][121224] Stopping InferenceWorker_p0-w0...\u001b[0m\n",
+      "\u001b[36m[2023-08-17 11:35:15,123][121224] Loop inference_proc0-0_evt_loop terminating...\u001b[0m\n",
+      "[W CudaIPCTypes.cpp:15] Producer process has been terminated before all shared CUDA tensors released. See Note [Sharing CUDA tensors]\n"
+     ]
     }
+   ],
+   "source": [
+    "## Start the training, this should take around 15 minutes\n",
+    "register_vizdoom_components()\n",
+    "\n",
+    "# The scenario we train on today is health gathering\n",
+    "# other scenarios include \"doom_basic\", \"doom_two_colors_easy\", \"doom_dm\", \"doom_dwango5\", \"doom_my_way_home\", \"doom_deadly_corridor\", \"doom_defend_the_center\", \"doom_defend_the_line\"\n",
+    "env = \"doom_health_gathering_supreme\"\n",
+    "cfg = parse_vizdoom_cfg(argv=[f\"--env={env}\", \"--num_workers=8\", \"--num_envs_per_worker=4\", \"--train_for_env_steps=4000000\"])\n",
+    "\n",
+    "status = run_rl(cfg)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "5L0nBS9e_jqC"
+   },
+   "source": [
+    "## Let's take a look at the performance of the trained policy and output a video of the agent."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "MGSA4Kg5_i0j"
+   },
+   "outputs": [],
+   "source": [
+    "from sample_factory.enjoy import enjoy\n",
+    "cfg = parse_vizdoom_cfg(argv=[f\"--env={env}\", \"--num_workers=1\", \"--save_video\", \"--no_render\", \"--max_num_episodes=10\"], evaluation=True)\n",
+    "status = enjoy(cfg)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "Lj5L1x0WLxwB"
+   },
+   "source": [
+    "## Now let's visualize the performance of the agent"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "WsXhBY7JNOdJ"
+   },
+   "outputs": [],
+   "source": [
+    "from base64 import b64encode\n",
+    "from IPython.display import HTML\n",
+    "\n",
+    "mp4 = open('/content/train_dir/default_experiment/replay.mp4','rb').read()\n",
+    "data_url = \"data:video/mp4;base64,\" + b64encode(mp4).decode()\n",
+    "HTML(\"\"\"\n",
+    "<video width=640 controls>\n",
+    "      <source src=\"%s\" type=\"video/mp4\">\n",
+    "</video>\n",
+    "\"\"\" % data_url)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "2A4pf_1VwPqR"
+   },
+   "source": [
+    "The agent has learned something, but its performance could be better. We would clearly need to train for longer. But let's upload this model to the Hub."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "CSQVWF0kNuy9"
+   },
+   "source": [
+    "## Now let's upload your checkpoint and video to the Hugging Face Hub\n",
+    "\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "JquRrWytA6eo"
+   },
+   "source": [
+    "To be able to share your model with the community there are three more steps to follow:\n",
+    "\n",
+    "1️⃣ (If it's not already done) create an account on HF ➡ https://huggingface.co/join\n",
+    "\n",
+    "2️⃣ Sign in, then store your authentication token from the Hugging Face website.\n",
+    "- Create a new token (https://huggingface.co/settings/tokens) **with write role**\n",
+    "\n",
+    "<img src=\"https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/notebooks/create-token.jpg\" alt=\"Create HF Token\">\n",
+    "\n",
+    "- Copy the token\n",
+    "- Run the cell below and paste the token"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "_tsf2uv0g_4p"
+   },
+   "source": [
+    "If you don't want to use a Google Colab or a Jupyter Notebook, you need to use this command instead: `huggingface-cli login`"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "GoQm_jYSOts0"
+   },
+   "outputs": [],
+   "source": [
+    "from huggingface_hub import notebook_login\n",
+    "notebook_login()\n",
+    "!git config --global credential.helper store"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "sEawW_i0OvJV"
+   },
+   "outputs": [],
+   "source": [
+    "from sample_factory.enjoy import enjoy\n",
+    "\n",
+    "hf_username = \"ThomasSimonini\" # insert your HuggingFace username here\n",
+    "\n",
+    "cfg = parse_vizdoom_cfg(argv=[f\"--env={env}\", \"--num_workers=1\", \"--save_video\", \"--no_render\", \"--max_num_episodes=10\", \"--max_num_frames=100000\", \"--push_to_hub\", f\"--hf_repository={hf_username}/rl_course_vizdoom_health_gathering_supreme\"], evaluation=True)\n",
+    "status = enjoy(cfg)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "9PzeXx-qxVvw"
+   },
+   "source": [
+    "## Let's load another model\n",
+    "\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "mHZAWSgL5F7P"
+   },
+   "source": [
+    "This agent's performance was good, but we can do better! Let's download and visualize an agent trained for 10B timesteps from the hub."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "Ud6DwAUl5S-l",
+    "lines_to_next_cell": 2
+   },
+   "outputs": [],
+   "source": [
+    "#download the agent from the hub\n",
+    "!python -m sample_factory.huggingface.load_from_hub -r edbeeching/doom_health_gathering_supreme_2222 -d ./train_dir"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "qoUJhL6x6sY5"
+   },
+   "outputs": [],
+   "source": [
+    "!ls train_dir/doom_health_gathering_supreme_2222"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "lZskc8LG8qr8"
+   },
+   "outputs": [],
+   "source": [
+    "env = \"doom_health_gathering_supreme\"\n",
+    "cfg = parse_vizdoom_cfg(argv=[f\"--env={env}\", \"--num_workers=1\", \"--save_video\", \"--no_render\", \"--max_num_episodes=10\", \"--experiment=doom_health_gathering_supreme_2222\", \"--train_dir=train_dir\"], evaluation=True)\n",
+    "status = enjoy(cfg)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "BtzXBoj65Wmq"
+   },
+   "outputs": [],
+   "source": [
+    "mp4 = open('/content/train_dir/doom_health_gathering_supreme_2222/replay.mp4','rb').read()\n",
+    "data_url = \"data:video/mp4;base64,\" + b64encode(mp4).decode()\n",
+    "HTML(\"\"\"\n",
+    "<video width=640 controls>\n",
+    "      <source src=\"%s\" type=\"video/mp4\">\n",
+    "</video>\n",
+    "\"\"\" % data_url)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "ie5YWC3NyKO8"
+   },
+   "source": [
+    "## Some additional challenges 🏆: Doom Deathmatch\n",
+    "\n",
+    "Training an agent to play a Doom deathmatch **takes many hours on a more beefy machine than is available in Colab**.\n",
+    "\n",
+    "Fortunately, we have **already trained an agent in this scenario and it is available in the 🤗 Hub!** Let’s download the model and visualize the agent’s performance."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "fq3WFeus81iI"
+   },
+   "outputs": [],
+   "source": [
+    "# Download the agent from the hub\n",
+    "!python -m sample_factory.huggingface.load_from_hub -r edbeeching/doom_deathmatch_bots_2222 -d ./train_dir"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "7AX_LwxR2FQ0"
+   },
+   "source": [
+    "Given the agent plays for a long time, the video generation can take **10 minutes**."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "0hq6XL__85Bv"
+   },
+   "outputs": [],
+   "source": [
+    "\n",
+    "from sample_factory.enjoy import enjoy\n",
+    "register_vizdoom_components()\n",
+    "env = \"doom_deathmatch_bots\"\n",
+    "cfg = parse_vizdoom_cfg(argv=[f\"--env={env}\", \"--num_workers=1\", \"--save_video\", \"--no_render\", \"--max_num_episodes=1\", \"--experiment=doom_deathmatch_bots_2222\", \"--train_dir=train_dir\"], evaluation=True)\n",
+    "status = enjoy(cfg)\n",
+    "mp4 = open('/content/train_dir/doom_deathmatch_bots_2222/replay.mp4','rb').read()\n",
+    "data_url = \"data:video/mp4;base64,\" + b64encode(mp4).decode()\n",
+    "HTML(\"\"\"\n",
+    "<video width=640 controls>\n",
+    "      <source src=\"%s\" type=\"video/mp4\">\n",
+    "</video>\n",
+    "\"\"\" % data_url)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "N6mEC-4zyihx"
+   },
+   "source": [
+    "\n",
+    "You **can try to train your agent in this environment** using the code above, but not on Colab.\n",
+    "**Good luck 🤞**"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "YnDAngN6zeeI"
+   },
+   "source": [
+    "If you prefer an easier scenario, **why not try training in another ViZDoom scenario such as `doom_deadly_corridor` or `doom_defend_the_center`?**\n",
+    "\n",
+    "\n",
+    "\n",
+    "---\n",
+    "\n",
+    "\n",
+    "This concludes the last unit. But we are not finished yet! 🤗 The following **bonus section includes some of the most interesting, advanced and cutting-edge work in Deep Reinforcement Learning**.\n",
+    "\n",
+    "## Keep learning, stay awesome 🤗"
+   ]
+  }
+ ],
+ "metadata": {
+  "accelerator": "GPU",
+  "colab": {
+   "collapsed_sections": [
+    "PU4FVzaoM6fC",
+    "nB68Eb9UgC94",
+    "ez5UhUtYcWXF",
+    "sgRy6wnrgnij"
+   ],
+   "include_colab_link": true,
+   "private_outputs": true,
+   "provenance": []
+  },
+  "gpuClass": "standard",
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
   },
-  "nbformat": 4,
-  "nbformat_minor": 0
-}
\ No newline at end of file
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}