Initial commit

Browse files

Files changed (6) hide show

README.md +12 -12
args.yml +2 -2
dqn-Pong-v4.zip +1 -1
dqn-Pong-v4/data +8 -8
results.json +1 -1
train_eval_metrics.zip +2 -2

README.md CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 library_name: stable-baselines3
 tags:
-- SpaceInvadersNoFrameskip-v4
 - deep-reinforcement-learning
 - reinforcement-learning
 - stable-baselines3
@@ -12,17 +12,17 @@ model-index:
       type: reinforcement-learning
       name: reinforcement-learning
     dataset:
-      name: SpaceInvadersNoFrameskip-v4
-      type: SpaceInvadersNoFrameskip-v4
     metrics:
     - type: mean_reward
-      value: 29.00 +/- 64.30
       name: mean_reward
       verified: false
 ---
-# **DQN** Agent playing **SpaceInvadersNoFrameskip-v4**
-This is a trained model of a **DQN** agent playing **SpaceInvadersNoFrameskip-v4**
 using the [stable-baselines3 library](https://github.com/DLR-RM/stable-baselines3)
 and the [RL Zoo](https://github.com/DLR-RM/rl-baselines3-zoo).
@@ -44,21 +44,21 @@ pip install rl_zoo3
 ```
 # Download model and save it into the logs/ folder
-python -m rl_zoo3.load_from_hub --algo dqn --env SpaceInvadersNoFrameskip-v4 -orga 51nd0re1 -f logs/
-python -m rl_zoo3.enjoy --algo dqn --env SpaceInvadersNoFrameskip-v4  -f logs/
 ```
 If you installed the RL Zoo3 via pip (`pip install rl_zoo3`), from anywhere you can do:
 ```
-python -m rl_zoo3.load_from_hub --algo dqn --env SpaceInvadersNoFrameskip-v4 -orga 51nd0re1 -f logs/
-python -m rl_zoo3.enjoy --algo dqn --env SpaceInvadersNoFrameskip-v4  -f logs/
 ```
 ## Training (with the RL Zoo)
 ```
-python -m rl_zoo3.train --algo dqn --env SpaceInvadersNoFrameskip-v4 -f logs/
 # Upload the model and generate video (when possible)
-python -m rl_zoo3.push_to_hub --algo dqn --env SpaceInvadersNoFrameskip-v4 -f logs/ -orga 51nd0re1
 ```
 ## Hyperparameters

 ---
 library_name: stable-baselines3
 tags:
+- Pong-v4
 - deep-reinforcement-learning
 - reinforcement-learning
 - stable-baselines3
       type: reinforcement-learning
       name: reinforcement-learning
     dataset:
+      name: Pong-v4
+      type: Pong-v4
     metrics:
     - type: mean_reward
+      value: -21.00 +/- 0.00
       name: mean_reward
       verified: false
 ---
+# **DQN** Agent playing **Pong-v4**
+This is a trained model of a **DQN** agent playing **Pong-v4**
 using the [stable-baselines3 library](https://github.com/DLR-RM/stable-baselines3)
 and the [RL Zoo](https://github.com/DLR-RM/rl-baselines3-zoo).
 ```
 # Download model and save it into the logs/ folder
+python -m rl_zoo3.load_from_hub --algo dqn --env Pong-v4 -orga 51nd0re1 -f logs/
+python -m rl_zoo3.enjoy --algo dqn --env Pong-v4  -f logs/
 ```
 If you installed the RL Zoo3 via pip (`pip install rl_zoo3`), from anywhere you can do:
 ```
+python -m rl_zoo3.load_from_hub --algo dqn --env Pong-v4 -orga 51nd0re1 -f logs/
+python -m rl_zoo3.enjoy --algo dqn --env Pong-v4  -f logs/
 ```
 ## Training (with the RL Zoo)
 ```
+python -m rl_zoo3.train --algo dqn --env Pong-v4 -f logs/
 # Upload the model and generate video (when possible)
+python -m rl_zoo3.push_to_hub --algo dqn --env Pong-v4 -f logs/ -orga 51nd0re1
 ```
 ## Hyperparameters

args.yml CHANGED Viewed

@@ -6,7 +6,7 @@
   - - device
     - auto
   - - env
-    - SpaceInvadersNoFrameskip-v4
   - - env_kwargs
     - null
   - - eval_env_kwargs
@@ -56,7 +56,7 @@
   - - save_replay_buffer
     - false
   - - seed
-    - 1274744298
   - - storage
     - null
   - - study_name

   - - device
     - auto
   - - env
+    - Pong-v4
   - - env_kwargs
     - null
   - - eval_env_kwargs
   - - save_replay_buffer
     - false
   - - seed
+    - 2808795654
   - - storage
     - null
   - - study_name

dqn-Pong-v4.zip CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:01b24e5b62beb99b9bd0d78ff3fd0bcb3c32506912497897d51a65fdd955168c
 size 13715387

 version https://git-lfs.github.com/spec/v1
+oid sha256:41d2e31a79a64a0acd24bf78406270f01a4b3c883d0201e49c746fe6b6659334
 size 13715387

dqn-Pong-v4/data CHANGED Viewed

@@ -4,9 +4,9 @@
         ":serialized:": "gAWVMAAAAAAAAACMHnN0YWJsZV9iYXNlbGluZXMzLmRxbi5wb2xpY2llc5SMCUNublBvbGljeZSTlC4=",
         "__module__": "stable_baselines3.dqn.policies",
         "__doc__": "\n    Policy class for DQN when using images as input.\n\n    :param observation_space: Observation space\n    :param action_space: Action space\n    :param lr_schedule: Learning rate schedule (could be constant)\n    :param net_arch: The specification of the policy and value networks.\n    :param activation_fn: Activation function\n    :param features_extractor_class: Features extractor to use.\n    :param normalize_images: Whether to normalize images or not,\n         dividing by 255.0 (True by default)\n    :param optimizer_class: The optimizer to use,\n        ``th.optim.Adam`` by default\n    :param optimizer_kwargs: Additional keyword arguments,\n        excluding the learning rate, to pass to the optimizer\n    ",
-        "__init__": "<function CnnPolicy.__init__ at 0x7a1acdbb5e10>",
         "__abstractmethods__": "frozenset()",
-        "_abc_impl": "<_abc._abc_data object at 0x7a1acdba3740>"
     },
     "verbose": 1,
     "policy_kwargs": {},
@@ -84,13 +84,13 @@
         "__module__": "stable_baselines3.common.buffers",
         "__annotations__": "{'observations': <class 'numpy.ndarray'>, 'next_observations': <class 'numpy.ndarray'>, 'actions': <class 'numpy.ndarray'>, 'rewards': <class 'numpy.ndarray'>, 'dones': <class 'numpy.ndarray'>, 'timeouts': <class 'numpy.ndarray'>}",
         "__doc__": "\n    Replay buffer used in off-policy algorithms like SAC/TD3.\n\n    :param buffer_size: Max number of element in the buffer\n    :param observation_space: Observation space\n    :param action_space: Action space\n    :param device: PyTorch device\n    :param n_envs: Number of parallel environments\n    :param optimize_memory_usage: Enable a memory efficient variant\n        of the replay buffer which reduces by almost a factor two the memory used,\n        at a cost of more complexity.\n        See https://github.com/DLR-RM/stable-baselines3/issues/37#issuecomment-637501195\n        and https://github.com/DLR-RM/stable-baselines3/pull/28#issuecomment-637559274\n        Cannot be used in combination with handle_timeout_termination.\n    :param handle_timeout_termination: Handle timeout termination (due to timelimit)\n        separately and treat the task as infinite horizon task.\n        https://github.com/DLR-RM/stable-baselines3/issues/284\n    ",
-        "__init__": "<function ReplayBuffer.__init__ at 0x7a1acdceaef0>",
-        "add": "<function ReplayBuffer.add at 0x7a1acdceaf80>",
-        "sample": "<function ReplayBuffer.sample at 0x7a1acdceb010>",
-        "_get_samples": "<function ReplayBuffer._get_samples at 0x7a1acdceb0a0>",
-        "_maybe_cast_dtype": "<staticmethod(<function ReplayBuffer._maybe_cast_dtype at 0x7a1acdceb130>)>",
         "__abstractmethods__": "frozenset()",
-        "_abc_impl": "<_abc._abc_data object at 0x7a1acdc9b900>"
     },
     "replay_buffer_kwargs": {},
     "train_freq": {

         ":serialized:": "gAWVMAAAAAAAAACMHnN0YWJsZV9iYXNlbGluZXMzLmRxbi5wb2xpY2llc5SMCUNublBvbGljeZSTlC4=",
         "__module__": "stable_baselines3.dqn.policies",
         "__doc__": "\n    Policy class for DQN when using images as input.\n\n    :param observation_space: Observation space\n    :param action_space: Action space\n    :param lr_schedule: Learning rate schedule (could be constant)\n    :param net_arch: The specification of the policy and value networks.\n    :param activation_fn: Activation function\n    :param features_extractor_class: Features extractor to use.\n    :param normalize_images: Whether to normalize images or not,\n         dividing by 255.0 (True by default)\n    :param optimizer_class: The optimizer to use,\n        ``th.optim.Adam`` by default\n    :param optimizer_kwargs: Additional keyword arguments,\n        excluding the learning rate, to pass to the optimizer\n    ",
+        "__init__": "<function CnnPolicy.__init__ at 0x7883f67d5e10>",
         "__abstractmethods__": "frozenset()",
+        "_abc_impl": "<_abc._abc_data object at 0x7883f67dc940>"
     },
     "verbose": 1,
     "policy_kwargs": {},
         "__module__": "stable_baselines3.common.buffers",
         "__annotations__": "{'observations': <class 'numpy.ndarray'>, 'next_observations': <class 'numpy.ndarray'>, 'actions': <class 'numpy.ndarray'>, 'rewards': <class 'numpy.ndarray'>, 'dones': <class 'numpy.ndarray'>, 'timeouts': <class 'numpy.ndarray'>}",
         "__doc__": "\n    Replay buffer used in off-policy algorithms like SAC/TD3.\n\n    :param buffer_size: Max number of element in the buffer\n    :param observation_space: Observation space\n    :param action_space: Action space\n    :param device: PyTorch device\n    :param n_envs: Number of parallel environments\n    :param optimize_memory_usage: Enable a memory efficient variant\n        of the replay buffer which reduces by almost a factor two the memory used,\n        at a cost of more complexity.\n        See https://github.com/DLR-RM/stable-baselines3/issues/37#issuecomment-637501195\n        and https://github.com/DLR-RM/stable-baselines3/pull/28#issuecomment-637559274\n        Cannot be used in combination with handle_timeout_termination.\n    :param handle_timeout_termination: Handle timeout termination (due to timelimit)\n        separately and treat the task as infinite horizon task.\n        https://github.com/DLR-RM/stable-baselines3/issues/284\n    ",
+        "__init__": "<function ReplayBuffer.__init__ at 0x7883f690aef0>",
+        "add": "<function ReplayBuffer.add at 0x7883f690af80>",
+        "sample": "<function ReplayBuffer.sample at 0x7883f690b010>",
+        "_get_samples": "<function ReplayBuffer._get_samples at 0x7883f690b0a0>",
+        "_maybe_cast_dtype": "<staticmethod(<function ReplayBuffer._maybe_cast_dtype at 0x7883f690b130>)>",
         "__abstractmethods__": "frozenset()",
+        "_abc_impl": "<_abc._abc_data object at 0x7883f68ce700>"
     },
     "replay_buffer_kwargs": {},
     "train_freq": {

results.json CHANGED Viewed

	@@ -1 +1 @@
1	- {"mean_reward": 29.0, "std_reward": 64.29618962271404, "is_deterministic": false, "n_eval_episodes": 10, "eval_datetime": "2024-12-04T07:07:53.943772"}


1	+ {"mean_reward": -21.0, "std_reward": 0.0, "is_deterministic": false, "n_eval_episodes": 10, "eval_datetime": "2024-12-04T07:22:07.869826"}

train_eval_metrics.zip CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:96545c3822912d5aea75d624c9352c19bd21c0415613df6fadfc9d61f0df2d6b
-size 5919

 version https://git-lfs.github.com/spec/v1
+oid sha256:71b6041f0f35ab2cc17eb08bcf41cc53e2917c06f8088b8a381f74579b74b020
+size 9125