51nd0re1 committed
Commit c8a2a0a · verified · 1 parent: 63f2d34

Initial commit

Files changed (6)
  1. README.md +12 -12
  2. args.yml +2 -2
  3. dqn-Pong-v4.zip +1 -1
  4. dqn-Pong-v4/data +8 -8
  5. results.json +1 -1
  6. train_eval_metrics.zip +2 -2
README.md CHANGED
@@ -1,7 +1,7 @@
 ---
 library_name: stable-baselines3
 tags:
-- SpaceInvadersNoFrameskip-v4
+- Pong-v4
 - deep-reinforcement-learning
 - reinforcement-learning
 - stable-baselines3
@@ -12,17 +12,17 @@ model-index:
       type: reinforcement-learning
       name: reinforcement-learning
     dataset:
-      name: SpaceInvadersNoFrameskip-v4
-      type: SpaceInvadersNoFrameskip-v4
+      name: Pong-v4
+      type: Pong-v4
     metrics:
     - type: mean_reward
-      value: 29.00 +/- 64.30
+      value: -21.00 +/- 0.00
       name: mean_reward
       verified: false
 ---
 
-# **DQN** Agent playing **SpaceInvadersNoFrameskip-v4**
-This is a trained model of a **DQN** agent playing **SpaceInvadersNoFrameskip-v4**
+# **DQN** Agent playing **Pong-v4**
+This is a trained model of a **DQN** agent playing **Pong-v4**
 using the [stable-baselines3 library](https://github.com/DLR-RM/stable-baselines3)
 and the [RL Zoo](https://github.com/DLR-RM/rl-baselines3-zoo).
 
@@ -44,21 +44,21 @@ pip install rl_zoo3
 
 ```
 # Download model and save it into the logs/ folder
-python -m rl_zoo3.load_from_hub --algo dqn --env SpaceInvadersNoFrameskip-v4 -orga 51nd0re1 -f logs/
-python -m rl_zoo3.enjoy --algo dqn --env SpaceInvadersNoFrameskip-v4 -f logs/
+python -m rl_zoo3.load_from_hub --algo dqn --env Pong-v4 -orga 51nd0re1 -f logs/
+python -m rl_zoo3.enjoy --algo dqn --env Pong-v4 -f logs/
 ```
 
 If you installed the RL Zoo3 via pip (`pip install rl_zoo3`), from anywhere you can do:
 ```
-python -m rl_zoo3.load_from_hub --algo dqn --env SpaceInvadersNoFrameskip-v4 -orga 51nd0re1 -f logs/
-python -m rl_zoo3.enjoy --algo dqn --env SpaceInvadersNoFrameskip-v4 -f logs/
+python -m rl_zoo3.load_from_hub --algo dqn --env Pong-v4 -orga 51nd0re1 -f logs/
+python -m rl_zoo3.enjoy --algo dqn --env Pong-v4 -f logs/
 ```
 
 ## Training (with the RL Zoo)
 ```
-python -m rl_zoo3.train --algo dqn --env SpaceInvadersNoFrameskip-v4 -f logs/
+python -m rl_zoo3.train --algo dqn --env Pong-v4 -f logs/
 # Upload the model and generate video (when possible)
-python -m rl_zoo3.push_to_hub --algo dqn --env SpaceInvadersNoFrameskip-v4 -f logs/ -orga 51nd0re1
+python -m rl_zoo3.push_to_hub --algo dqn --env Pong-v4 -f logs/ -orga 51nd0re1
 ```
 
 ## Hyperparameters
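Note: as an alternative to the RL Zoo commands shown in the README diff above, the exported archive can be loaded directly with stable-baselines3. This is only a sketch: the local path is assumed, and the wrapper stack mirrors the usual RL Zoo Atari preprocessing (check the repository's config.yml for the exact setup).

```
from stable_baselines3 import DQN
from stable_baselines3.common.env_util import make_atari_env
from stable_baselines3.common.vec_env import VecFrameStack

# Build a Pong environment with standard Atari preprocessing and 4-frame
# stacking (assumed to match how the agent was trained).
env = VecFrameStack(make_atari_env("Pong-v4", n_envs=1), n_stack=4)
model = DQN.load("dqn-Pong-v4.zip")  # assumed local path to the archive

obs = env.reset()
for _ in range(1000):
    action, _ = model.predict(obs, deterministic=False)
    obs, rewards, dones, infos = env.step(action)
```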
args.yml CHANGED
@@ -6,7 +6,7 @@
 - - device
 - auto
 - - env
-- SpaceInvadersNoFrameskip-v4
+- Pong-v4
 - - env_kwargs
 - null
 - - eval_env_kwargs
@@ -56,7 +56,7 @@
 - - save_replay_buffer
 - false
 - - seed
-- 1274744298
+- 2808795654
 - - storage
 - null
 - - study_name
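For reference, args.yml records the training-time CLI arguments as [name, value] pairs. A minimal sketch for inspecting it, assuming the file carries PyYAML's OrderedDict object tag as RL Zoo dumps usually do:

```
import yaml

# args.yml is a list of [argument, value] pairs dumped from an OrderedDict;
# UnsafeLoader handles the !!python/object/apply tag (an assumption about
# how the file was written).
with open("args.yml") as f:
    args = dict(yaml.load(f, Loader=yaml.UnsafeLoader))

print(args["env"], args["seed"])  # expected: Pong-v4 2808795654
```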
dqn-Pong-v4.zip CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:01b24e5b62beb99b9bd0d78ff3fd0bcb3c32506912497897d51a65fdd955168c
+oid sha256:41d2e31a79a64a0acd24bf78406270f01a4b3c883d0201e49c746fe6b6659334
 size 13715387
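The entry above (like train_eval_metrics.zip further down) is a Git LFS pointer file: the oid is the SHA-256 of the real artifact and size is its byte count. A quick check that a downloaded dqn-Pong-v4.zip matches the pointer (local path assumed):

```
import hashlib

expected = "41d2e31a79a64a0acd24bf78406270f01a4b3c883d0201e49c746fe6b6659334"

# Stream the file in 1 MiB chunks so large archives need not fit in memory.
h = hashlib.sha256()
with open("dqn-Pong-v4.zip", "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        h.update(chunk)

print(h.hexdigest() == expected)  # True if the file matches the LFS pointer
```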
dqn-Pong-v4/data CHANGED
@@ -4,9 +4,9 @@
 ":serialized:": "gAWVMAAAAAAAAACMHnN0YWJsZV9iYXNlbGluZXMzLmRxbi5wb2xpY2llc5SMCUNublBvbGljeZSTlC4=",
 "__module__": "stable_baselines3.dqn.policies",
 "__doc__": "\n Policy class for DQN when using images as input.\n\n :param observation_space: Observation space\n :param action_space: Action space\n :param lr_schedule: Learning rate schedule (could be constant)\n :param net_arch: The specification of the policy and value networks.\n :param activation_fn: Activation function\n :param features_extractor_class: Features extractor to use.\n :param normalize_images: Whether to normalize images or not,\n dividing by 255.0 (True by default)\n :param optimizer_class: The optimizer to use,\n ``th.optim.Adam`` by default\n :param optimizer_kwargs: Additional keyword arguments,\n excluding the learning rate, to pass to the optimizer\n ",
-"__init__": "<function CnnPolicy.__init__ at 0x7a1acdbb5e10>",
+"__init__": "<function CnnPolicy.__init__ at 0x7883f67d5e10>",
 "__abstractmethods__": "frozenset()",
-"_abc_impl": "<_abc._abc_data object at 0x7a1acdba3740>"
+"_abc_impl": "<_abc._abc_data object at 0x7883f67dc940>"
 },
 "verbose": 1,
 "policy_kwargs": {},
@@ -84,13 +84,13 @@
 "__module__": "stable_baselines3.common.buffers",
 "__annotations__": "{'observations': <class 'numpy.ndarray'>, 'next_observations': <class 'numpy.ndarray'>, 'actions': <class 'numpy.ndarray'>, 'rewards': <class 'numpy.ndarray'>, 'dones': <class 'numpy.ndarray'>, 'timeouts': <class 'numpy.ndarray'>}",
 "__doc__": "\n Replay buffer used in off-policy algorithms like SAC/TD3.\n\n :param buffer_size: Max number of element in the buffer\n :param observation_space: Observation space\n :param action_space: Action space\n :param device: PyTorch device\n :param n_envs: Number of parallel environments\n :param optimize_memory_usage: Enable a memory efficient variant\n of the replay buffer which reduces by almost a factor two the memory used,\n at a cost of more complexity.\n See https://github.com/DLR-RM/stable-baselines3/issues/37#issuecomment-637501195\n and https://github.com/DLR-RM/stable-baselines3/pull/28#issuecomment-637559274\n Cannot be used in combination with handle_timeout_termination.\n :param handle_timeout_termination: Handle timeout termination (due to timelimit)\n separately and treat the task as infinite horizon task.\n https://github.com/DLR-RM/stable-baselines3/issues/284\n ",
-"__init__": "<function ReplayBuffer.__init__ at 0x7a1acdceaef0>",
-"add": "<function ReplayBuffer.add at 0x7a1acdceaf80>",
-"sample": "<function ReplayBuffer.sample at 0x7a1acdceb010>",
-"_get_samples": "<function ReplayBuffer._get_samples at 0x7a1acdceb0a0>",
-"_maybe_cast_dtype": "<staticmethod(<function ReplayBuffer._maybe_cast_dtype at 0x7a1acdceb130>)>",
+"__init__": "<function ReplayBuffer.__init__ at 0x7883f690aef0>",
+"add": "<function ReplayBuffer.add at 0x7883f690af80>",
+"sample": "<function ReplayBuffer.sample at 0x7883f690b010>",
+"_get_samples": "<function ReplayBuffer._get_samples at 0x7883f690b0a0>",
+"_maybe_cast_dtype": "<staticmethod(<function ReplayBuffer._maybe_cast_dtype at 0x7883f690b130>)>",
 "__abstractmethods__": "frozenset()",
-"_abc_impl": "<_abc._abc_data object at 0x7a1acdc9b900>"
+"_abc_impl": "<_abc._abc_data object at 0x7883f68ce700>"
 },
 "replay_buffer_kwargs": {},
 "train_freq": {
results.json CHANGED
@@ -1 +1 @@
-{"mean_reward": 29.0, "std_reward": 64.29618962271404, "is_deterministic": false, "n_eval_episodes": 10, "eval_datetime": "2024-12-04T07:07:53.943772"}
+{"mean_reward": -21.0, "std_reward": 0.0, "is_deterministic": false, "n_eval_episodes": 10, "eval_datetime": "2024-12-04T07:22:07.869826"}
train_eval_metrics.zip CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:96545c3822912d5aea75d624c9352c19bd21c0415613df6fadfc9d61f0df2d6b
-size 5919
+oid sha256:71b6041f0f35ab2cc17eb08bcf41cc53e2917c06f8088b8a381f74579b74b020
+size 9125