sgoodfriend
committed on
Commit
•
ccb2a36
1
Parent(s):
a1952e6
PPO playing HalfCheetahBulletEnv-v0 from https://github.com/sgoodfriend/rl-algo-impls/tree/2067e21d62fff5db60168687e7d9e89019a8bfc0
Browse files
- README.md +4 -4
- replay.meta.json +1 -1
- replay.mp4 +2 -2
- rl_algo_impls/benchmark_publish.py +2 -2
README.md
CHANGED
@@ -10,7 +10,7 @@ model-index:
|
|
10 |
results:
|
11 |
- metrics:
|
12 |
- type: mean_reward
|
13 |
-
value:
|
14 |
name: mean_reward
|
15 |
task:
|
16 |
type: reinforcement-learning
|
@@ -31,9 +31,9 @@ This model was trained from 3 trainings of **PPO** agents using different initia
|
|
31 |
|
32 |
| algo | env | seed | reward_mean | reward_std | eval_episodes | best | wandb_url |
|
33 |
|:-------|:------------------------|-------:|--------------:|-------------:|----------------:|:-------|:-----------------------------------------------------------------------------|
|
34 |
-
| ppo | HalfCheetahBulletEnv-v0 | 1 |
|
35 |
-
| ppo | HalfCheetahBulletEnv-v0 | 2 |
|
36 |
-
| ppo | HalfCheetahBulletEnv-v0 | 3 |
|
37 |
|
38 |
|
39 |
### Prerequisites: Weights & Biases (WandB)
|
|
|
10 |
results:
|
11 |
- metrics:
|
12 |
- type: mean_reward
|
13 |
+
value: 3168.2 +/- 27.43
|
14 |
name: mean_reward
|
15 |
task:
|
16 |
type: reinforcement-learning
|
|
|
31 |
|
32 |
| algo | env | seed | reward_mean | reward_std | eval_episodes | best | wandb_url |
|
33 |
|:-------|:------------------------|-------:|--------------:|-------------:|----------------:|:-------|:-----------------------------------------------------------------------------|
|
34 |
+
| ppo | HalfCheetahBulletEnv-v0 | 1 | 2799.86 | 27.2989 | 16 | | [wandb](https://wandb.ai/sgoodfriend/rl-algo-impls-benchmarks/runs/p17bxd09) |
|
35 |
+
| ppo | HalfCheetahBulletEnv-v0 | 2 | 2731.34 | 55.7033 | 16 | | [wandb](https://wandb.ai/sgoodfriend/rl-algo-impls-benchmarks/runs/rnpgo6ad) |
|
36 |
+
| ppo | HalfCheetahBulletEnv-v0 | 3 | 3168.2 | 27.4306 | 16 | * | [wandb](https://wandb.ai/sgoodfriend/rl-algo-impls-benchmarks/runs/kfn11dqj) |
|
37 |
|
38 |
|
39 |
### Prerequisites: Weights & Biases (WandB)
|
replay.meta.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"content_type": "video/mp4", "encoder_version": {"backend": "ffmpeg", "version": "b'ffmpeg version
|
|
|
1 |
+
{"content_type": "video/mp4", "encoder_version": {"backend": "ffmpeg", "version": "b'ffmpeg version 4.2.7-0ubuntu0.1 Copyright (c) 2000-2022 the FFmpeg developers\\nbuilt with gcc 9 (Ubuntu 9.4.0-1ubuntu1~20.04.1)\\nconfiguration: --prefix=/usr --extra-version=0ubuntu0.1 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --arch=amd64 --enable-gpl --disable-stripping --enable-avresample --disable-filter=resample --enable-avisynth --enable-gnutls --enable-ladspa --enable-libaom --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libgme --enable-libgsm --enable-libjack --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-libpulse --enable-librsvg --enable-librubberband --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libssh --enable-libtheora --enable-libtwolame --enable-libvidstab --enable-libvorbis --enable-libvpx --enable-libwavpack --enable-libwebp --enable-libx265 --enable-libxml2 --enable-libxvid --enable-libzmq --enable-libzvbi --enable-lv2 --enable-omx --enable-openal --enable-opencl --enable-opengl --enable-sdl2 --enable-libdc1394 --enable-libdrm --enable-libiec61883 --enable-nvenc --enable-chromaprint --enable-frei0r --enable-libx264 --enable-shared\\nlibavutil 56. 31.100 / 56. 31.100\\nlibavcodec 58. 54.100 / 58. 54.100\\nlibavformat 58. 29.100 / 58. 29.100\\nlibavdevice 58. 8.100 / 58. 8.100\\nlibavfilter 7. 57.100 / 7. 57.100\\nlibavresample 4. 0. 0 / 4. 0. 0\\nlibswscale 5. 5.100 / 5. 5.100\\nlibswresample 3. 5.100 / 3. 5.100\\nlibpostproc 55. 5.100 / 55. 
5.100\\n'", "cmdline": ["ffmpeg", "-nostats", "-loglevel", "error", "-y", "-f", "rawvideo", "-s:v", "320x240", "-pix_fmt", "rgb24", "-framerate", "60", "-i", "-", "-vf", "scale=trunc(iw/2)*2:trunc(ih/2)*2", "-vcodec", "libx264", "-pix_fmt", "yuv420p", "-r", "60", "/tmp/tmpqhnzjxei/ppo-HalfCheetahBulletEnv-v0/replay.mp4"]}, "episode": {"r": 3175.2041015625, "l": 1000, "t": 28.550228}}
|
replay.mp4
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fa73b9ba5079a0dde4adbf5d526c9e4ac30ee1b985c421565a80f9ca9b5bee77
|
3 |
+
size 1424508
|
rl_algo_impls/benchmark_publish.py
CHANGED
@@ -54,8 +54,8 @@ def benchmark_publish() -> None:
|
|
54 |
"--virtual-display", action="store_true", help="Use headless virtual display"
|
55 |
)
|
56 |
# parser.set_defaults(
|
57 |
-
# wandb_tags=["
|
58 |
-
# wandb_report_url="https://api.wandb.ai/links/sgoodfriend/
|
59 |
# envs=[],
|
60 |
# exclude_envs=[],
|
61 |
# )
|
|
|
54 |
"--virtual-display", action="store_true", help="Use headless virtual display"
|
55 |
)
|
56 |
# parser.set_defaults(
|
57 |
+
# wandb_tags=["benchmark_2067e21", "host_155-248-199-228"],
|
58 |
+
# wandb_report_url="https://api.wandb.ai/links/sgoodfriend/09frjfcs",
|
59 |
# envs=[],
|
60 |
# exclude_envs=[],
|
61 |
# )
|