RajkNakka committed on
Commit b4a9e85
1 Parent(s): 92c3dd3

Upload folder using huggingface_hub

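Note: the commit message above says the folder was pushed with huggingface_hub. A minimal, hypothetical sketch of such an upload; the folder_path and repo_id below are placeholders, not values recorded in this commit:

```python
# Hypothetical sketch of the kind of upload the commit message describes.
# folder_path and repo_id are placeholders, not values recorded in this commit.
from huggingface_hub import HfApi

api = HfApi()
api.upload_folder(
    folder_path="train_dir/default_experiment",    # local Sample Factory experiment dir (placeholder)
    repo_id="user/doom_health_gathering_supreme",  # target model repo (placeholder)
    repo_type="model",
)
```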
.summary/0/events.out.tfevents.1688877237.snowflake ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4a33a52b7a843217653442a9f39ec2104dcefdac01a85f99eb14a4459093972b
+size 15236
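The added file is stored via Git LFS, so the repository only tracks a pointer (spec version, sha256 oid, and size in bytes). A small sketch of checking a locally fetched copy against those two fields; the local filename is a placeholder, while the oid and size come from the pointer above:

```python
# Sketch: verify a locally fetched copy of the file against the LFS pointer fields above.
# The local path is a placeholder; the expected oid and size come from this diff.
import hashlib
import os

path = "events.out.tfevents.1688877237.snowflake"  # placeholder local path
sha256 = hashlib.sha256(open(path, "rb").read()).hexdigest()

print(sha256 == "4a33a52b7a843217653442a9f39ec2104dcefdac01a85f99eb14a4459093972b")
print(os.path.getsize(path) == 15236)
```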
README.md CHANGED
@@ -15,7 +15,7 @@ model-index:
   type: doom_health_gathering_supreme
   metrics:
   - type: mean_reward
-  value: 3.86 +/- 1.25
+  value: 3.97 +/- 0.26
   name: mean_reward
   verified: false
 ---
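The model-index metric is reported as mean reward plus/minus standard deviation over evaluation episodes. A minimal illustration of how such a figure is computed; the episode rewards below are made-up numbers, not the actual evaluation data:

```python
# Illustration only: how a "mean +/- std" entry such as 3.97 +/- 0.26 is typically derived.
# The episode rewards below are made-up numbers, not the actual evaluation results.
import numpy as np

episode_rewards = np.array([3.6, 4.1, 3.9, 4.2, 3.8, 4.0, 4.3, 3.7, 4.1, 4.0])
print(f"{episode_rewards.mean():.2f} +/- {episode_rewards.std():.2f}")
```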
checkpoint_p0/best_000466273_3819708416_reward_63.056.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f546634a38180cb4a57aec199213e3b845b90a5ebb8206ddaa081c2c9d4ba854
+size 41231900
config.json CHANGED
@@ -63,7 +63,7 @@
   "summaries_use_frameskip": true,
   "heartbeat_interval": 10,
   "heartbeat_reporting_interval": 300,
-  "train_for_env_steps": 2000000,
+  "train_for_env_steps": 20000,
   "train_for_seconds": 1000000,
   "save_every_sec": 120,
   "keep_checkpoints": 2,
@@ -131,7 +131,7 @@
   "git_hash": "0401714b01ee832562a0930e3744117f1ba51e10",
   "git_repo_name": "https://github.com/tenkara/HF-DeepRL.git",
   "command_line": "--env=doom_health_gathering_supreme --train_for_env_steps=2000000",
+  "env_gpu_observations": true,
   "lr_adaptive_min": 1e-06,
-  "lr_adaptive_max": 0.01,
-  "env_gpu_observations": true
+  "lr_adaptive_max": 0.01
 }
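The net effect of this config.json change is a much smaller training budget (train_for_env_steps drops from 2000000 to 20000) plus a reordering of the env_gpu_observations key. A short sketch of reading those fields back from the uploaded config; the local path is a placeholder:

```python
# Sketch: read back the training budget recorded in the uploaded config.json.
# The local path is a placeholder for wherever the file was downloaded.
import json

with open("config.json") as f:
    cfg = json.load(f)

print(cfg["train_for_env_steps"])   # 20000 after this commit (was 2000000)
print(cfg["env_gpu_observations"])  # True
```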
git.diff CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:671503816d17efebd2950b3a8da881615b5bda60f712ce94d1cc40365a4b13fa
-size 211382168
+oid sha256:72fd3c87dce5ac3afe8766d7caea74393f7369ee1e0f7a4adff360d24911b999
+size 211383812
sf_log.txt CHANGED
@@ -561,3 +561,143 @@ Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.
 [2023-07-08 22:25:33,428][18235] Saving /home/raj/repos/HF-DeepRL/8-Proximal-Policy-Optimization/train_dir/default_experiment/checkpoint_p0/checkpoint_000539850_4422451200.pth...
 [2023-07-08 22:25:33,539][18235] Stopping LearnerWorker_p0...
 [2023-07-08 22:25:33,539][18235] Loop learner_proc0_evt_loop terminating...
+[2023-07-08 22:34:04,498][18621] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+[2023-07-08 22:34:04,498][18621] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0
+[2023-07-08 22:34:04,582][18621] Num visible devices: 1
+[2023-07-08 22:34:04,692][18621] Setting fixed seed 42
+[2023-07-08 22:34:04,692][18641] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+[2023-07-08 22:34:04,692][18641] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0
+[2023-07-08 22:34:04,692][18621] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+[2023-07-08 22:34:04,693][18621] Initializing actor-critic model on device cuda:0
+[2023-07-08 22:34:04,693][18621] RunningMeanStd input shape: (3, 72, 128)
+[2023-07-08 22:34:04,694][18621] RunningMeanStd input shape: (1,)
+[2023-07-08 22:34:04,700][18621] ConvEncoder: input_channels=3
+[2023-07-08 22:34:04,744][18641] Num visible devices: 1
+[2023-07-08 22:34:04,771][18645] Worker 4 uses CPU cores [0]
+[2023-07-08 22:34:04,781][18646] Worker 5 uses CPU cores [1]
+[2023-07-08 22:34:04,822][18643] Worker 0 uses CPU cores [0]
+[2023-07-08 22:34:04,872][18642] Worker 1 uses CPU cores [1]
+[2023-07-08 22:34:05,084][18621] Conv encoder output size: 512
+[2023-07-08 22:34:05,102][18621] Policy head output size: 512
+[2023-07-08 22:34:05,142][18648] Worker 6 uses CPU cores [2]
+[2023-07-08 22:34:05,152][18621] Created Actor Critic model with architecture:
+[2023-07-08 22:34:05,152][18621] ActorCriticSharedWeights(
+  (obs_normalizer): ObservationNormalizer(
+    (running_mean_std): RunningMeanStdDictInPlace(
+      (running_mean_std): ModuleDict(
+        (obs): RunningMeanStdInPlace()
+      )
+    )
+  )
+  (returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace)
+  (encoder): VizdoomEncoder(
+    (basic_encoder): ConvEncoder(
+      (enc): RecursiveScriptModule(
+        original_name=ConvEncoderImpl
+        (conv_head): RecursiveScriptModule(
+          original_name=Sequential
+          (0): RecursiveScriptModule(original_name=Conv2d)
+          (1): RecursiveScriptModule(original_name=ReLU)
+          (2): RecursiveScriptModule(original_name=Conv2d)
+          (3): RecursiveScriptModule(original_name=ReLU)
+          (4): RecursiveScriptModule(original_name=Conv2d)
+          (5): RecursiveScriptModule(original_name=ReLU)
+        )
+        (mlp_layers): RecursiveScriptModule(
+          original_name=Sequential
+          (0): RecursiveScriptModule(original_name=Linear)
+          (1): RecursiveScriptModule(original_name=ReLU)
+        )
+      )
+    )
+  )
+  (core): ModelCoreRNN(
+    (core): LSTM(512, 512)
+  )
+  (decoder): MlpDecoder(
+    (mlp): Identity()
+  )
+  (critic_linear): Linear(in_features=512, out_features=1, bias=True)
+  (action_parameterization): ActionParameterizationDefault(
+    (distribution_linear): Linear(in_features=512, out_features=5, bias=True)
+  )
+)
+[2023-07-08 22:34:05,191][18658] Worker 16 uses CPU cores [0]
+[2023-07-08 22:34:05,221][18644] Worker 2 uses CPU cores [2]
+[2023-07-08 22:34:05,271][18651] Worker 9 uses CPU cores [1]
+[2023-07-08 22:34:05,275][18647] Worker 3 uses CPU cores [3]
+[2023-07-08 22:34:05,321][18652] Worker 10 uses CPU cores [2]
+[2023-07-08 22:34:05,371][18649] Worker 7 uses CPU cores [3]
+[2023-07-08 22:34:05,401][18650] Worker 8 uses CPU cores [0]
+[2023-07-08 22:34:05,406][18657] Worker 15 uses CPU cores [3]
+[2023-07-08 22:34:05,408][18654] Worker 12 uses CPU cores [0]
+[2023-07-08 22:34:05,411][18655] Worker 13 uses CPU cores [1]
+[2023-07-08 22:34:05,421][18659] Worker 17 uses CPU cores [1]
+[2023-07-08 22:34:05,431][18656] Worker 14 uses CPU cores [2]
+[2023-07-08 22:34:05,431][18653] Worker 11 uses CPU cores [3]
+[2023-07-08 22:34:05,531][18661] Worker 19 uses CPU cores [3]
+[2023-07-08 22:34:05,555][18660] Worker 18 uses CPU cores [2]
+[2023-07-08 22:34:05,639][18621] Using optimizer <class 'torch.optim.adam.Adam'>
+[2023-07-08 22:34:05,639][18621] No checkpoints found
+[2023-07-08 22:34:05,640][18621] Did not load from checkpoint, starting from scratch!
+[2023-07-08 22:34:05,640][18621] Initialized policy 0 weights for model version 0
+[2023-07-08 22:34:05,642][18621] LearnerWorker_p0 finished initialization!
+[2023-07-08 22:34:05,642][18621] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+[2023-07-08 22:34:05,765][18641] Unhandled exception CUDA error: OS call failed or operation not supported on this OS
+CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
+For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
+Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.
+in evt loop inference_proc0-0_evt_loop
+[2023-07-08 22:35:57,062][18621] Saving /home/raj/repos/HF-DeepRL/8-Proximal-Policy-Optimization/train_dir/default_experiment/checkpoint_p0/checkpoint_000000000_0.pth...
+[2023-07-08 22:37:57,063][18621] Saving /home/raj/repos/HF-DeepRL/8-Proximal-Policy-Optimization/train_dir/default_experiment/checkpoint_p0/checkpoint_000000000_0.pth...
+[2023-07-08 22:39:57,063][18621] Saving /home/raj/repos/HF-DeepRL/8-Proximal-Policy-Optimization/train_dir/default_experiment/checkpoint_p0/checkpoint_000000000_0.pth...
+[2023-07-08 22:41:57,063][18621] Saving /home/raj/repos/HF-DeepRL/8-Proximal-Policy-Optimization/train_dir/default_experiment/checkpoint_p0/checkpoint_000000000_0.pth...
+[2023-07-08 22:43:57,063][18621] Saving /home/raj/repos/HF-DeepRL/8-Proximal-Policy-Optimization/train_dir/default_experiment/checkpoint_p0/checkpoint_000000000_0.pth...
+[2023-07-08 22:45:57,063][18621] Saving /home/raj/repos/HF-DeepRL/8-Proximal-Policy-Optimization/train_dir/default_experiment/checkpoint_p0/checkpoint_000000000_0.pth...
+[2023-07-08 22:47:57,063][18621] Saving /home/raj/repos/HF-DeepRL/8-Proximal-Policy-Optimization/train_dir/default_experiment/checkpoint_p0/checkpoint_000000000_0.pth...
+[2023-07-08 22:48:57,065][18652] Stopping RolloutWorker_w10...
+[2023-07-08 22:48:57,065][18621] Stopping Batcher_0...
+[2023-07-08 22:48:57,065][18621] Loop batcher_evt_loop terminating...
+[2023-07-08 22:48:57,065][18652] Loop rollout_proc10_evt_loop terminating...
+[2023-07-08 22:48:57,065][18661] Stopping RolloutWorker_w19...
+[2023-07-08 22:48:57,065][18661] Loop rollout_proc19_evt_loop terminating...
+[2023-07-08 22:48:57,066][18621] Saving /home/raj/repos/HF-DeepRL/8-Proximal-Policy-Optimization/train_dir/default_experiment/checkpoint_p0/checkpoint_000000000_0.pth...
+[2023-07-08 22:48:57,067][18654] Stopping RolloutWorker_w12...
+[2023-07-08 22:48:57,067][18654] Loop rollout_proc12_evt_loop terminating...
+[2023-07-08 22:48:57,071][18646] Stopping RolloutWorker_w5...
+[2023-07-08 22:48:57,071][18648] Stopping RolloutWorker_w6...
+[2023-07-08 22:48:57,072][18648] Loop rollout_proc6_evt_loop terminating...
+[2023-07-08 22:48:57,072][18646] Loop rollout_proc5_evt_loop terminating...
+[2023-07-08 22:48:57,072][18643] Stopping RolloutWorker_w0...
+[2023-07-08 22:48:57,072][18650] Stopping RolloutWorker_w8...
+[2023-07-08 22:48:57,065][18647] Stopping RolloutWorker_w3...
+[2023-07-08 22:48:57,073][18650] Loop rollout_proc8_evt_loop terminating...
+[2023-07-08 22:48:57,072][18649] Stopping RolloutWorker_w7...
+[2023-07-08 22:48:57,072][18653] Stopping RolloutWorker_w11...
+[2023-07-08 22:48:57,072][18657] Stopping RolloutWorker_w15...
+[2023-07-08 22:48:57,073][18647] Loop rollout_proc3_evt_loop terminating...
+[2023-07-08 22:48:57,073][18649] Loop rollout_proc7_evt_loop terminating...
+[2023-07-08 22:48:57,074][18653] Loop rollout_proc11_evt_loop terminating...
+[2023-07-08 22:48:57,074][18657] Loop rollout_proc15_evt_loop terminating...
+[2023-07-08 22:48:57,082][18660] Stopping RolloutWorker_w18...
+[2023-07-08 22:48:57,082][18642] Stopping RolloutWorker_w1...
+[2023-07-08 22:48:57,082][18660] Loop rollout_proc18_evt_loop terminating...
+[2023-07-08 22:48:57,082][18642] Loop rollout_proc1_evt_loop terminating...
+[2023-07-08 22:48:57,083][18645] Stopping RolloutWorker_w4...
+[2023-07-08 22:48:57,083][18645] Loop rollout_proc4_evt_loop terminating...
+[2023-07-08 22:48:57,092][18656] Stopping RolloutWorker_w14...
+[2023-07-08 22:48:57,092][18651] Stopping RolloutWorker_w9...
+[2023-07-08 22:48:57,092][18651] Loop rollout_proc9_evt_loop terminating...
+[2023-07-08 22:48:57,092][18656] Loop rollout_proc14_evt_loop terminating...
+[2023-07-08 22:48:57,082][18658] Stopping RolloutWorker_w16...
+[2023-07-08 22:48:57,095][18658] Loop rollout_proc16_evt_loop terminating...
+[2023-07-08 22:48:57,098][18643] Loop rollout_proc0_evt_loop terminating...
+[2023-07-08 22:48:57,102][18644] Stopping RolloutWorker_w2...
+[2023-07-08 22:48:57,102][18655] Stopping RolloutWorker_w13...
+[2023-07-08 22:48:57,102][18655] Loop rollout_proc13_evt_loop terminating...
+[2023-07-08 22:48:57,102][18644] Loop rollout_proc2_evt_loop terminating...
+[2023-07-08 22:48:57,112][18659] Stopping RolloutWorker_w17...
+[2023-07-08 22:48:57,112][18659] Loop rollout_proc17_evt_loop terminating...
+[2023-07-08 22:48:57,115][18621] Saving /home/raj/repos/HF-DeepRL/8-Proximal-Policy-Optimization/train_dir/default_experiment/checkpoint_p0/checkpoint_000000000_0.pth...
+[2023-07-08 22:48:57,152][18621] Stopping LearnerWorker_p0...
+[2023-07-08 22:48:57,152][18621] Loop learner_proc0_evt_loop terminating...
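The appended log shows the inference worker dying with an asynchronous CUDA error while the learner keeps saving checkpoint_000000000_0.pth, which suggests no training progress was made in this run. The log itself recommends rerunning with CUDA_LAUNCH_BLOCKING=1; a sketch of doing that from Python (setting the variable before CUDA initializes is an assumption about the workflow here):

```python
# The log's own hint for this failure mode: run with CUDA_LAUNCH_BLOCKING=1 so the
# failing CUDA call is reported synchronously instead of at a later API call.
# Assumption: the variable is set before anything initializes CUDA in this process.
import os

os.environ["CUDA_LAUNCH_BLOCKING"] = "1"  # must be set before the first CUDA call

import torch  # imported after the env var on purpose

print(torch.cuda.is_available())  # sanity check that a CUDA device is visible
```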