RajkNakka committed on
Commit b4a9e85
1 Parent(s): 92c3dd3

Upload folder using huggingface_hub

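Note: the commit message above says the folder was pushed with huggingface_hub. A minimal, hypothetical sketch of such an upload; the folder_path and repo_id below are placeholders, not values recorded in this commit:

```python
# Hypothetical sketch of the kind of upload the commit message describes.
# folder_path and repo_id are placeholders, not values recorded in this commit.
from huggingface_hub import HfApi

api = HfApi()
api.upload_folder(
    folder_path="train_dir/default_experiment",    # local Sample Factory experiment dir (placeholder)
    repo_id="user/doom_health_gathering_supreme",  # target model repo (placeholder)
    repo_type="model",
)
```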
.summary/0/events.out.tfevents.1688877237.snowflake ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4a33a52b7a843217653442a9f39ec2104dcefdac01a85f99eb14a4459093972b
+size 15236
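The added file is stored via Git LFS, so the repository only tracks a pointer (spec version, sha256 oid, and size in bytes). A small sketch of checking a locally fetched copy against those two fields; the local filename is a placeholder, while the oid and size come from the pointer above:

```python
# Sketch: verify a locally fetched copy of the file against the LFS pointer fields above.
# The local path is a placeholder; the expected oid and size come from this diff.
import hashlib
import os

path = "events.out.tfevents.1688877237.snowflake"  # placeholder local path
sha256 = hashlib.sha256(open(path, "rb").read()).hexdigest()

print(sha256 == "4a33a52b7a843217653442a9f39ec2104dcefdac01a85f99eb14a4459093972b")
print(os.path.getsize(path) == 15236)
```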
README.md CHANGED
@@ -15,7 +15,7 @@ model-index:
   type: doom_health_gathering_supreme
   metrics:
   - type: mean_reward
-  value: 3.86 +/- 1.25
+  value: 3.97 +/- 0.26
   name: mean_reward
   verified: false
 ---
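The model-index metric is reported as mean reward plus/minus standard deviation over evaluation episodes. A minimal illustration of how such a figure is computed; the episode rewards below are made-up numbers, not the actual evaluation data:

```python
# Illustration only: how a "mean +/- std" entry such as 3.97 +/- 0.26 is typically derived.
# The episode rewards below are made-up numbers, not the actual evaluation results.
import numpy as np

episode_rewards = np.array([3.6, 4.1, 3.9, 4.2, 3.8, 4.0, 4.3, 3.7, 4.1, 4.0])
print(f"{episode_rewards.mean():.2f} +/- {episode_rewards.std():.2f}")
```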
checkpoint_p0/best_000466273_3819708416_reward_63.056.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f546634a38180cb4a57aec199213e3b845b90a5ebb8206ddaa081c2c9d4ba854
+size 41231900
config.json CHANGED
@@ -63,7 +63,7 @@
   "summaries_use_frameskip": true,
   "heartbeat_interval": 10,
   "heartbeat_reporting_interval": 300,
-  "train_for_env_steps": 2000000,
+  "train_for_env_steps": 20000,
   "train_for_seconds": 1000000,
   "save_every_sec": 120,
   "keep_checkpoints": 2,
@@ -131,7 +131,7 @@
   "git_hash": "0401714b01ee832562a0930e3744117f1ba51e10",
   "git_repo_name": "https://github.com/tenkara/HF-DeepRL.git",
   "command_line": "--env=doom_health_gathering_supreme --train_for_env_steps=2000000",
+  "env_gpu_observations": true,
   "lr_adaptive_min": 1e-06,
-  "lr_adaptive_max": 0.01,
-  "env_gpu_observations": true
+  "lr_adaptive_max": 0.01
 }
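The net effect of this config.json change is a much smaller training budget (train_for_env_steps drops from 2000000 to 20000) plus a reordering of the env_gpu_observations key. A short sketch of reading those fields back from the uploaded config; the local path is a placeholder:

```python
# Sketch: read back the training budget recorded in the uploaded config.json.
# The local path is a placeholder for wherever the file was downloaded.
import json

with open("config.json") as f:
    cfg = json.load(f)

print(cfg["train_for_env_steps"])   # 20000 after this commit (was 2000000)
print(cfg["env_gpu_observations"])  # True
```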
git.diff CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:671503816d17efebd2950b3a8da881615b5bda60f712ce94d1cc40365a4b13fa
-size 211382168
+oid sha256:72fd3c87dce5ac3afe8766d7caea74393f7369ee1e0f7a4adff360d24911b999
+size 211383812
sf_log.txt CHANGED
@@ -561,3 +561,143 @@ Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.
 [2023-07-08 22:25:33,428][18235] Saving /home/raj/repos/HF-DeepRL/8-Proximal-Policy-Optimization/train_dir/default_experiment/checkpoint_p0/checkpoint_000539850_4422451200.pth...
 [2023-07-08 22:25:33,539][18235] Stopping LearnerWorker_p0...
 [2023-07-08 22:25:33,539][18235] Loop learner_proc0_evt_loop terminating...
+[2023-07-08 22:34:04,498][18621] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+[2023-07-08 22:34:04,498][18621] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0
+[2023-07-08 22:34:04,582][18621] Num visible devices: 1
+[2023-07-08 22:34:04,692][18621] Setting fixed seed 42
+[2023-07-08 22:34:04,692][18641] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+[2023-07-08 22:34:04,692][18641] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0
+[2023-07-08 22:34:04,692][18621] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+[2023-07-08 22:34:04,693][18621] Initializing actor-critic model on device cuda:0
+[2023-07-08 22:34:04,693][18621] RunningMeanStd input shape: (3, 72, 128)
+[2023-07-08 22:34:04,694][18621] RunningMeanStd input shape: (1,)
+[2023-07-08 22:34:04,700][18621] ConvEncoder: input_channels=3
+[2023-07-08 22:34:04,744][18641] Num visible devices: 1
+[2023-07-08 22:34:04,771][18645] Worker 4 uses CPU cores [0]
+[2023-07-08 22:34:04,781][18646] Worker 5 uses CPU cores [1]
+[2023-07-08 22:34:04,822][18643] Worker 0 uses CPU cores [0]
+[2023-07-08 22:34:04,872][18642] Worker 1 uses CPU cores [1]
+[2023-07-08 22:34:05,084][18621] Conv encoder output size: 512
+[2023-07-08 22:34:05,102][18621] Policy head output size: 512
+[2023-07-08 22:34:05,142][18648] Worker 6 uses CPU cores [2]
+[2023-07-08 22:34:05,152][18621] Created Actor Critic model with architecture:
+[2023-07-08 22:34:05,152][18621] ActorCriticSharedWeights(
+  (obs_normalizer): ObservationNormalizer(
+    (running_mean_std): RunningMeanStdDictInPlace(
+      (running_mean_std): ModuleDict(
+        (obs): RunningMeanStdInPlace()
+      )
+    )
+  )
+  (returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace)
+  (encoder): VizdoomEncoder(
+    (basic_encoder): ConvEncoder(
+      (enc): RecursiveScriptModule(
+        original_name=ConvEncoderImpl
+        (conv_head): RecursiveScriptModule(
+          original_name=Sequential
+          (0): RecursiveScriptModule(original_name=Conv2d)
+          (1): RecursiveScriptModule(original_name=ReLU)
+          (2): RecursiveScriptModule(original_name=Conv2d)
+          (3): RecursiveScriptModule(original_name=ReLU)
+          (4): RecursiveScriptModule(original_name=Conv2d)
+          (5): RecursiveScriptModule(original_name=ReLU)
+        )
+        (mlp_layers): RecursiveScriptModule(
+          original_name=Sequential
+          (0): RecursiveScriptModule(original_name=Linear)
+          (1): RecursiveScriptModule(original_name=ReLU)
+        )
+      )
+    )
+  )
+  (core): ModelCoreRNN(
+    (core): LSTM(512, 512)
+  )
+  (decoder): MlpDecoder(
+    (mlp): Identity()
+  )
+  (critic_linear): Linear(in_features=512, out_features=1, bias=True)
+  (action_parameterization): ActionParameterizationDefault(
+    (distribution_linear): Linear(in_features=512, out_features=5, bias=True)
+  )
+)
+[2023-07-08 22:34:05,191][18658] Worker 16 uses CPU cores [0]
+[2023-07-08 22:34:05,221][18644] Worker 2 uses CPU cores [2]
+[2023-07-08 22:34:05,271][18651] Worker 9 uses CPU cores [1]
+[2023-07-08 22:34:05,275][18647] Worker 3 uses CPU cores [3]
+[2023-07-08 22:34:05,321][18652] Worker 10 uses CPU cores [2]
+[2023-07-08 22:34:05,371][18649] Worker 7 uses CPU cores [3]
+[2023-07-08 22:34:05,401][18650] Worker 8 uses CPU cores [0]
+[2023-07-08 22:34:05,406][18657] Worker 15 uses CPU cores [3]
+[2023-07-08 22:34:05,408][18654] Worker 12 uses CPU cores [0]
+[2023-07-08 22:34:05,411][18655] Worker 13 uses CPU cores [1]
+[2023-07-08 22:34:05,421][18659] Worker 17 uses CPU cores [1]
+[2023-07-08 22:34:05,431][18656] Worker 14 uses CPU cores [2]
+[2023-07-08 22:34:05,431][18653] Worker 11 uses CPU cores [3]
+[2023-07-08 22:34:05,531][18661] Worker 19 uses CPU cores [3]
+[2023-07-08 22:34:05,555][18660] Worker 18 uses CPU cores [2]
+[2023-07-08 22:34:05,639][18621] Using optimizer <class 'torch.optim.adam.Adam'>
+[2023-07-08 22:34:05,639][18621] No checkpoints found
+[2023-07-08 22:34:05,640][18621] Did not load from checkpoint, starting from scratch!
+[2023-07-08 22:34:05,640][18621] Initialized policy 0 weights for model version 0
+[2023-07-08 22:34:05,642][18621] LearnerWorker_p0 finished initialization!
+[2023-07-08 22:34:05,642][18621] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+[2023-07-08 22:34:05,765][18641] Unhandled exception CUDA error: OS call failed or operation not supported on this OS
+CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
+For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
+Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.
+in evt loop inference_proc0-0_evt_loop
+[2023-07-08 22:35:57,062][18621] Saving /home/raj/repos/HF-DeepRL/8-Proximal-Policy-Optimization/train_dir/default_experiment/checkpoint_p0/checkpoint_000000000_0.pth...
+[2023-07-08 22:37:57,063][18621] Saving /home/raj/repos/HF-DeepRL/8-Proximal-Policy-Optimization/train_dir/default_experiment/checkpoint_p0/checkpoint_000000000_0.pth...
+[2023-07-08 22:39:57,063][18621] Saving /home/raj/repos/HF-DeepRL/8-Proximal-Policy-Optimization/train_dir/default_experiment/checkpoint_p0/checkpoint_000000000_0.pth...
+[2023-07-08 22:41:57,063][18621] Saving /home/raj/repos/HF-DeepRL/8-Proximal-Policy-Optimization/train_dir/default_experiment/checkpoint_p0/checkpoint_000000000_0.pth...
+[2023-07-08 22:43:57,063][18621] Saving /home/raj/repos/HF-DeepRL/8-Proximal-Policy-Optimization/train_dir/default_experiment/checkpoint_p0/checkpoint_000000000_0.pth...
+[2023-07-08 22:45:57,063][18621] Saving /home/raj/repos/HF-DeepRL/8-Proximal-Policy-Optimization/train_dir/default_experiment/checkpoint_p0/checkpoint_000000000_0.pth...
+[2023-07-08 22:47:57,063][18621] Saving /home/raj/repos/HF-DeepRL/8-Proximal-Policy-Optimization/train_dir/default_experiment/checkpoint_p0/checkpoint_000000000_0.pth...
+[2023-07-08 22:48:57,065][18652] Stopping RolloutWorker_w10...
+[2023-07-08 22:48:57,065][18621] Stopping Batcher_0...
+[2023-07-08 22:48:57,065][18621] Loop batcher_evt_loop terminating...
+[2023-07-08 22:48:57,065][18652] Loop rollout_proc10_evt_loop terminating...
+[2023-07-08 22:48:57,065][18661] Stopping RolloutWorker_w19...
+[2023-07-08 22:48:57,065][18661] Loop rollout_proc19_evt_loop terminating...
+[2023-07-08 22:48:57,066][18621] Saving /home/raj/repos/HF-DeepRL/8-Proximal-Policy-Optimization/train_dir/default_experiment/checkpoint_p0/checkpoint_000000000_0.pth...
+[2023-07-08 22:48:57,067][18654] Stopping RolloutWorker_w12...
+[2023-07-08 22:48:57,067][18654] Loop rollout_proc12_evt_loop terminating...
+[2023-07-08 22:48:57,071][18646] Stopping RolloutWorker_w5...
+[2023-07-08 22:48:57,071][18648] Stopping RolloutWorker_w6...
+[2023-07-08 22:48:57,072][18648] Loop rollout_proc6_evt_loop terminating...
+[2023-07-08 22:48:57,072][18646] Loop rollout_proc5_evt_loop terminating...
+[2023-07-08 22:48:57,072][18643] Stopping RolloutWorker_w0...
+[2023-07-08 22:48:57,072][18650] Stopping RolloutWorker_w8...
+[2023-07-08 22:48:57,065][18647] Stopping RolloutWorker_w3...
+[2023-07-08 22:48:57,073][18650] Loop rollout_proc8_evt_loop terminating...
+[2023-07-08 22:48:57,072][18649] Stopping RolloutWorker_w7...
+[2023-07-08 22:48:57,072][18653] Stopping RolloutWorker_w11...
+[2023-07-08 22:48:57,072][18657] Stopping RolloutWorker_w15...
+[2023-07-08 22:48:57,073][18647] Loop rollout_proc3_evt_loop terminating...
+[2023-07-08 22:48:57,073][18649] Loop rollout_proc7_evt_loop terminating...
+[2023-07-08 22:48:57,074][18653] Loop rollout_proc11_evt_loop terminating...
+[2023-07-08 22:48:57,074][18657] Loop rollout_proc15_evt_loop terminating...
+[2023-07-08 22:48:57,082][18660] Stopping RolloutWorker_w18...
+[2023-07-08 22:48:57,082][18642] Stopping RolloutWorker_w1...
+[2023-07-08 22:48:57,082][18660] Loop rollout_proc18_evt_loop terminating...
+[2023-07-08 22:48:57,082][18642] Loop rollout_proc1_evt_loop terminating...
+[2023-07-08 22:48:57,083][18645] Stopping RolloutWorker_w4...
+[2023-07-08 22:48:57,083][18645] Loop rollout_proc4_evt_loop terminating...
+[2023-07-08 22:48:57,092][18656] Stopping RolloutWorker_w14...
+[2023-07-08 22:48:57,092][18651] Stopping RolloutWorker_w9...
+[2023-07-08 22:48:57,092][18651] Loop rollout_proc9_evt_loop terminating...
+[2023-07-08 22:48:57,092][18656] Loop rollout_proc14_evt_loop terminating...
+[2023-07-08 22:48:57,082][18658] Stopping RolloutWorker_w16...
+[2023-07-08 22:48:57,095][18658] Loop rollout_proc16_evt_loop terminating...
+[2023-07-08 22:48:57,098][18643] Loop rollout_proc0_evt_loop terminating...
+[2023-07-08 22:48:57,102][18644] Stopping RolloutWorker_w2...
+[2023-07-08 22:48:57,102][18655] Stopping RolloutWorker_w13...
+[2023-07-08 22:48:57,102][18655] Loop rollout_proc13_evt_loop terminating...
+[2023-07-08 22:48:57,102][18644] Loop rollout_proc2_evt_loop terminating...
+[2023-07-08 22:48:57,112][18659] Stopping RolloutWorker_w17...
+[2023-07-08 22:48:57,112][18659] Loop rollout_proc17_evt_loop terminating...
+[2023-07-08 22:48:57,115][18621] Saving /home/raj/repos/HF-DeepRL/8-Proximal-Policy-Optimization/train_dir/default_experiment/checkpoint_p0/checkpoint_000000000_0.pth...
+[2023-07-08 22:48:57,152][18621] Stopping LearnerWorker_p0...
+[2023-07-08 22:48:57,152][18621] Loop learner_proc0_evt_loop terminating...
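The appended log shows the inference worker dying with an asynchronous CUDA error while the learner keeps saving checkpoint_000000000_0.pth, which suggests no training progress was made in this run. The log itself recommends rerunning with CUDA_LAUNCH_BLOCKING=1; a sketch of doing that from Python (setting the variable before CUDA initializes is an assumption about the workflow here):

```python
# The log's own hint for this failure mode: run with CUDA_LAUNCH_BLOCKING=1 so the
# failing CUDA call is reported synchronously instead of at a later API call.
# Assumption: the variable is set before anything initializes CUDA in this process.
import os

os.environ["CUDA_LAUNCH_BLOCKING"] = "1"  # must be set before the first CUDA call

import torch  # imported after the env var on purpose

print(torch.cuda.is_available())  # sanity check that a CUDA device is visible
```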