diff --git "a/training.log" "b/training.log"
--- "a/training.log"
+++ "b/training.log"
@@ -1,14 +1,14 @@
 /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version!
   warnings.warn(
-[2023-04-14 08:32:38,890] [WARNING] [runner.py:190:fetch_hostfile] Unable to find hostfile, will proceed with training with local resources only.
-[2023-04-14 08:32:39,797] [INFO] [runner.py:540:main] cmd = /home/AdamG012/.conda/envs/py39/bin/python -u -m deepspeed.launcher.launch --world_info=eyJsb2NhbGhvc3QiOiBbMCwgMSwgMiwgMywgNCwgNSwgNiwgN119 --master_addr=127.0.0.1 --master_port=12346 --enable_each_rank_log=None main.py --data_path Dahoas/rm-static Dahoas/full-hh-rlhf Dahoas/synthetic-instruct-gptj-pairwise yitingxie/rlhf-reward-datasets openai/webgpt_comparisons stanfordnlp/SHP --data_split 2,4,4 --actor_model_name_or_path /chatgpt/hf_runs/DeepSpeedExamples/applications/DeepSpeed-Chat/output/actor-models/1.3b --critic_model_name_or_path /chatgpt/hf_runs/DeepSpeedExamples/applications/DeepSpeed-Chat/output/reward-models/350m --num_padding_at_beginning 1 --per_device_train_batch_size 4 --per_device_mini_train_batch_size 4 --generation_batch_numbers 1 --ppo_epochs 1 --max_answer_seq_len 256 --max_prompt_seq_len 256 --actor_learning_rate 9.65e-6 --critic_learning_rate 5e-6 --actor_weight_decay 0.1 --critic_weight_decay 0.1 --num_train_epochs 1 --lr_scheduler_type cosine --gradient_accumulation_steps 1 --num_warmup_steps 100 --deepspeed --seed 1234 --enable_hybrid_engine --actor_zero_stage 2 --critic_zero_stage 2 --output_dir /chatgpt/hf_runs/DeepSpeedExamples/applications/DeepSpeed-Chat/output/step3-models/1.3b
+[2023-04-21 23:35:43,549] [WARNING] [runner.py:190:fetch_hostfile] Unable to find hostfile, will proceed with training with local resources only.
+[2023-04-21 23:35:44,216] [INFO] [runner.py:540:main] cmd = /home/AdamG012/.conda/envs/py39/bin/python -u -m deepspeed.launcher.launch --world_info=eyJsb2NhbGhvc3QiOiBbMCwgMSwgMiwgMywgNCwgNSwgNiwgN119 --master_addr=127.0.0.1 --master_port=12346 --enable_each_rank_log=None main.py --data_path Dahoas/rm-static --data_split 2,4,4 --actor_model_name_or_path ../step1_supervised_finetuning/output/ --critic_model_name_or_path ../step2_reward_model_finetuning/output/ --num_padding_at_beginning 1 --per_device_train_batch_size 4 --per_device_mini_train_batch_size 4 --generation_batch_numbers 1 --ppo_epochs 1 --max_answer_seq_len 256 --max_prompt_seq_len 256 --actor_learning_rate 9.65e-6 --critic_learning_rate 5e-6 --num_train_epochs 1 --lr_scheduler_type cosine --gradient_accumulation_steps 1 --disable_actor_dropout --num_warmup_steps 100 --deepspeed --seed 1234 --enable_hybrid_engine --actor_zero_stage 2 --critic_zero_stage 2 --enable_ema --output_dir ./output
 /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version!
   warnings.warn(
-[2023-04-14 08:32:50,431] [INFO] [launch.py:229:main] WORLD INFO DICT: {'localhost': [0, 1, 2, 3, 4, 5, 6, 7]}
-[2023-04-14 08:32:50,884] [INFO] [launch.py:235:main] nnodes=1, num_local_procs=8, node_rank=0
-[2023-04-14 08:32:50,884] [INFO] [launch.py:246:main] global_rank_mapping=defaultdict(<class 'list'>, {'localhost': [0, 1, 2, 3, 4, 5, 6, 7]})
-[2023-04-14 08:32:50,884] [INFO] [launch.py:247:main] dist_world_size=8
-[2023-04-14 08:32:50,884] [INFO] [launch.py:249:main] Setting CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
+[2023-04-21 23:35:47,889] [INFO] [launch.py:229:main] WORLD INFO DICT: {'localhost': [0, 1, 2, 3, 4, 5, 6, 7]}
+[2023-04-21 23:35:47,889] [INFO] [launch.py:235:main] nnodes=1, num_local_procs=8, node_rank=0
+[2023-04-21 23:35:47,889] [INFO] [launch.py:246:main] global_rank_mapping=defaultdict(<class 'list'>, {'localhost': [0, 1, 2, 3, 4, 5, 6, 7]})
+[2023-04-21 23:35:47,889] [INFO] [launch.py:247:main] dist_world_size=8
+[2023-04-21 23:35:47,889] [INFO] [launch.py:249:main] Setting CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
 /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version!
   warnings.warn(
 /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version!
@@ -25,112 +25,34 @@
   warnings.warn(
 /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version!
   warnings.warn(
-[2023-04-14 08:35:14,863] [INFO] [comm.py:586:init_distributed] Initializing TorchBackend in DeepSpeed with backend nccl
+[2023-04-21 23:35:59,562] [INFO] [comm.py:586:init_distributed] Initializing TorchBackend in DeepSpeed with backend nccl
 Found cached dataset parquet (/reward/Dahoas___parquet/default-b9d2c4937d617106/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
-  0%|          | 0/2 [00:00<?, ?it/s]Found cached dataset parquet (/reward/Dahoas___parquet/default-b9d2c4937d617106/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
-  0%|          | 0/2 [00:00<?, ?it/s]100%|██████████| 2/2 [00:00<00:00, 64.08it/s]
-100%|██████████| 2/2 [00:00<00:00, 690.82it/s]
-Found cached dataset parquet (/reward/Dahoas___parquet/default-b9d2c4937d617106/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
-  0%|          | 0/2 [00:00<?, ?it/s]100%|██████████| 2/2 [00:00<00:00, 571.39it/s]
 Found cached dataset parquet (/reward/Dahoas___parquet/default-b9d2c4937d617106/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
-  0%|          | 0/2 [00:00<?, ?it/s]100%|██████████| 2/2 [00:00<00:00, 716.85it/s]
+  0%|          | 0/2 [00:00<?, ?it/s]  0%|          | 0/2 [00:00<?, ?it/s]Found cached dataset parquet (/reward/Dahoas___parquet/default-b9d2c4937d617106/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
+100%|██████████| 2/2 [00:00<00:00, 620.78it/s]
+100%|██████████| 2/2 [00:00<00:00, 648.37it/s]
+  0%|          | 0/2 [00:00<?, ?it/s]Found cached dataset parquet (/reward/Dahoas___parquet/default-b9d2c4937d617106/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
+100%|██████████| 2/2 [00:00<00:00, 656.95it/s]
+  0%|          | 0/2 [00:00<?, ?it/s]100%|██████████| 2/2 [00:00<00:00, 663.55it/s]
 Found cached dataset parquet (/reward/Dahoas___parquet/default-b9d2c4937d617106/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
-  0%|          | 0/2 [00:00<?, ?it/s]100%|██████████| 2/2 [00:00<00:00, 662.45it/s]
+  0%|          | 0/2 [00:00<?, ?it/s]100%|██████████| 2/2 [00:00<00:00, 647.22it/s]
 Found cached dataset parquet (/reward/Dahoas___parquet/default-b9d2c4937d617106/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
-  0%|          | 0/2 [00:00<?, ?it/s]100%|██████████| 2/2 [00:00<00:00, 746.78it/s]
+  0%|          | 0/2 [00:00<?, ?it/s]100%|██████████| 2/2 [00:00<00:00, 682.06it/s]
 Found cached dataset parquet (/reward/Dahoas___parquet/default-b9d2c4937d617106/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
-  0%|          | 0/2 [00:00<?, ?it/s]100%|██████████| 2/2 [00:00<00:00, 679.02it/s]
+  0%|          | 0/2 [00:00<?, ?it/s]100%|██████████| 2/2 [00:00<00:00, 571.98it/s]
 Found cached dataset parquet (/reward/Dahoas___parquet/default-b9d2c4937d617106/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
-  0%|          | 0/2 [00:00<?, ?it/s]100%|██████████| 2/2 [00:00<00:00, 639.18it/s]
-Found cached dataset parquet (/reward/Dahoas___parquet/default-b25c081aeeca3652/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
-  0%|          | 0/2 [00:00<?, ?it/s]Found cached dataset parquet (/reward/Dahoas___parquet/default-b25c081aeeca3652/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
-  0%|          | 0/2 [00:00<?, ?it/s]Found cached dataset parquet (/reward/Dahoas___parquet/default-b25c081aeeca3652/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
-100%|██████████| 2/2 [00:00<00:00,  6.89it/s]100%|██████████| 2/2 [00:00<00:00,  6.89it/s]
-100%|██████████| 2/2 [00:00<00:00, 156.58it/s]
-  0%|          | 0/2 [00:00<?, ?it/s]100%|██████████| 2/2 [00:00<00:00, 26.93it/s]
-Found cached dataset parquet (/reward/Dahoas___parquet/default-b25c081aeeca3652/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
-  0%|          | 0/2 [00:00<?, ?it/s]100%|██████████| 2/2 [00:00<00:00, 666.77it/s]
-Found cached dataset parquet (/reward/Dahoas___parquet/default-b25c081aeeca3652/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
-  0%|          | 0/2 [00:00<?, ?it/s]100%|██████████| 2/2 [00:00<00:00, 23.94it/s]
-Found cached dataset parquet (/reward/Dahoas___parquet/default-b25c081aeeca3652/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
-  0%|          | 0/2 [00:00<?, ?it/s] 50%|█████     | 1/2 [00:00<00:00,  1.18it/s]100%|██████████| 2/2 [00:00<00:00,  2.21it/s]
-Found cached dataset parquet (/reward/Dahoas___parquet/default-b25c081aeeca3652/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
-Found cached dataset parquet (/reward/Dahoas___parquet/default-b25c081aeeca3652/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
-  0%|          | 0/2 [00:00<?, ?it/s] 50%|█████     | 1/2 [00:00<00:00,  7.78it/s]  0%|          | 0/2 [00:00<?, ?it/s]100%|██████████| 2/2 [00:03<00:00,  2.20s/it]100%|██████████| 2/2 [00:04<00:00,  2.01s/it]
- 50%|█████     | 1/2 [00:02<00:02,  2.33s/it]100%|██████████| 2/2 [00:02<00:00,  1.08s/it]100%|██████████| 2/2 [00:02<00:00,  1.27s/it]
-Found cached dataset parquet (/reward/Dahoas___parquet/Dahoas--synthetic-instruct-gptj-pairwise-0b2fd7bd9ea121cb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
-  0%|          | 0/1 [00:00<?, ?it/s]Found cached dataset parquet (/reward/Dahoas___parquet/Dahoas--synthetic-instruct-gptj-pairwise-0b2fd7bd9ea121cb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
-100%|██████████| 1/1 [00:00<00:00, 15.08it/s]
-  0%|          | 0/1 [00:00<?, ?it/s]100%|██████████| 1/1 [00:00<00:00, 681.67it/s]
-Found cached dataset parquet (/reward/Dahoas___parquet/Dahoas--synthetic-instruct-gptj-pairwise-0b2fd7bd9ea121cb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
-  0%|          | 0/1 [00:00<?, ?it/s]100%|██████████| 1/1 [00:00<00:00, 15.83it/s]
-Found cached dataset parquet (/reward/Dahoas___parquet/Dahoas--synthetic-instruct-gptj-pairwise-0b2fd7bd9ea121cb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
-  0%|          | 0/1 [00:00<?, ?it/s]100%|██████████| 1/1 [00:00<00:00, 564.36it/s]
-Found cached dataset parquet (/reward/Dahoas___parquet/Dahoas--synthetic-instruct-gptj-pairwise-0b2fd7bd9ea121cb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
-  0%|          | 0/1 [00:00<?, ?it/s]100%|██████████| 1/1 [00:00<00:00, 683.11it/s]
-Found cached dataset parquet (/reward/Dahoas___parquet/Dahoas--synthetic-instruct-gptj-pairwise-0b2fd7bd9ea121cb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
-  0%|          | 0/1 [00:00<?, ?it/s]Found cached dataset parquet (/reward/Dahoas___parquet/Dahoas--synthetic-instruct-gptj-pairwise-0b2fd7bd9ea121cb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
-100%|██████████| 1/1 [00:07<00:00,  7.49s/it]100%|██████████| 1/1 [00:07<00:00,  7.66s/it]
-  0%|          | 0/1 [00:00<?, ?it/s]100%|██████████| 1/1 [00:00<00:00, 475.71it/s]
-Found cached dataset parquet (/reward/Dahoas___parquet/Dahoas--synthetic-instruct-gptj-pairwise-0b2fd7bd9ea121cb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
-  0%|          | 0/1 [00:00<?, ?it/s]100%|██████████| 1/1 [00:00<00:00,  3.12it/s]100%|██████████| 1/1 [00:00<00:00,  3.12it/s]
-Found cached dataset parquet (/reward/yitingxie___parquet/yitingxie--rlhf-reward-datasets-f2627438ff1fb9dd/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
-  0%|          | 0/2 [00:00<?, ?it/s] 50%|█████     | 1/2 [00:00<00:00,  5.79it/s]100%|██████████| 2/2 [00:00<00:00,  2.84it/s]100%|██████████| 2/2 [00:00<00:00,  3.08it/s]
-Found cached dataset parquet (/reward/yitingxie___parquet/yitingxie--rlhf-reward-datasets-f2627438ff1fb9dd/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
-Found cached dataset parquet (/reward/yitingxie___parquet/yitingxie--rlhf-reward-datasets-f2627438ff1fb9dd/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
-  0%|          | 0/2 [00:00<?, ?it/s]  0%|          | 0/2 [00:00<?, ?it/s] 50%|█████     | 1/2 [00:00<00:00,  1.81it/s]100%|██████████| 2/2 [00:00<00:00,  3.11it/s]
-100%|██████████| 2/2 [00:00<00:00, 21.36it/s]
-Found cached dataset parquet (/reward/yitingxie___parquet/yitingxie--rlhf-reward-datasets-f2627438ff1fb9dd/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
-  0%|          | 0/2 [00:00<?, ?it/s]Found cached dataset parquet (/reward/yitingxie___parquet/yitingxie--rlhf-reward-datasets-f2627438ff1fb9dd/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
- 50%|█████     | 1/2 [00:05<00:05,  5.17s/it]100%|██████████| 2/2 [00:05<00:00,  2.35s/it]100%|██████████| 2/2 [00:05<00:00,  2.78s/it]
-  0%|          | 0/2 [00:00<?, ?it/s]100%|██████████| 2/2 [00:00<00:00, 223.68it/s]
-Found cached dataset parquet (/reward/yitingxie___parquet/yitingxie--rlhf-reward-datasets-f2627438ff1fb9dd/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
-  0%|          | 0/2 [00:00<?, ?it/s]100%|██████████| 2/2 [00:00<00:00, 751.26it/s]
-Found cached dataset parquet (/reward/yitingxie___parquet/yitingxie--rlhf-reward-datasets-f2627438ff1fb9dd/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
-  0%|          | 0/2 [00:00<?, ?it/s]Found cached dataset parquet (/reward/yitingxie___parquet/yitingxie--rlhf-reward-datasets-f2627438ff1fb9dd/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
-100%|██████████| 2/2 [00:00<00:00, 22.35it/s]
-  0%|          | 0/2 [00:00<?, ?it/s]100%|██████████| 2/2 [00:00<00:00, 756.48it/s]
-Found cached dataset webgpt_comparisons (/reward/openai___webgpt_comparisons/default/0.0.0/8b5d5879cdc98c4c0099af6053dffe8d504588d43d3b11f1b1ec223ab1e8db0a)
-  0%|          | 0/1 [00:00<?, ?it/s]Found cached dataset webgpt_comparisons (/reward/openai___webgpt_comparisons/default/0.0.0/8b5d5879cdc98c4c0099af6053dffe8d504588d43d3b11f1b1ec223ab1e8db0a)
-  0%|          | 0/1 [00:00<?, ?it/s]100%|██████████| 1/1 [00:00<00:00, 67.81it/s]
-100%|██████████| 1/1 [00:00<00:00, 278.17it/s]
-Found cached dataset webgpt_comparisons (/reward/openai___webgpt_comparisons/default/0.0.0/8b5d5879cdc98c4c0099af6053dffe8d504588d43d3b11f1b1ec223ab1e8db0a)
-  0%|          | 0/1 [00:00<?, ?it/s]100%|██████████| 1/1 [00:00<00:00, 293.12it/s]
-Found cached dataset webgpt_comparisons (/reward/openai___webgpt_comparisons/default/0.0.0/8b5d5879cdc98c4c0099af6053dffe8d504588d43d3b11f1b1ec223ab1e8db0a)
-  0%|          | 0/1 [00:00<?, ?it/s]100%|██████████| 1/1 [00:00<00:00, 276.56it/s]
-Found cached dataset webgpt_comparisons (/reward/openai___webgpt_comparisons/default/0.0.0/8b5d5879cdc98c4c0099af6053dffe8d504588d43d3b11f1b1ec223ab1e8db0a)
-  0%|          | 0/1 [00:00<?, ?it/s]100%|██████████| 1/1 [00:00<00:00, 169.24it/s]
-Found cached dataset webgpt_comparisons (/reward/openai___webgpt_comparisons/default/0.0.0/8b5d5879cdc98c4c0099af6053dffe8d504588d43d3b11f1b1ec223ab1e8db0a)
-  0%|          | 0/1 [00:00<?, ?it/s]Found cached dataset webgpt_comparisons (/reward/openai___webgpt_comparisons/default/0.0.0/8b5d5879cdc98c4c0099af6053dffe8d504588d43d3b11f1b1ec223ab1e8db0a)
-100%|██████████| 1/1 [00:00<00:00, 289.58it/s]
-  0%|          | 0/1 [00:00<?, ?it/s]100%|██████████| 1/1 [00:00<00:00, 289.08it/s]
-Found cached dataset webgpt_comparisons (/reward/openai___webgpt_comparisons/default/0.0.0/8b5d5879cdc98c4c0099af6053dffe8d504588d43d3b11f1b1ec223ab1e8db0a)
-  0%|          | 0/1 [00:00<?, ?it/s]100%|██████████| 1/1 [00:08<00:00,  8.44s/it]100%|██████████| 1/1 [00:09<00:00,  9.61s/it]
-Found cached dataset json (/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
-  0%|          | 0/3 [00:00<?, ?it/s]Found cached dataset json (/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
- 33%|███▎      | 1/3 [00:15<00:30, 15.43s/it]  0%|          | 0/3 [00:00<?, ?it/s] 67%|██████▋   | 2/3 [00:20<00:09,  9.35s/it]100%|██████████| 3/3 [00:23<00:00,  6.47s/it]100%|██████████| 3/3 [00:23<00:00,  7.85s/it]
- 33%|███▎      | 1/3 [00:03<00:06,  3.12s/it] 67%|██████▋   | 2/3 [00:03<00:01,  1.36s/it]100%|██████████| 3/3 [00:03<00:00,  1.08s/it]
-Found cached dataset json (/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
-  0%|          | 0/3 [00:00<?, ?it/s]Found cached dataset json (/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
-  0%|          | 0/3 [00:00<?, ?it/s] 33%|███▎      | 1/3 [00:09<00:19,  9.58s/it]Found cached dataset json (/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
- 33%|███▎      | 1/3 [00:00<00:00,  8.48it/s]100%|██████████| 3/3 [00:00<00:00, 18.45it/s]
-100%|██████████| 3/3 [00:09<00:00,  3.22s/it]
-  0%|          | 0/3 [00:00<?, ?it/s]100%|██████████| 3/3 [00:00<00:00, 241.71it/s]
-Found cached dataset json (/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
-Found cached dataset json (/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
-  0%|          | 0/3 [00:00<?, ?it/s]  0%|          | 0/3 [00:00<?, ?it/s] 33%|███▎      | 1/3 [00:05<00:10,  5.20s/it] 33%|███▎      | 1/3 [00:02<00:05,  2.93s/it] 67%|██████▋   | 2/3 [00:05<00:02,  2.29s/it] 67%|██████▋   | 2/3 [00:03<00:01,  1.35s/it]100%|██████████| 3/3 [00:03<00:00,  1.22it/s]100%|██████████| 3/3 [00:05<00:00,  1.33s/it]100%|██████████| 3/3 [00:03<00:00,  1.12s/it]100%|██████████| 3/3 [00:05<00:00,  1.88s/it]
-
-Found cached dataset json (/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
-  0%|          | 0/3 [00:00<?, ?it/s]100%|██████████| 3/3 [00:00<00:00, 261.87it/s]
+  0%|          | 0/2 [00:00<?, ?it/s]100%|██████████| 2/2 [00:00<00:00, 699.63it/s]
 ************************[start] Initializing Actor Model [start] *************************
 huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
 To disable this warning, you can either:
-huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
 	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
 	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
-	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
@@ -143,6 +65,8 @@ huggingface/tokenizers: The current process just got forked, after parallelism h
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
 	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
@@ -163,10 +87,16 @@ huggingface/tokenizers: The current process just got forked, after parallelism h
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
 	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
 	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+Detected CUDA files, patching ldflags
+Emitting ninja build file /home/AdamG012/.cache/torch_extensions/py39_cu113/fused_adam/build.ninja...
+Building extension module fused_adam...
+Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)
 huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
@@ -178,25 +108,11 @@ To disable this warning, you can either:
 huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
-Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
-
-Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
-Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
-Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
-Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combinationInstalled CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
-
-Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
-Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
-Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
-Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
-Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
-Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
 huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
 	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
-Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
-Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
@@ -205,29 +121,19 @@ huggingface/tokenizers: The current process just got forked, after parallelism h
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
 	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
-Detected CUDA files, patching ldflags
-Emitting ninja build file /home/AdamG012/.cache/torch_extensions/py39_cu113/fused_adam/build.ninja...
-Building extension module fused_adam...
-Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)
 huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
 	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 ninja: no work to do.
 Loading extension module fused_adam...
-Time to load fused_adam op: 4.954506158828735 seconds
-Loading extension module fused_adam...
-Time to load fused_adam op: 4.809650182723999 seconds
-Loading extension module fused_adam...
-Loading extension module fused_adam...
-Loading extension module fused_adam...
-Loading extension module fused_adam...
-Loading extension module fused_adam...
-Time to load fused_adam op: 5.013200044631958 seconds
-Time to load fused_adam op: 5.013401508331299 seconds
-Time to load fused_adam op: 5.013464689254761 seconds
-Time to load fused_adam op: 5.013665199279785 seconds
-Time to load fused_adam op: 5.014423608779907 seconds
+Time to load fused_adam op: 0.7593903541564941 seconds
+Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
@@ -236,12 +142,14 @@ huggingface/tokenizers: The current process just got forked, after parallelism h
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
 	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
-Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
-Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
 	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
@@ -260,30 +168,42 @@ To disable this warning, you can either:
 	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
 ninja: no work to do.
 Loading extension module fused_adam...
-Time to load fused_adam op: 7.266183376312256 seconds
-[2023-04-14 08:44:02,458] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed info: version=0.9.0, git-hash=unknown, git-branch=unknown
-[2023-04-14 08:44:08,781] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed Flops Profiler Enabled: False
-[2023-04-14 08:44:13,170] [INFO] [logging.py:96:log_dist] [Rank 0] Removing param_group that has no 'params' in the client Optimizer
-[2023-04-14 08:44:13,170] [INFO] [logging.py:96:log_dist] [Rank 0] Using client Optimizer as basic optimizer
+Time to load fused_adam op: 0.9814262390136719 seconds
+Loading extension module fused_adam...
+Time to load fused_adam op: 0.8034741878509521 seconds
+Loading extension module fused_adam...
+Time to load fused_adam op: 1.5046265125274658 seconds
+Loading extension module fused_adam...
+Time to load fused_adam op: 0.7058024406433105 seconds
+Loading extension module fused_adam...
+Time to load fused_adam op: 1.3045263290405273 seconds
+Loading extension module fused_adam...
+Time to load fused_adam op: 1.0043704509735107 seconds
+Loading extension module fused_adam...
+Time to load fused_adam op: 1.113295078277588 seconds
+[2023-04-21 23:36:44,898] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed info: version=0.9.0, git-hash=unknown, git-branch=unknown
+[2023-04-21 23:36:57,077] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed Flops Profiler Enabled: False
+[2023-04-21 23:36:57,079] [INFO] [logging.py:96:log_dist] [Rank 0] Removing param_group that has no 'params' in the client Optimizer
+[2023-04-21 23:36:57,079] [INFO] [logging.py:96:log_dist] [Rank 0] Using client Optimizer as basic optimizer
 Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...[2023-04-21 23:36:57,099] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed Basic Optimizer = FusedAdam
+
+[2023-04-21 23:36:57,099] [INFO] [utils.py:51:is_zero_supported_optimizer] Checking ZeRO support for optimizer=FusedAdam type=<class 'deepspeed.ops.adam.fused_adam.FusedAdam'>
+[2023-04-21 23:36:57,099] [INFO] [logging.py:96:log_dist] [Rank 0] Creating torch.float16 ZeRO stage 2 optimizer
+[2023-04-21 23:36:57,099] [INFO] [stage_1_and_2.py:133:__init__] Reduce bucket size 500,000,000
+[2023-04-21 23:36:57,099] [INFO] [stage_1_and_2.py:134:__init__] Allgather bucket size 500,000,000
+[2023-04-21 23:36:57,099] [INFO] [stage_1_and_2.py:135:__init__] CPU Offload: False
+[2023-04-21 23:36:57,099] [INFO] [stage_1_and_2.py:136:__init__] Round robin gradient partitioning: False
 Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
-[2023-04-14 08:44:13,193] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed Basic Optimizer = FusedAdam
-[2023-04-14 08:44:13,193] [INFO] [utils.py:51:is_zero_supported_optimizer] Checking ZeRO support for optimizer=FusedAdam type=<class 'deepspeed.ops.adam.fused_adam.FusedAdam'>
-[2023-04-14 08:44:13,193] [INFO] [logging.py:96:log_dist] [Rank 0] Creating torch.float16 ZeRO stage 2 optimizer
 huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
 	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
-[2023-04-14 08:44:13,703] [INFO] [stage_1_and_2.py:133:__init__] Reduce bucket size 500,000,000
-[2023-04-14 08:44:13,704] [INFO] [stage_1_and_2.py:134:__init__] Allgather bucket size 500,000,000
-[2023-04-14 08:44:13,704] [INFO] [stage_1_and_2.py:135:__init__] CPU Offload: False
-[2023-04-14 08:44:13,704] [INFO] [stage_1_and_2.py:136:__init__] Round robin gradient partitioning: False
-Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
@@ -301,73 +221,73 @@ To disable this warning, you can either:
 	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
 ninja: no work to do.
 Loading extension module utils...
-Time to load utils op: 7.119100570678711 seconds
+Time to load utils op: 0.5721704959869385 seconds
 Loading extension module utils...
-Time to load utils op: 6.615035772323608 seconds
 Loading extension module utils...
 Loading extension module utils...
-Time to load utils op: 7.214683532714844 seconds
 Loading extension module utils...
 Loading extension module utils...
 Loading extension module utils...
 Loading extension module utils...
-Time to load utils op: 7.2167649269104 seconds
-Time to load utils op: 7.216015338897705 seconds
-Time to load utils op: 7.218137264251709 seconds
-Time to load utils op: 7.217472076416016 seconds
-Time to load utils op: 7.218402147293091 seconds
-Rank: 0 partition count [8, 8] and sizes[(164401920, False), (67840, False)] 
+Time to load utils op: 0.6046469211578369 seconds
+Time to load utils op: 0.60459303855896 seconds
+Time to load utils op: 0.6040091514587402 seconds
+Time to load utils op: 0.6041994094848633 secondsTime to load utils op: 0.6039597988128662 seconds
+
+Time to load utils op: 0.6039996147155762 seconds
+Time to load utils op: 0.6041290760040283 seconds
+Rank: 2 partition count [8, 8] and sizes[(164401920, False), (67840, False)] 
+Rank: 3 partition count [8, 8] and sizes[(164401920, False), (67840, False)] 
 Rank: 1 partition count [8, 8] and sizes[(164401920, False), (67840, False)] 
 Rank: 4 partition count [8, 8] and sizes[(164401920, False), (67840, False)] 
 Rank: 6 partition count [8, 8] and sizes[(164401920, False), (67840, False)] 
-Rank: 3 partition count [8, 8] and sizes[(164401920, False), (67840, False)] 
-Rank: 2 partition count [8, 8] and sizes[(164401920, False), (67840, False)] 
+Rank: 0 partition count [8, 8] and sizes[(164401920, False), (67840, False)] 
 Rank: 7 partition count [8, 8] and sizes[(164401920, False), (67840, False)] 
 Rank: 5 partition count [8, 8] and sizes[(164401920, False), (67840, False)] 
 Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module utils, skipping build step...
 Loading extension module utils...
-Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
-Time to load utils op: 0.0018334388732910156 seconds
 No modifications detected for re-loaded extension module utils, skipping build step...
 Loading extension module utils...
-Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
-Time to load utils op: 0.0009965896606445312 seconds
+Time to load utils op: 0.0013031959533691406 seconds
+Time to load utils op: 0.0010409355163574219 seconds
 No modifications detected for re-loaded extension module utils, skipping build step...
 Loading extension module utils...
-Time to load utils op: 0.001102447509765625 seconds
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Time to load utils op: 0.0012707710266113281 seconds
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module utils, skipping build step...
 Loading extension module utils...
-Time to load utils op: 0.0010309219360351562 seconds
-Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Time to load utils op: 0.0009152889251708984 seconds
 No modifications detected for re-loaded extension module utils, skipping build step...
 Loading extension module utils...
-Time to load utils op: 0.0010993480682373047 seconds
+Time to load utils op: 0.0009307861328125 seconds
 No modifications detected for re-loaded extension module utils, skipping build step...
 Loading extension module utils...
+Time to load utils op: 0.0007898807525634766 seconds
 Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
-Time to load utils op: 0.0011150836944580078 seconds
 No modifications detected for re-loaded extension module utils, skipping build step...
 Loading extension module utils...
-Time to load utils op: 0.0014064311981201172 seconds
-[2023-04-14 08:44:34,254] [INFO] [utils.py:785:see_memory_usage] Before initializing optimizer states
-[2023-04-14 08:44:34,255] [INFO] [utils.py:786:see_memory_usage] MA 3.06 GB         Max_MA 3.06 GB         CA 3.07 GB         Max_CA 3 GB 
-[2023-04-14 08:44:34,255] [INFO] [utils.py:793:see_memory_usage] CPU Virtual Memory:  used = 111.88 GB, percent = 11.1%
-[2023-04-14 08:44:34,948] [INFO] [utils.py:785:see_memory_usage] After initializing optimizer states
-[2023-04-14 08:44:34,949] [INFO] [utils.py:786:see_memory_usage] MA 4.29 GB         Max_MA 4.91 GB         CA 4.91 GB         Max_CA 5 GB 
-[2023-04-14 08:44:34,949] [INFO] [utils.py:793:see_memory_usage] CPU Virtual Memory:  used = 111.87 GB, percent = 11.1%
-[2023-04-14 08:44:34,949] [INFO] [stage_1_and_2.py:489:__init__] optimizer state initialized
-[2023-04-14 08:44:35,656] [INFO] [utils.py:785:see_memory_usage] After initializing ZeRO optimizer
-[2023-04-14 08:44:35,657] [INFO] [utils.py:786:see_memory_usage] MA 4.29 GB         Max_MA 4.29 GB         CA 4.91 GB         Max_CA 5 GB 
-[2023-04-14 08:44:35,657] [INFO] [utils.py:793:see_memory_usage] CPU Virtual Memory:  used = 111.85 GB, percent = 11.1%
-[2023-04-14 08:44:35,659] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed Final Optimizer = FusedAdam
-[2023-04-14 08:44:35,659] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed using client LR scheduler
-[2023-04-14 08:44:35,659] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed LR Scheduler = <torch.optim.lr_scheduler.LambdaLR object at 0x7f0590761a30>
-[2023-04-14 08:44:35,659] [INFO] [logging.py:96:log_dist] [Rank 0] step=0, skipped=0, lr=[0.0, 0.0], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 08:44:35,660] [INFO] [config.py:953:print] DeepSpeedEngine configuration:
-[2023-04-14 08:44:35,660] [INFO] [config.py:957:print]   activation_checkpointing_config  {
+Time to load utils op: 0.0012035369873046875 seconds
+[2023-04-21 23:37:08,253] [INFO] [utils.py:785:see_memory_usage] Before initializing optimizer states
+[2023-04-21 23:37:08,253] [INFO] [utils.py:786:see_memory_usage] MA 3.06 GB         Max_MA 3.06 GB         CA 3.07 GB         Max_CA 3 GB 
+[2023-04-21 23:37:08,254] [INFO] [utils.py:793:see_memory_usage] CPU Virtual Memory:  used = 63.06 GB, percent = 6.3%
+[2023-04-21 23:37:08,444] [INFO] [utils.py:785:see_memory_usage] After initializing optimizer states
+[2023-04-21 23:37:08,445] [INFO] [utils.py:786:see_memory_usage] MA 4.29 GB         Max_MA 4.91 GB         CA 4.91 GB         Max_CA 5 GB 
+[2023-04-21 23:37:08,445] [INFO] [utils.py:793:see_memory_usage] CPU Virtual Memory:  used = 63.06 GB, percent = 6.3%
+[2023-04-21 23:37:08,446] [INFO] [stage_1_and_2.py:489:__init__] optimizer state initialized
+[2023-04-21 23:37:08,634] [INFO] [utils.py:785:see_memory_usage] After initializing ZeRO optimizer
+[2023-04-21 23:37:08,634] [INFO] [utils.py:786:see_memory_usage] MA 4.29 GB         Max_MA 4.29 GB         CA 4.91 GB         Max_CA 5 GB 
+[2023-04-21 23:37:08,635] [INFO] [utils.py:793:see_memory_usage] CPU Virtual Memory:  used = 63.06 GB, percent = 6.3%
+[2023-04-21 23:37:08,636] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed Final Optimizer = FusedAdam
+[2023-04-21 23:37:08,637] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed using client LR scheduler
+[2023-04-21 23:37:08,637] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed LR Scheduler = <torch.optim.lr_scheduler.LambdaLR object at 0x7fb1f97d5220>
+[2023-04-21 23:37:08,637] [INFO] [logging.py:96:log_dist] [Rank 0] step=0, skipped=0, lr=[0.0, 0.0], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-21 23:37:08,637] [INFO] [config.py:953:print] DeepSpeedEngine configuration:
+[2023-04-21 23:37:08,638] [INFO] [config.py:957:print]   activation_checkpointing_config  {
     "partition_activations": false, 
     "contiguous_memory_optimization": false, 
     "cpu_checkpointing": false, 
@@ -375,10 +295,10 @@ Time to load utils op: 0.0014064311981201172 seconds
     "synchronize_checkpoint_boundary": false, 
     "profile": false
 }
-[2023-04-14 08:44:35,660] [INFO] [config.py:957:print]   aio_config ................... {'block_size': 1048576, 'queue_depth': 8, 'thread_count': 1, 'single_submit': False, 'overlap_events': True}
-[2023-04-14 08:44:35,660] [INFO] [config.py:957:print]   amp_enabled .................. False
-[2023-04-14 08:44:35,660] [INFO] [config.py:957:print]   amp_params ................... False
-[2023-04-14 08:44:35,660] [INFO] [config.py:957:print]   autotuning_config ............ {
+[2023-04-21 23:37:08,638] [INFO] [config.py:957:print]   aio_config ................... {'block_size': 1048576, 'queue_depth': 8, 'thread_count': 1, 'single_submit': False, 'overlap_events': True}
+[2023-04-21 23:37:08,638] [INFO] [config.py:957:print]   amp_enabled .................. False
+[2023-04-21 23:37:08,638] [INFO] [config.py:957:print]   amp_params ................... False
+[2023-04-21 23:37:08,638] [INFO] [config.py:957:print]   autotuning_config ............ {
     "enabled": false, 
     "start_step": null, 
     "end_step": null, 
@@ -403,31 +323,31 @@ Time to load utils op: 0.0014064311981201172 seconds
     "min_train_micro_batch_size_per_gpu": 1, 
     "num_tuning_micro_batch_sizes": 3
 }
-[2023-04-14 08:44:35,660] [INFO] [config.py:957:print]   bfloat16_enabled ............. False
-[2023-04-14 08:44:35,660] [INFO] [config.py:957:print]   checkpoint_parallel_write_pipeline  False
-[2023-04-14 08:44:35,660] [INFO] [config.py:957:print]   checkpoint_tag_validation_enabled  True
-[2023-04-14 08:44:35,660] [INFO] [config.py:957:print]   checkpoint_tag_validation_fail  False
-[2023-04-14 08:44:35,660] [INFO] [config.py:957:print]   comms_config ................. <deepspeed.comm.config.DeepSpeedCommsConfig object at 0x7f04cfff4fd0>
-[2023-04-14 08:44:35,660] [INFO] [config.py:957:print]   communication_data_type ...... None
-[2023-04-14 08:44:35,661] [INFO] [config.py:957:print]   compression_config ........... {'weight_quantization': {'shared_parameters': {'enabled': False, 'quantizer_kernel': False, 'schedule_offset': 0, 'quantize_groups': 1, 'quantize_verbose': False, 'quantization_type': 'symmetric', 'quantize_weight_in_forward': False, 'rounding': 'nearest', 'fp16_mixed_quantize': False, 'quantize_change_ratio': 0.001}, 'different_groups': {}}, 'activation_quantization': {'shared_parameters': {'enabled': False, 'quantization_type': 'symmetric', 'range_calibration': 'dynamic', 'schedule_offset': 1000}, 'different_groups': {}}, 'sparse_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'row_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'head_pruning': {'shared_parameters': {'enabled': False, 'method': 'topk', 'schedule_offset': 1000}, 'different_groups': {}}, 'channel_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'layer_reduction': {'enabled': False}}
-[2023-04-14 08:44:35,661] [INFO] [config.py:957:print]   curriculum_enabled_legacy .... False
-[2023-04-14 08:44:35,661] [INFO] [config.py:957:print]   curriculum_params_legacy ..... False
-[2023-04-14 08:44:35,661] [INFO] [config.py:957:print]   data_efficiency_config ....... {'enabled': False, 'seed': 1234, 'data_sampling': {'enabled': False, 'num_epochs': 1000, 'num_workers': 0, 'curriculum_learning': {'enabled': False}}, 'data_routing': {'enabled': False, 'random_ltd': {'enabled': False, 'layer_token_lr_schedule': {'enabled': False}}}}
-[2023-04-14 08:44:35,661] [INFO] [config.py:957:print]   data_efficiency_enabled ...... False
-[2023-04-14 08:44:35,661] [INFO] [config.py:957:print]   dataloader_drop_last ......... False
-[2023-04-14 08:44:35,661] [INFO] [config.py:957:print]   disable_allgather ............ False
-[2023-04-14 08:44:35,661] [INFO] [config.py:957:print]   dump_state ................... False
-[2023-04-14 08:44:35,661] [INFO] [config.py:957:print]   dynamic_loss_scale_args ...... {'init_scale': 65536, 'scale_window': 100, 'delayed_shift': 2, 'min_scale': 1}
-[2023-04-14 08:44:35,661] [INFO] [config.py:957:print]   eigenvalue_enabled ........... False
-[2023-04-14 08:44:35,661] [INFO] [config.py:957:print]   eigenvalue_gas_boundary_resolution  1
-[2023-04-14 08:44:35,661] [INFO] [config.py:957:print]   eigenvalue_layer_name ........ bert.encoder.layer
-[2023-04-14 08:44:35,661] [INFO] [config.py:957:print]   eigenvalue_layer_num ......... 0
-[2023-04-14 08:44:35,661] [INFO] [config.py:957:print]   eigenvalue_max_iter .......... 100
-[2023-04-14 08:44:35,661] [INFO] [config.py:957:print]   eigenvalue_stability ......... 1e-06
-[2023-04-14 08:44:35,661] [INFO] [config.py:957:print]   eigenvalue_tol ............... 0.01
-[2023-04-14 08:44:35,661] [INFO] [config.py:957:print]   eigenvalue_verbose ........... False
-[2023-04-14 08:44:35,661] [INFO] [config.py:957:print]   elasticity_enabled ........... False
-[2023-04-14 08:44:35,661] [INFO] [config.py:957:print]   flops_profiler_config ........ {
+[2023-04-21 23:37:08,638] [INFO] [config.py:957:print]   bfloat16_enabled ............. False
+[2023-04-21 23:37:08,638] [INFO] [config.py:957:print]   checkpoint_parallel_write_pipeline  False
+[2023-04-21 23:37:08,638] [INFO] [config.py:957:print]   checkpoint_tag_validation_enabled  True
+[2023-04-21 23:37:08,638] [INFO] [config.py:957:print]   checkpoint_tag_validation_fail  False
+[2023-04-21 23:37:08,638] [INFO] [config.py:957:print]   comms_config ................. <deepspeed.comm.config.DeepSpeedCommsConfig object at 0x7fb1f6c6b1f0>
+[2023-04-21 23:37:08,638] [INFO] [config.py:957:print]   communication_data_type ...... None
+[2023-04-21 23:37:08,638] [INFO] [config.py:957:print]   compression_config ........... {'weight_quantization': {'shared_parameters': {'enabled': False, 'quantizer_kernel': False, 'schedule_offset': 0, 'quantize_groups': 1, 'quantize_verbose': False, 'quantization_type': 'symmetric', 'quantize_weight_in_forward': False, 'rounding': 'nearest', 'fp16_mixed_quantize': False, 'quantize_change_ratio': 0.001}, 'different_groups': {}}, 'activation_quantization': {'shared_parameters': {'enabled': False, 'quantization_type': 'symmetric', 'range_calibration': 'dynamic', 'schedule_offset': 1000}, 'different_groups': {}}, 'sparse_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'row_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'head_pruning': {'shared_parameters': {'enabled': False, 'method': 'topk', 'schedule_offset': 1000}, 'different_groups': {}}, 'channel_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'layer_reduction': {'enabled': False}}
+[2023-04-21 23:37:08,638] [INFO] [config.py:957:print]   curriculum_enabled_legacy .... False
+[2023-04-21 23:37:08,638] [INFO] [config.py:957:print]   curriculum_params_legacy ..... False
+[2023-04-21 23:37:08,638] [INFO] [config.py:957:print]   data_efficiency_config ....... {'enabled': False, 'seed': 1234, 'data_sampling': {'enabled': False, 'num_epochs': 1000, 'num_workers': 0, 'curriculum_learning': {'enabled': False}}, 'data_routing': {'enabled': False, 'random_ltd': {'enabled': False, 'layer_token_lr_schedule': {'enabled': False}}}}
+[2023-04-21 23:37:08,638] [INFO] [config.py:957:print]   data_efficiency_enabled ...... False
+[2023-04-21 23:37:08,638] [INFO] [config.py:957:print]   dataloader_drop_last ......... False
+[2023-04-21 23:37:08,638] [INFO] [config.py:957:print]   disable_allgather ............ False
+[2023-04-21 23:37:08,638] [INFO] [config.py:957:print]   dump_state ................... False
+[2023-04-21 23:37:08,638] [INFO] [config.py:957:print]   dynamic_loss_scale_args ...... {'init_scale': 65536, 'scale_window': 100, 'delayed_shift': 2, 'min_scale': 1}
+[2023-04-21 23:37:08,638] [INFO] [config.py:957:print]   eigenvalue_enabled ........... False
+[2023-04-21 23:37:08,638] [INFO] [config.py:957:print]   eigenvalue_gas_boundary_resolution  1
+[2023-04-21 23:37:08,638] [INFO] [config.py:957:print]   eigenvalue_layer_name ........ bert.encoder.layer
+[2023-04-21 23:37:08,638] [INFO] [config.py:957:print]   eigenvalue_layer_num ......... 0
+[2023-04-21 23:37:08,638] [INFO] [config.py:957:print]   eigenvalue_max_iter .......... 100
+[2023-04-21 23:37:08,638] [INFO] [config.py:957:print]   eigenvalue_stability ......... 1e-06
+[2023-04-21 23:37:08,638] [INFO] [config.py:957:print]   eigenvalue_tol ............... 0.01
+[2023-04-21 23:37:08,638] [INFO] [config.py:957:print]   eigenvalue_verbose ........... False
+[2023-04-21 23:37:08,638] [INFO] [config.py:957:print]   elasticity_enabled ........... False
+[2023-04-21 23:37:08,639] [INFO] [config.py:957:print]   flops_profiler_config ........ {
     "enabled": false, 
     "profile_step": 1, 
     "module_depth": -1, 
@@ -435,21 +355,21 @@ Time to load utils op: 0.0014064311981201172 seconds
     "detailed": true, 
     "output_file": null
 }
-[2023-04-14 08:44:35,661] [INFO] [config.py:957:print]   fp16_auto_cast ............... False
-[2023-04-14 08:44:35,661] [INFO] [config.py:957:print]   fp16_enabled ................. True
-[2023-04-14 08:44:35,661] [INFO] [config.py:957:print]   fp16_master_weights_and_gradients  False
-[2023-04-14 08:44:35,661] [INFO] [config.py:957:print]   global_rank .................. 0
-[2023-04-14 08:44:35,661] [INFO] [config.py:957:print]   grad_accum_dtype ............. None
-[2023-04-14 08:44:35,661] [INFO] [config.py:957:print]   gradient_accumulation_steps .. 1
-[2023-04-14 08:44:35,661] [INFO] [config.py:957:print]   gradient_clipping ............ 1.0
-[2023-04-14 08:44:35,661] [INFO] [config.py:957:print]   gradient_predivide_factor .... 1.0
-[2023-04-14 08:44:35,661] [INFO] [config.py:957:print]   hybrid_engine ................ enabled=True max_out_tokens=512 inference_tp_size=1 release_inference_cache=False pin_parameters=True tp_gather_partition_size=8
-[2023-04-14 08:44:35,661] [INFO] [config.py:957:print]   initial_dynamic_scale ........ 65536
-[2023-04-14 08:44:35,661] [INFO] [config.py:957:print]   load_universal_checkpoint .... False
-[2023-04-14 08:44:35,661] [INFO] [config.py:957:print]   loss_scale ................... 0
-[2023-04-14 08:44:35,661] [INFO] [config.py:957:print]   memory_breakdown ............. False
-[2023-04-14 08:44:35,662] [INFO] [config.py:957:print]   monitor_config ............... tensorboard=TensorBoardConfig(enabled=False, output_path='', job_name='DeepSpeedJobName') wandb=WandbConfig(enabled=False, group=None, team=None, project='deepspeed') csv_monitor=CSVConfig(enabled=False, output_path='', job_name='DeepSpeedJobName') enabled=False
-[2023-04-14 08:44:35,662] [INFO] [config.py:957:print]   nebula_config ................ {
+[2023-04-21 23:37:08,639] [INFO] [config.py:957:print]   fp16_auto_cast ............... False
+[2023-04-21 23:37:08,639] [INFO] [config.py:957:print]   fp16_enabled ................. True
+[2023-04-21 23:37:08,639] [INFO] [config.py:957:print]   fp16_master_weights_and_gradients  False
+[2023-04-21 23:37:08,639] [INFO] [config.py:957:print]   global_rank .................. 0
+[2023-04-21 23:37:08,639] [INFO] [config.py:957:print]   grad_accum_dtype ............. None
+[2023-04-21 23:37:08,639] [INFO] [config.py:957:print]   gradient_accumulation_steps .. 1
+[2023-04-21 23:37:08,639] [INFO] [config.py:957:print]   gradient_clipping ............ 1.0
+[2023-04-21 23:37:08,639] [INFO] [config.py:957:print]   gradient_predivide_factor .... 1.0
+[2023-04-21 23:37:08,639] [INFO] [config.py:957:print]   hybrid_engine ................ enabled=True max_out_tokens=512 inference_tp_size=1 release_inference_cache=False pin_parameters=True tp_gather_partition_size=8
+[2023-04-21 23:37:08,639] [INFO] [config.py:957:print]   initial_dynamic_scale ........ 65536
+[2023-04-21 23:37:08,639] [INFO] [config.py:957:print]   load_universal_checkpoint .... False
+[2023-04-21 23:37:08,639] [INFO] [config.py:957:print]   loss_scale ................... 0
+[2023-04-21 23:37:08,639] [INFO] [config.py:957:print]   memory_breakdown ............. False
+[2023-04-21 23:37:08,639] [INFO] [config.py:957:print]   monitor_config ............... tensorboard=TensorBoardConfig(enabled=False, output_path='', job_name='DeepSpeedJobName') wandb=WandbConfig(enabled=False, group=None, team=None, project='deepspeed') csv_monitor=CSVConfig(enabled=False, output_path='', job_name='DeepSpeedJobName') enabled=False
+[2023-04-21 23:37:08,639] [INFO] [config.py:957:print]   nebula_config ................ {
     "enabled": false, 
     "persistent_storage_path": null, 
     "persistent_time_interval": 100, 
@@ -457,29 +377,29 @@ Time to load utils op: 0.0014064311981201172 seconds
     "enable_nebula_load": true, 
     "load_path": null
 }
-[2023-04-14 08:44:35,662] [INFO] [config.py:957:print]   optimizer_legacy_fusion ...... False
-[2023-04-14 08:44:35,662] [INFO] [config.py:957:print]   optimizer_name ............... None
-[2023-04-14 08:44:35,662] [INFO] [config.py:957:print]   optimizer_params ............. None
-[2023-04-14 08:44:35,662] [INFO] [config.py:957:print]   pipeline ..................... {'stages': 'auto', 'partition': 'best', 'seed_layers': False, 'activation_checkpoint_interval': 0}
-[2023-04-14 08:44:35,662] [INFO] [config.py:957:print]   pld_enabled .................. False
-[2023-04-14 08:44:35,662] [INFO] [config.py:957:print]   pld_params ................... False
-[2023-04-14 08:44:35,662] [INFO] [config.py:957:print]   prescale_gradients ........... False
-[2023-04-14 08:44:35,662] [INFO] [config.py:957:print]   scheduler_name ............... None
-[2023-04-14 08:44:35,662] [INFO] [config.py:957:print]   scheduler_params ............. None
-[2023-04-14 08:44:35,662] [INFO] [config.py:957:print]   sparse_attention ............. None
-[2023-04-14 08:44:35,662] [INFO] [config.py:957:print]   sparse_gradients_enabled ..... False
-[2023-04-14 08:44:35,662] [INFO] [config.py:957:print]   steps_per_print .............. 10
-[2023-04-14 08:44:35,662] [INFO] [config.py:957:print]   train_batch_size ............. 32
-[2023-04-14 08:44:35,662] [INFO] [config.py:957:print]   train_micro_batch_size_per_gpu  4
-[2023-04-14 08:44:35,662] [INFO] [config.py:957:print]   use_node_local_storage ....... False
-[2023-04-14 08:44:35,662] [INFO] [config.py:957:print]   wall_clock_breakdown ......... False
-[2023-04-14 08:44:35,662] [INFO] [config.py:957:print]   world_size ................... 8
-[2023-04-14 08:44:35,662] [INFO] [config.py:957:print]   zero_allow_untested_optimizer  False
-[2023-04-14 08:44:35,662] [INFO] [config.py:957:print]   zero_config .................. stage=2 contiguous_gradients=True reduce_scatter=True reduce_bucket_size=500,000,000 allgather_partitions=True allgather_bucket_size=500,000,000 overlap_comm=False load_from_fp32_weights=True elastic_checkpoint=False offload_param=DeepSpeedZeroOffloadParamConfig(device='none', nvme_path=None, buffer_count=5, buffer_size=100,000,000, max_in_cpu=1,000,000,000, pin_memory=False) offload_optimizer=DeepSpeedZeroOffloadOptimizerConfig(device='none', nvme_path=None, buffer_count=4, pin_memory=False, pipeline=False, pipeline_read=False, pipeline_write=False, fast_init=False) sub_group_size=1,000,000,000 cpu_offload_param=None cpu_offload_use_pin_memory=None cpu_offload=None prefetch_bucket_size=30000000 param_persistence_threshold=10000 model_persistence_threshold=sys.maxsize max_live_parameters=30000000 max_reuse_distance=1,000,000,000 gather_16bit_weights_on_model_save=False stage3_gather_fp16_weights_on_model_save=False ignore_unused_parameters=True legacy_stage1=False round_robin_gradients=False memory_efficient_linear=False
-[2023-04-14 08:44:35,662] [INFO] [config.py:957:print]   zero_enabled ................. True
-[2023-04-14 08:44:35,662] [INFO] [config.py:957:print]   zero_force_ds_cpu_optimizer .. True
-[2023-04-14 08:44:35,662] [INFO] [config.py:957:print]   zero_optimization_stage ...... 2
-[2023-04-14 08:44:35,662] [INFO] [config.py:943:print_user_config]   json = {
+[2023-04-21 23:37:08,639] [INFO] [config.py:957:print]   optimizer_legacy_fusion ...... False
+[2023-04-21 23:37:08,639] [INFO] [config.py:957:print]   optimizer_name ............... None
+[2023-04-21 23:37:08,639] [INFO] [config.py:957:print]   optimizer_params ............. None
+[2023-04-21 23:37:08,639] [INFO] [config.py:957:print]   pipeline ..................... {'stages': 'auto', 'partition': 'best', 'seed_layers': False, 'activation_checkpoint_interval': 0}
+[2023-04-21 23:37:08,639] [INFO] [config.py:957:print]   pld_enabled .................. False
+[2023-04-21 23:37:08,639] [INFO] [config.py:957:print]   pld_params ................... False
+[2023-04-21 23:37:08,639] [INFO] [config.py:957:print]   prescale_gradients ........... False
+[2023-04-21 23:37:08,639] [INFO] [config.py:957:print]   scheduler_name ............... None
+[2023-04-21 23:37:08,639] [INFO] [config.py:957:print]   scheduler_params ............. None
+[2023-04-21 23:37:08,639] [INFO] [config.py:957:print]   sparse_attention ............. None
+[2023-04-21 23:37:08,639] [INFO] [config.py:957:print]   sparse_gradients_enabled ..... False
+[2023-04-21 23:37:08,639] [INFO] [config.py:957:print]   steps_per_print .............. 10
+[2023-04-21 23:37:08,639] [INFO] [config.py:957:print]   train_batch_size ............. 32
+[2023-04-21 23:37:08,639] [INFO] [config.py:957:print]   train_micro_batch_size_per_gpu  4
+[2023-04-21 23:37:08,639] [INFO] [config.py:957:print]   use_node_local_storage ....... False
+[2023-04-21 23:37:08,639] [INFO] [config.py:957:print]   wall_clock_breakdown ......... False
+[2023-04-21 23:37:08,639] [INFO] [config.py:957:print]   world_size ................... 8
+[2023-04-21 23:37:08,639] [INFO] [config.py:957:print]   zero_allow_untested_optimizer  False
+[2023-04-21 23:37:08,639] [INFO] [config.py:957:print]   zero_config .................. stage=2 contiguous_gradients=True reduce_scatter=True reduce_bucket_size=500,000,000 allgather_partitions=True allgather_bucket_size=500,000,000 overlap_comm=False load_from_fp32_weights=True elastic_checkpoint=False offload_param=DeepSpeedZeroOffloadParamConfig(device='none', nvme_path=None, buffer_count=5, buffer_size=100,000,000, max_in_cpu=1,000,000,000, pin_memory=False) offload_optimizer=DeepSpeedZeroOffloadOptimizerConfig(device='none', nvme_path=None, buffer_count=4, pin_memory=False, pipeline=False, pipeline_read=False, pipeline_write=False, fast_init=False) sub_group_size=1,000,000,000 cpu_offload_param=None cpu_offload_use_pin_memory=None cpu_offload=None prefetch_bucket_size=30000000 param_persistence_threshold=10000 model_persistence_threshold=sys.maxsize max_live_parameters=30000000 max_reuse_distance=1,000,000,000 gather_16bit_weights_on_model_save=False stage3_gather_fp16_weights_on_model_save=False ignore_unused_parameters=True legacy_stage1=False round_robin_gradients=False memory_efficient_linear=False
+[2023-04-21 23:37:08,639] [INFO] [config.py:957:print]   zero_enabled ................. True
+[2023-04-21 23:37:08,639] [INFO] [config.py:957:print]   zero_force_ds_cpu_optimizer .. True
+[2023-04-21 23:37:08,639] [INFO] [config.py:957:print]   zero_optimization_stage ...... 2
+[2023-04-21 23:37:08,640] [INFO] [config.py:943:print_user_config]   json = {
     "train_batch_size": 32, 
     "train_micro_batch_size_per_gpu": 4, 
     "steps_per_print": 10, 
@@ -505,6 +425,7 @@ Time to load utils op: 0.0014064311981201172 seconds
     "wall_clock_breakdown": false, 
     "hybrid_engine": {
         "enabled": true, 
+        "max_out_tokens": 512, 
         "inference_tp_size": 1, 
         "release_inference_cache": false, 
         "pin_parameters": true, 
@@ -514,7 +435,7 @@ Time to load utils op: 0.0014064311981201172 seconds
 Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module utils, skipping build step...
 Loading extension module utils...
-Time to load utils op: 0.0014083385467529297 seconds
+Time to load utils op: 0.0008935928344726562 seconds
 huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
@@ -538,8 +459,12 @@ To disable this warning, you can either:
 huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
 huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
 	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
 	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
@@ -559,7 +484,6 @@ huggingface/tokenizers: The current process just got forked, after parallelism h
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
 	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
-Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
 huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
@@ -580,16 +504,17 @@ Installed CUDA version 11.4 does not match the version torch was compiled with 1
 Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
 Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
 Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
+Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
+Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
 huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
 	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
-Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
 huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
 	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
-Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
 huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
@@ -606,30 +531,27 @@ huggingface/tokenizers: The current process just got forked, after parallelism h
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
 	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
 huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
 	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
 Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
-Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
 Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
-huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
-To disable this warning, you can either:
-	- Avoid using `tokenizers` before the fork if possible
-	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
 	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
-Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combinationUsing /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
-
 huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
 	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
@@ -638,7 +560,6 @@ huggingface/tokenizers: The current process just got forked, after parallelism h
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
 	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
-Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 Detected CUDA files, patching ldflags
 Emitting ninja build file /home/AdamG012/.cache/torch_extensions/py39_cu113/transformer_inference/build.ninja...
 Building extension module transformer_inference...
@@ -647,351 +568,32 @@ huggingface/tokenizers: The current process just got forked, after parallelism h
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
 	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
-[1/9] /usr/local/cuda-11.4/bin/nvcc  -ccbin /lus/theta-fs0/software/thetagpu/openmpi/openmpi-4.1.4_ucx-1.12.1_gcc-9.4.0/bin/mpicc -DTORCH_EXTENSION_NAME=transformer_inference -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -I/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/includes -I/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/includes -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include/TH -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include/THC -isystem /usr/local/cuda-11.4/include -isystem /home/AdamG012/.conda/envs/py39/include/python3.9 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_80,code=sm_80 --compiler-options '-fPIC' -O3 --use_fast_math -std=c++14 -U__CUDA_NO_HALF_OPERATORS__ -U__CUDA_NO_HALF_CONVERSIONS__ -U__CUDA_NO_HALF2_OPERATORS__ -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_80,code=compute_80 -c /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/dequantize.cu -o dequantize.cuda.o
-[2/9] /usr/local/cuda-11.4/bin/nvcc  -ccbin /lus/theta-fs0/software/thetagpu/openmpi/openmpi-4.1.4_ucx-1.12.1_gcc-9.4.0/bin/mpicc -DTORCH_EXTENSION_NAME=transformer_inference -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -I/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/includes -I/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/includes -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include/TH -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include/THC -isystem /usr/local/cuda-11.4/include -isystem /home/AdamG012/.conda/envs/py39/include/python3.9 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_80,code=sm_80 --compiler-options '-fPIC' -O3 --use_fast_math -std=c++14 -U__CUDA_NO_HALF_OPERATORS__ -U__CUDA_NO_HALF_CONVERSIONS__ -U__CUDA_NO_HALF2_OPERATORS__ -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_80,code=compute_80 -c /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/gelu.cu -o gelu.cuda.o
-[3/9] /usr/local/cuda-11.4/bin/nvcc  -ccbin /lus/theta-fs0/software/thetagpu/openmpi/openmpi-4.1.4_ucx-1.12.1_gcc-9.4.0/bin/mpicc -DTORCH_EXTENSION_NAME=transformer_inference -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -I/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/includes -I/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/includes -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include/TH -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include/THC -isystem /usr/local/cuda-11.4/include -isystem /home/AdamG012/.conda/envs/py39/include/python3.9 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_80,code=sm_80 --compiler-options '-fPIC' -O3 --use_fast_math -std=c++14 -U__CUDA_NO_HALF_OPERATORS__ -U__CUDA_NO_HALF_CONVERSIONS__ -U__CUDA_NO_HALF2_OPERATORS__ -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_80,code=compute_80 -c /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/relu.cu -o relu.cuda.o
-[4/9] /usr/local/cuda-11.4/bin/nvcc  -ccbin /lus/theta-fs0/software/thetagpu/openmpi/openmpi-4.1.4_ucx-1.12.1_gcc-9.4.0/bin/mpicc -DTORCH_EXTENSION_NAME=transformer_inference -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -I/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/includes -I/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/includes -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include/TH -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include/THC -isystem /usr/local/cuda-11.4/include -isystem /home/AdamG012/.conda/envs/py39/include/python3.9 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_80,code=sm_80 --compiler-options '-fPIC' -O3 --use_fast_math -std=c++14 -U__CUDA_NO_HALF_OPERATORS__ -U__CUDA_NO_HALF_CONVERSIONS__ -U__CUDA_NO_HALF2_OPERATORS__ -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_80,code=compute_80 -c /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/apply_rotary_pos_emb.cu -o apply_rotary_pos_emb.cuda.o
-[5/9] /usr/local/cuda-11.4/bin/nvcc  -ccbin /lus/theta-fs0/software/thetagpu/openmpi/openmpi-4.1.4_ucx-1.12.1_gcc-9.4.0/bin/mpicc -DTORCH_EXTENSION_NAME=transformer_inference -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -I/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/includes -I/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/includes -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include/TH -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include/THC -isystem /usr/local/cuda-11.4/include -isystem /home/AdamG012/.conda/envs/py39/include/python3.9 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_80,code=sm_80 --compiler-options '-fPIC' -O3 --use_fast_math -std=c++14 -U__CUDA_NO_HALF_OPERATORS__ -U__CUDA_NO_HALF_CONVERSIONS__ -U__CUDA_NO_HALF2_OPERATORS__ -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_80,code=compute_80 -c /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/transform.cu -o transform.cuda.o
-/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/transform.cu(57): warning: variable "lane" was declared but never referenced
-
-/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/transform.cu(94): warning: variable "half_dim" was declared but never referenced
-
-/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/transform.cu(111): warning: variable "vals_half" was declared but never referenced
-
-/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/transform.cu(112): warning: variable "output_half" was declared but never referenced
-
-/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/transform.cu(129): warning: variable "lane" was declared but never referenced
-
-[6/9] /usr/local/cuda-11.4/bin/nvcc  -ccbin /lus/theta-fs0/software/thetagpu/openmpi/openmpi-4.1.4_ucx-1.12.1_gcc-9.4.0/bin/mpicc -DTORCH_EXTENSION_NAME=transformer_inference -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -I/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/includes -I/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/includes -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include/TH -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include/THC -isystem /usr/local/cuda-11.4/include -isystem /home/AdamG012/.conda/envs/py39/include/python3.9 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_80,code=sm_80 --compiler-options '-fPIC' -O3 --use_fast_math -std=c++14 -U__CUDA_NO_HALF_OPERATORS__ -U__CUDA_NO_HALF_CONVERSIONS__ -U__CUDA_NO_HALF2_OPERATORS__ -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_80,code=compute_80 -c /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/softmax.cu -o softmax.cuda.o
-[7/9] /usr/local/cuda-11.4/bin/nvcc  -ccbin /lus/theta-fs0/software/thetagpu/openmpi/openmpi-4.1.4_ucx-1.12.1_gcc-9.4.0/bin/mpicc -DTORCH_EXTENSION_NAME=transformer_inference -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -I/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/includes -I/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/includes -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include/TH -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include/THC -isystem /usr/local/cuda-11.4/include -isystem /home/AdamG012/.conda/envs/py39/include/python3.9 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_80,code=sm_80 --compiler-options '-fPIC' -O3 --use_fast_math -std=c++14 -U__CUDA_NO_HALF_OPERATORS__ -U__CUDA_NO_HALF_CONVERSIONS__ -U__CUDA_NO_HALF2_OPERATORS__ -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_80,code=compute_80 -c /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu -o layer_norm.cuda.o
-/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced
-          detected during:
-            instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=1, threadsPerGroup=1, maxThreads=256]" 
-(166): here
-            instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" 
-(192): here
-
-/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced
-          detected during:
-            instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=1, threadsPerGroup=1, maxThreads=256]" 
-(166): here
-            instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" 
-(192): here
-
-/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced
-          detected during:
-            instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=1, threadsPerGroup=2, maxThreads=256]" 
-(168): here
-            instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" 
-(192): here
-
-/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced
-          detected during:
-            instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=1, threadsPerGroup=2, maxThreads=256]" 
-(168): here
-            instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" 
-(192): here
-
-/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced
-          detected during:
-            instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=1, threadsPerGroup=4, maxThreads=256]" 
-(170): here
-            instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" 
-(192): here
-
-/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced
-          detected during:
-            instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=1, threadsPerGroup=4, maxThreads=256]" 
-(170): here
-            instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" 
-(192): here
-
-/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced
-          detected during:
-            instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=1, threadsPerGroup=8, maxThreads=256]" 
-(172): here
-            instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" 
-(192): here
-
-/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced
-          detected during:
-            instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=1, threadsPerGroup=8, maxThreads=256]" 
-(172): here
-            instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" 
-(192): here
-
-/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced
-          detected during:
-            instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=1, threadsPerGroup=16, maxThreads=256]" 
-(174): here
-            instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" 
-(192): here
-
-/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced
-          detected during:
-            instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=1, threadsPerGroup=16, maxThreads=256]" 
-(174): here
-            instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" 
-(192): here
-
-/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced
-          detected during:
-            instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=2, threadsPerGroup=256, maxThreads=256]" 
-(179): here
-            instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" 
-(192): here
-
-/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced
-          detected during:
-            instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=2, threadsPerGroup=256, maxThreads=256]" 
-(179): here
-            instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" 
-(192): here
-
-/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced
-          detected during:
-            instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=4, threadsPerGroup=256, maxThreads=256]" 
-(182): here
-            instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" 
-(192): here
-
-/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced
-          detected during:
-            instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=4, threadsPerGroup=256, maxThreads=256]" 
-(182): here
-            instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" 
-(192): here
-
-/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced
-          detected during:
-            instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=6, threadsPerGroup=256, maxThreads=256]" 
-(185): here
-            instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" 
-(192): here
-
-/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced
-          detected during:
-            instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=6, threadsPerGroup=256, maxThreads=256]" 
-(185): here
-            instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" 
-(192): here
-
-/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced
-          detected during:
-            instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=8, threadsPerGroup=256, maxThreads=256]" 
-(188): here
-            instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" 
-(192): here
-
-/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced
-          detected during:
-            instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=8, threadsPerGroup=256, maxThreads=256]" 
-(188): here
-            instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" 
-(192): here
-
-/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced
-          detected during:
-            instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=1, threadsPerGroup=1, maxThreads=256]" 
-(166): here
-            instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" 
-(200): here
-
-/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced
-          detected during:
-            instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=1, threadsPerGroup=1, maxThreads=256]" 
-(166): here
-            instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" 
-(200): here
-
-/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced
-          detected during:
-            instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=1, threadsPerGroup=2, maxThreads=256]" 
-(168): here
-            instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" 
-(200): here
-
-/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced
-          detected during:
-            instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=1, threadsPerGroup=2, maxThreads=256]" 
-(168): here
-            instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" 
-(200): here
-
-/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced
-          detected during:
-            instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=1, threadsPerGroup=4, maxThreads=256]" 
-(170): here
-            instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" 
-(200): here
-
-/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced
-          detected during:
-            instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=1, threadsPerGroup=4, maxThreads=256]" 
-(170): here
-            instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" 
-(200): here
-
-/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced
-          detected during:
-            instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=1, threadsPerGroup=8, maxThreads=256]" 
-(172): here
-            instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" 
-(200): here
-
-/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced
-          detected during:
-            instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=1, threadsPerGroup=8, maxThreads=256]" 
-(172): here
-            instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" 
-(200): here
-
-/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced
-          detected during:
-            instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=1, threadsPerGroup=16, maxThreads=256]" 
-(174): here
-            instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" 
-(200): here
-
-/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced
-          detected during:
-            instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=1, threadsPerGroup=16, maxThreads=256]" 
-(174): here
-            instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" 
-(200): here
-
-/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced
-          detected during:
-            instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=4, threadsPerGroup=256, maxThreads=256]" 
-(179): here
-            instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" 
-(200): here
-
-/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced
-          detected during:
-            instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=4, threadsPerGroup=256, maxThreads=256]" 
-(179): here
-            instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" 
-(200): here
-
-/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced
-          detected during:
-            instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=8, threadsPerGroup=256, maxThreads=256]" 
-(182): here
-            instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" 
-(200): here
-
-/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced
-          detected during:
-            instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=8, threadsPerGroup=256, maxThreads=256]" 
-(182): here
-            instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" 
-(200): here
-
-/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced
-          detected during:
-            instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=12, threadsPerGroup=256, maxThreads=256]" 
-(185): here
-            instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" 
-(200): here
-
-/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced
-          detected during:
-            instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=12, threadsPerGroup=256, maxThreads=256]" 
-(185): here
-            instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" 
-(200): here
-
-/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced
-          detected during:
-            instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=16, threadsPerGroup=256, maxThreads=256]" 
-(188): here
-            instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" 
-(200): here
-
-/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced
-          detected during:
-            instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=16, threadsPerGroup=256, maxThreads=256]" 
-(188): here
-            instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" 
-(200): here
-
-[8/9] /lus/theta-fs0/software/thetagpu/openmpi/openmpi-4.1.4_ucx-1.12.1_gcc-9.4.0/bin/mpicxx -MMD -MF pt_binding.o.d -DTORCH_EXTENSION_NAME=transformer_inference -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -I/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/includes -I/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/includes -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include/TH -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include/THC -isystem /usr/local/cuda-11.4/include -isystem /home/AdamG012/.conda/envs/py39/include/python3.9 -D_GLIBCXX_USE_CXX11_ABI=0 -fPIC -std=c++14 -O3 -std=c++14 -g -Wno-reorder -c /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp -o pt_binding.o
-In file included from /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:10:
-/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/includes/inference_context.h: In member function ‘void InferenceContext::GenWorkSpace(const unsigned int&, const unsigned int&, const size_t&, const size_t&, const size_t&, const unsigned int&, const bool&, const size_t&, const unsigned int&, unsigned int, unsigned int)’:
-/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/includes/inference_context.h:139:52: warning: format ‘%d’ expects argument of type ‘int’, but argument 2 has type ‘size_t’ {aka ‘long unsigned int’} [-Wformat=]
-  139 |                 "Allocatable workspace available (%d tokens) is less than minimum requested "
-      |                                                   ~^
-      |                                                    |
-      |                                                    int
-      |                                                   %ld
-  140 |                 "workspace (%d tokens)\n",
-  141 |                 _max_seq_len,
-      |                 ~~~~~~~~~~~~                        
-      |                 |
-      |                 size_t {aka long unsigned int}
-/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp: In instantiation of ‘std::vector<at::Tensor> ds_softmax_context(at::Tensor&, at::Tensor&, int, bool, bool, int, float, bool, bool, int, bool, unsigned int, unsigned int, at::Tensor&) [with T = float]’:
-/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:1750:99:   required from here
-/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:536:50: warning: narrowing conversion of ‘(((size_t)hidden_dim) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing]
-  536 |                                      {hidden_dim * InferenceContext::Instance().GetMaxTokenLenght(),
-      |                                       ~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:536:50: warning: narrowing conversion of ‘(((size_t)hidden_dim) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing]
-/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:537:41: warning: narrowing conversion of ‘(((size_t)k) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing]
-  537 |                                       k * InferenceContext::Instance().GetMaxTokenLenght(),
-      |                                       ~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:537:41: warning: narrowing conversion of ‘(((size_t)k) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing]
-/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:545:38: warning: narrowing conversion of ‘(((size_t)hidden_dim) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing]
-  545 |                          {hidden_dim * InferenceContext::Instance().GetMaxTokenLenght(),
-      |                           ~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:545:38: warning: narrowing conversion of ‘(((size_t)hidden_dim) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing]
-/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:546:29: warning: narrowing conversion of ‘(((size_t)k) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing]
-  546 |                           k * InferenceContext::Instance().GetMaxTokenLenght(),
-      |                           ~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:546:29: warning: narrowing conversion of ‘(((size_t)k) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing]
-/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp: In instantiation of ‘std::vector<at::Tensor> ds_softmax_context(at::Tensor&, at::Tensor&, int, bool, bool, int, float, bool, bool, int, bool, unsigned int, unsigned int, at::Tensor&) [with T = __half]’:
-/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:1753:49:   required from here
-/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:536:50: warning: narrowing conversion of ‘(((size_t)hidden_dim) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing]
-  536 |                                      {hidden_dim * InferenceContext::Instance().GetMaxTokenLenght(),
-      |                                       ~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:536:50: warning: narrowing conversion of ‘(((size_t)hidden_dim) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing]
-/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:537:41: warning: narrowing conversion of ‘(((size_t)k) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing]
-  537 |                                       k * InferenceContext::Instance().GetMaxTokenLenght(),
-      |                                       ~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:537:41: warning: narrowing conversion of ‘(((size_t)k) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing]
-/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:545:38: warning: narrowing conversion of ‘(((size_t)hidden_dim) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing]
-  545 |                          {hidden_dim * InferenceContext::Instance().GetMaxTokenLenght(),
-      |                           ~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:545:38: warning: narrowing conversion of ‘(((size_t)hidden_dim) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing]
-/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:546:29: warning: narrowing conversion of ‘(((size_t)k) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing]
-  546 |                           k * InferenceContext::Instance().GetMaxTokenLenght(),
-      |                           ~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:546:29: warning: narrowing conversion of ‘(((size_t)k) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing]
-[9/9] /lus/theta-fs0/software/thetagpu/openmpi/openmpi-4.1.4_ucx-1.12.1_gcc-9.4.0/bin/mpicxx pt_binding.o gelu.cuda.o relu.cuda.o layer_norm.cuda.o softmax.cuda.o dequantize.cuda.o apply_rotary_pos_emb.cuda.o transform.cuda.o -shared -lcurand -L/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/lib -lc10 -lc10_cuda -ltorch_cpu -ltorch_cuda_cu -ltorch_cuda_cpp -ltorch -ltorch_python -L/usr/local/cuda-11.4/lib64 -lcudart -o transformer_inference.so
-Loading extension module transformer_inference...
-Loading extension module transformer_inference...
-Time to load transformer_inference op: 33.57252836227417 seconds
-Time to load transformer_inference op: 33.4696843624115 seconds
+ninja: no work to do.
 Loading extension module transformer_inference...
-Time to load transformer_inference op: 33.380457639694214 seconds
 Loading extension module transformer_inference...
+Time to load transformer_inference op: 0.695580244064331 seconds
+Time to load transformer_inference op: 0.6461379528045654 seconds
+[2023-04-21 23:37:09,607] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed-Inference config: {'layer_id': 0, 'hidden_size': 2048, 'intermediate_size': 8192, 'heads': 32, 'num_hidden_layers': -1, 'fp16': True, 'pre_layer_norm': True, 'local_rank': -1, 'stochastic_mode': False, 'epsilon': 1e-05, 'mp_size': 1, 'q_int8': False, 'scale_attention': True, 'triangular_masking': True, 'local_attention': False, 'window_size': 1, 'rotary_dim': -1, 'rotate_half': False, 'rotate_every_two': True, 'return_tuple': True, 'mlp_after_attn': True, 'mlp_act_func_type': <ActivationFuncType.ReLU: 2>, 'specialized_mode': False, 'training_mp_size': 1, 'bigscience_bloom': False, 'max_out_tokens': 512, 'min_out_tokens': 512, 'scale_attn_by_inverse_layer_idx': False, 'enable_qkv_quantization': False, 'use_mup': False, 'return_single_tuple': False, 'set_empty_params': True, 'transposed_mode': True}
 Loading extension module transformer_inference...
-Time to load transformer_inference op: 33.478936433792114 seconds
-Time to load transformer_inference op: 33.23845458030701 seconds
 Loading extension module transformer_inference...
-Time to load transformer_inference op: 32.77969670295715 seconds
-[2023-04-14 08:45:10,904] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed-Inference config: {'layer_id': 0, 'hidden_size': 2048, 'intermediate_size': 8192, 'heads': 32, 'num_hidden_layers': -1, 'fp16': True, 'pre_layer_norm': True, 'local_rank': -1, 'stochastic_mode': False, 'epsilon': 1e-05, 'mp_size': 1, 'q_int8': False, 'scale_attention': True, 'triangular_masking': True, 'local_attention': False, 'window_size': 1, 'rotary_dim': -1, 'rotate_half': False, 'rotate_every_two': True, 'return_tuple': True, 'mlp_after_attn': True, 'mlp_act_func_type': <ActivationFuncType.ReLU: 2>, 'specialized_mode': False, 'training_mp_size': 1, 'bigscience_bloom': False, 'max_out_tokens': 512, 'min_out_tokens': 512, 'scale_attn_by_inverse_layer_idx': False, 'enable_qkv_quantization': False, 'use_mup': False, 'return_single_tuple': False, 'set_empty_params': True, 'transposed_mode': True}
+Time to load transformer_inference op: 0.7190039157867432 seconds
+Time to load transformer_inference op: 0.7204358577728271 seconds
 Loading extension module transformer_inference...
-Time to load transformer_inference op: 33.499565839767456 seconds
+Time to load transformer_inference op: 0.7205994129180908 seconds
 Loading extension module transformer_inference...
+Time to load transformer_inference op: 0.722914457321167 seconds
 huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
 	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
-Time to load transformer_inference op: 33.51667404174805 seconds
 huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
 	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+Loading extension module transformer_inference...
+Loading extension module transformer_inference...
+Time to load transformer_inference op: 0.7355391979217529 seconds
+Time to load transformer_inference op: 0.657106876373291 seconds
 huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
@@ -1032,7 +634,6 @@ huggingface/tokenizers: The current process just got forked, after parallelism h
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
 	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
-Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
 huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
@@ -1048,14 +649,13 @@ To disable this warning, you can either:
 	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
 Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
 Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
+Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
+Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
 huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
 	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
-Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
-No modifications detected for re-loaded extension module transformer_inference, skipping build step...
-Loading extension module transformer_inference...
-Time to load transformer_inference op: 0.17304539680480957 seconds
+Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
 huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
@@ -1069,7 +669,6 @@ huggingface/tokenizers: The current process just got forked, after parallelism h
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
 	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
-Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
 huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
@@ -1078,32 +677,39 @@ huggingface/tokenizers: The current process just got forked, after parallelism h
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
 	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
-Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
-No modifications detected for re-loaded extension module transformer_inference, skipping build step...
-Loading extension module transformer_inference...
-Time to load transformer_inference op: 0.2198350429534912 seconds
 huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
 	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+No modifications detected for re-loaded extension module transformer_inference, skipping build step...
+Loading extension module transformer_inference...
+Time to load transformer_inference op: 0.11342453956604004 seconds
 huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
 	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+No modifications detected for re-loaded extension module transformer_inference, skipping build step...
+Loading extension module transformer_inference...
+Time to load transformer_inference op: 0.11234903335571289 seconds
 Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
 Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module transformer_inference, skipping build step...
 Loading extension module transformer_inference...
-Time to load transformer_inference op: 0.23125028610229492 seconds
+Time to load transformer_inference op: 0.11516976356506348 seconds
 Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module transformer_inference, skipping build step...
 Loading extension module transformer_inference...
-Time to load transformer_inference op: 0.23965048789978027 seconds
-Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
-huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
-To disable this warning, you can either:
-	- Avoid using `tokenizers` before the fork if possible
-	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+Time to load transformer_inference op: 0.11846494674682617 seconds
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+No modifications detected for re-loaded extension module transformer_inference, skipping build step...
+Loading extension module transformer_inference...
+Time to load transformer_inference op: 0.13445663452148438 seconds
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+No modifications detected for re-loaded extension module transformer_inference, skipping build step...
+Loading extension module transformer_inference...
+Time to load transformer_inference op: 0.13429594039916992 seconds
 huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
@@ -1111,15 +717,11 @@ To disable this warning, you can either:
 Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module transformer_inference, skipping build step...
 Loading extension module transformer_inference...
-Time to load transformer_inference op: 0.24375414848327637 seconds
+Time to load transformer_inference op: 0.12044620513916016 seconds
 huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
 	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
-Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
-No modifications detected for re-loaded extension module transformer_inference, skipping build step...
-Loading extension module transformer_inference...
-Time to load transformer_inference op: 0.25126194953918457 seconds
 huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
@@ -1132,7 +734,6 @@ huggingface/tokenizers: The current process just got forked, after parallelism h
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
 	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
-Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
 huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
@@ -1146,29 +747,26 @@ To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
 	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
 Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
-No modifications detected for re-loaded extension module transformer_inference, skipping build step...
-Loading extension module transformer_inference...
-Time to load transformer_inference op: 0.31780099868774414 seconds
 huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
 	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
-Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module transformer_inference, skipping build step...
 Loading extension module transformer_inference...
-Time to load transformer_inference op: 0.1689906120300293 seconds
+Time to load transformer_inference op: 0.15472984313964844 seconds
 huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
 	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
-Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
 	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
-No modifications detected for re-loaded extension module transformer_inference, skipping build step...
-Loading extension module transformer_inference...
-Time to load transformer_inference op: 0.33678388595581055 seconds
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
 Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
 huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
 To disable this warning, you can either:
@@ -1187,7 +785,6 @@ To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
 	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
 Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
-Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
 huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
@@ -1201,49 +798,41 @@ huggingface/tokenizers: The current process just got forked, after parallelism h
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
 	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
-Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
-No modifications detected for re-loaded extension module transformer_inference, skipping build step...
-Loading extension module transformer_inference...
-Time to load transformer_inference op: 0.2203667163848877 seconds
 Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
 huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
 	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
+Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
 Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module transformer_inference, skipping build step...
 Loading extension module transformer_inference...
-Time to load transformer_inference op: 0.2368457317352295 seconds
-huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
-To disable this warning, you can either:
-	- Avoid using `tokenizers` before the fork if possible
-	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+Time to load transformer_inference op: 0.11486577987670898 seconds
 Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module transformer_inference, skipping build step...
 Loading extension module transformer_inference...
-Time to load transformer_inference op: 0.2534804344177246 seconds
+Time to load transformer_inference op: 0.11190509796142578 seconds
 huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
 	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
 Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
-No modifications detected for re-loaded extension module transformer_inference, skipping build step...
-Loading extension module transformer_inference...
-Time to load transformer_inference op: 0.2447066307067871 seconds
 huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
 	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
-Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module transformer_inference, skipping build step...
 Loading extension module transformer_inference...
-Time to load transformer_inference op: 0.27658915519714355 seconds
-Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
-Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
+Time to load transformer_inference op: 0.1143035888671875 seconds
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
 	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+No modifications detected for re-loaded extension module transformer_inference, skipping build step...
+Loading extension module transformer_inference...
+Time to load transformer_inference op: 0.11675357818603516 seconds
 huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
@@ -1251,45 +840,58 @@ To disable this warning, you can either:
 Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module transformer_inference, skipping build step...
 Loading extension module transformer_inference...
-Time to load transformer_inference op: 0.3282308578491211 seconds
+Time to load transformer_inference op: 0.12160348892211914 seconds
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+No modifications detected for re-loaded extension module transformer_inference, skipping build step...
+Loading extension module transformer_inference...
+Time to load transformer_inference op: 0.1185293197631836 seconds
 Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module transformer_inference, skipping build step...
 Loading extension module transformer_inference...
-Time to load transformer_inference op: 0.8675589561462402 seconds
-*****************[end] Initialized Actor Model [end] (duration: 119.75s)******************
+Time to load transformer_inference op: 0.13269996643066406 seconds
+******************[end] Initialized Actor Model [end] (duration: 40.32s)******************
 *************************[start] Initializing Ref Model [start] **************************
-Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
-
+Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+No modifications detected for re-loaded extension module transformer_inference, skipping build step...
+Loading extension module transformer_inference...
+Time to load transformer_inference op: 0.16194510459899902 seconds
+[2023-04-21 23:37:27,313] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed info: version=0.9.0, git-hash=unknown, git-branch=unknown
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module utils, skipping build step...
 Loading extension module utils...
-Time to load utils op: 1.891409158706665 seconds
+Time to load utils op: 0.0013289451599121094 seconds
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module utils, skipping build step...
 Loading extension module utils...
-Time to load utils op: 0.8250091075897217 seconds
+Time to load utils op: 0.0012705326080322266 seconds
 Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module utils, skipping build step...
 Loading extension module utils...
-Time to load utils op: 0.0019402503967285156 seconds
+Time to load utils op: 0.0012392997741699219 seconds
 Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module utils, skipping build step...
 Loading extension module utils...
-Time to load utils op: 0.0012993812561035156 seconds
+Time to load utils op: 0.0012006759643554688 seconds
 Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module utils, skipping build step...
 Loading extension module utils...
-Time to load utils op: 0.0013997554779052734 seconds
+Time to load utils op: 0.0012307167053222656 seconds
 Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module utils, skipping build step...
 Loading extension module utils...
-Time to load utils op: 0.0013012886047363281 seconds
+Time to load utils op: 0.0012030601501464844 seconds
 Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module utils, skipping build step...
 Loading extension module utils...
-Time to load utils op: 0.0018343925476074219 seconds
-[2023-04-14 08:45:57,529] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed info: version=0.9.0, git-hash=unknown, git-branch=unknown
-[2023-04-14 08:46:12,698] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed Flops Profiler Enabled: False
-[2023-04-14 08:46:13,190] [INFO] [config.py:953:print] DeepSpeedEngine configuration:
-[2023-04-14 08:46:13,191] [INFO] [config.py:957:print]   activation_checkpointing_config  {
+Time to load utils op: 0.0012555122375488281 seconds
+[2023-04-21 23:37:35,898] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed Flops Profiler Enabled: False
+[2023-04-21 23:37:35,899] [INFO] [config.py:953:print] DeepSpeedEngine configuration:
+[2023-04-21 23:37:35,900] [INFO] [config.py:957:print]   activation_checkpointing_config  {
     "partition_activations": false, 
     "contiguous_memory_optimization": false, 
     "cpu_checkpointing": false, 
@@ -1297,10 +899,10 @@ Time to load utils op: 0.0018343925476074219 seconds
     "synchronize_checkpoint_boundary": false, 
     "profile": false
 }
-[2023-04-14 08:46:13,191] [INFO] [config.py:957:print]   aio_config ................... {'block_size': 1048576, 'queue_depth': 8, 'thread_count': 1, 'single_submit': False, 'overlap_events': True}
-[2023-04-14 08:46:13,191] [INFO] [config.py:957:print]   amp_enabled .................. False
-[2023-04-14 08:46:13,191] [INFO] [config.py:957:print]   amp_params ................... False
-[2023-04-14 08:46:13,191] [INFO] [config.py:957:print]   autotuning_config ............ {
+[2023-04-21 23:37:35,900] [INFO] [config.py:957:print]   aio_config ................... {'block_size': 1048576, 'queue_depth': 8, 'thread_count': 1, 'single_submit': False, 'overlap_events': True}
+[2023-04-21 23:37:35,900] [INFO] [config.py:957:print]   amp_enabled .................. False
+[2023-04-21 23:37:35,900] [INFO] [config.py:957:print]   amp_params ................... False
+[2023-04-21 23:37:35,900] [INFO] [config.py:957:print]   autotuning_config ............ {
     "enabled": false, 
     "start_step": null, 
     "end_step": null, 
@@ -1325,31 +927,31 @@ Time to load utils op: 0.0018343925476074219 seconds
     "min_train_micro_batch_size_per_gpu": 1, 
     "num_tuning_micro_batch_sizes": 3
 }
-[2023-04-14 08:46:13,191] [INFO] [config.py:957:print]   bfloat16_enabled ............. False
-[2023-04-14 08:46:13,191] [INFO] [config.py:957:print]   checkpoint_parallel_write_pipeline  False
-[2023-04-14 08:46:13,191] [INFO] [config.py:957:print]   checkpoint_tag_validation_enabled  True
-[2023-04-14 08:46:13,191] [INFO] [config.py:957:print]   checkpoint_tag_validation_fail  False
-[2023-04-14 08:46:13,191] [INFO] [config.py:957:print]   comms_config ................. <deepspeed.comm.config.DeepSpeedCommsConfig object at 0x7f0511539820>
-[2023-04-14 08:46:13,191] [INFO] [config.py:957:print]   communication_data_type ...... None
-[2023-04-14 08:46:13,191] [INFO] [config.py:957:print]   compression_config ........... {'weight_quantization': {'shared_parameters': {'enabled': False, 'quantizer_kernel': False, 'schedule_offset': 0, 'quantize_groups': 1, 'quantize_verbose': False, 'quantization_type': 'symmetric', 'quantize_weight_in_forward': False, 'rounding': 'nearest', 'fp16_mixed_quantize': False, 'quantize_change_ratio': 0.001}, 'different_groups': {}}, 'activation_quantization': {'shared_parameters': {'enabled': False, 'quantization_type': 'symmetric', 'range_calibration': 'dynamic', 'schedule_offset': 1000}, 'different_groups': {}}, 'sparse_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'row_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'head_pruning': {'shared_parameters': {'enabled': False, 'method': 'topk', 'schedule_offset': 1000}, 'different_groups': {}}, 'channel_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'layer_reduction': {'enabled': False}}
-[2023-04-14 08:46:13,191] [INFO] [config.py:957:print]   curriculum_enabled_legacy .... False
-[2023-04-14 08:46:13,191] [INFO] [config.py:957:print]   curriculum_params_legacy ..... False
-[2023-04-14 08:46:13,191] [INFO] [config.py:957:print]   data_efficiency_config ....... {'enabled': False, 'seed': 1234, 'data_sampling': {'enabled': False, 'num_epochs': 1000, 'num_workers': 0, 'curriculum_learning': {'enabled': False}}, 'data_routing': {'enabled': False, 'random_ltd': {'enabled': False, 'layer_token_lr_schedule': {'enabled': False}}}}
-[2023-04-14 08:46:13,191] [INFO] [config.py:957:print]   data_efficiency_enabled ...... False
-[2023-04-14 08:46:13,191] [INFO] [config.py:957:print]   dataloader_drop_last ......... False
-[2023-04-14 08:46:13,191] [INFO] [config.py:957:print]   disable_allgather ............ False
-[2023-04-14 08:46:13,191] [INFO] [config.py:957:print]   dump_state ................... False
-[2023-04-14 08:46:13,191] [INFO] [config.py:957:print]   dynamic_loss_scale_args ...... None
-[2023-04-14 08:46:13,191] [INFO] [config.py:957:print]   eigenvalue_enabled ........... False
-[2023-04-14 08:46:13,191] [INFO] [config.py:957:print]   eigenvalue_gas_boundary_resolution  1
-[2023-04-14 08:46:13,191] [INFO] [config.py:957:print]   eigenvalue_layer_name ........ bert.encoder.layer
-[2023-04-14 08:46:13,191] [INFO] [config.py:957:print]   eigenvalue_layer_num ......... 0
-[2023-04-14 08:46:13,191] [INFO] [config.py:957:print]   eigenvalue_max_iter .......... 100
-[2023-04-14 08:46:13,191] [INFO] [config.py:957:print]   eigenvalue_stability ......... 1e-06
-[2023-04-14 08:46:13,191] [INFO] [config.py:957:print]   eigenvalue_tol ............... 0.01
-[2023-04-14 08:46:13,191] [INFO] [config.py:957:print]   eigenvalue_verbose ........... False
-[2023-04-14 08:46:13,191] [INFO] [config.py:957:print]   elasticity_enabled ........... False
-[2023-04-14 08:46:13,191] [INFO] [config.py:957:print]   flops_profiler_config ........ {
+[2023-04-21 23:37:35,900] [INFO] [config.py:957:print]   bfloat16_enabled ............. False
+[2023-04-21 23:37:35,900] [INFO] [config.py:957:print]   checkpoint_parallel_write_pipeline  False
+[2023-04-21 23:37:35,900] [INFO] [config.py:957:print]   checkpoint_tag_validation_enabled  True
+[2023-04-21 23:37:35,900] [INFO] [config.py:957:print]   checkpoint_tag_validation_fail  False
+[2023-04-21 23:37:35,900] [INFO] [config.py:957:print]   comms_config ................. <deepspeed.comm.config.DeepSpeedCommsConfig object at 0x7fb1f8fb88e0>
+[2023-04-21 23:37:35,900] [INFO] [config.py:957:print]   communication_data_type ...... None
+[2023-04-21 23:37:35,900] [INFO] [config.py:957:print]   compression_config ........... {'weight_quantization': {'shared_parameters': {'enabled': False, 'quantizer_kernel': False, 'schedule_offset': 0, 'quantize_groups': 1, 'quantize_verbose': False, 'quantization_type': 'symmetric', 'quantize_weight_in_forward': False, 'rounding': 'nearest', 'fp16_mixed_quantize': False, 'quantize_change_ratio': 0.001}, 'different_groups': {}}, 'activation_quantization': {'shared_parameters': {'enabled': False, 'quantization_type': 'symmetric', 'range_calibration': 'dynamic', 'schedule_offset': 1000}, 'different_groups': {}}, 'sparse_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'row_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'head_pruning': {'shared_parameters': {'enabled': False, 'method': 'topk', 'schedule_offset': 1000}, 'different_groups': {}}, 'channel_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'layer_reduction': {'enabled': False}}
+[2023-04-21 23:37:35,900] [INFO] [config.py:957:print]   curriculum_enabled_legacy .... False
+[2023-04-21 23:37:35,900] [INFO] [config.py:957:print]   curriculum_params_legacy ..... False
+[2023-04-21 23:37:35,900] [INFO] [config.py:957:print]   data_efficiency_config ....... {'enabled': False, 'seed': 1234, 'data_sampling': {'enabled': False, 'num_epochs': 1000, 'num_workers': 0, 'curriculum_learning': {'enabled': False}}, 'data_routing': {'enabled': False, 'random_ltd': {'enabled': False, 'layer_token_lr_schedule': {'enabled': False}}}}
+[2023-04-21 23:37:35,900] [INFO] [config.py:957:print]   data_efficiency_enabled ...... False
+[2023-04-21 23:37:35,900] [INFO] [config.py:957:print]   dataloader_drop_last ......... False
+[2023-04-21 23:37:35,900] [INFO] [config.py:957:print]   disable_allgather ............ False
+[2023-04-21 23:37:35,900] [INFO] [config.py:957:print]   dump_state ................... False
+[2023-04-21 23:37:35,900] [INFO] [config.py:957:print]   dynamic_loss_scale_args ...... None
+[2023-04-21 23:37:35,900] [INFO] [config.py:957:print]   eigenvalue_enabled ........... False
+[2023-04-21 23:37:35,900] [INFO] [config.py:957:print]   eigenvalue_gas_boundary_resolution  1
+[2023-04-21 23:37:35,900] [INFO] [config.py:957:print]   eigenvalue_layer_name ........ bert.encoder.layer
+[2023-04-21 23:37:35,901] [INFO] [config.py:957:print]   eigenvalue_layer_num ......... 0
+[2023-04-21 23:37:35,901] [INFO] [config.py:957:print]   eigenvalue_max_iter .......... 100
+[2023-04-21 23:37:35,901] [INFO] [config.py:957:print]   eigenvalue_stability ......... 1e-06
+[2023-04-21 23:37:35,901] [INFO] [config.py:957:print]   eigenvalue_tol ............... 0.01
+[2023-04-21 23:37:35,901] [INFO] [config.py:957:print]   eigenvalue_verbose ........... False
+[2023-04-21 23:37:35,901] [INFO] [config.py:957:print]   elasticity_enabled ........... False
+[2023-04-21 23:37:35,901] [INFO] [config.py:957:print]   flops_profiler_config ........ {
     "enabled": false, 
     "profile_step": 1, 
     "module_depth": -1, 
@@ -1357,21 +959,21 @@ Time to load utils op: 0.0018343925476074219 seconds
     "detailed": true, 
     "output_file": null
 }
-[2023-04-14 08:46:13,191] [INFO] [config.py:957:print]   fp16_auto_cast ............... False
-[2023-04-14 08:46:13,191] [INFO] [config.py:957:print]   fp16_enabled ................. True
-[2023-04-14 08:46:13,191] [INFO] [config.py:957:print]   fp16_master_weights_and_gradients  False
-[2023-04-14 08:46:13,191] [INFO] [config.py:957:print]   global_rank .................. 0
-[2023-04-14 08:46:13,192] [INFO] [config.py:957:print]   grad_accum_dtype ............. None
-[2023-04-14 08:46:13,192] [INFO] [config.py:957:print]   gradient_accumulation_steps .. 1
-[2023-04-14 08:46:13,192] [INFO] [config.py:957:print]   gradient_clipping ............ 1.0
-[2023-04-14 08:46:13,192] [INFO] [config.py:957:print]   gradient_predivide_factor .... 1.0
-[2023-04-14 08:46:13,192] [INFO] [config.py:957:print]   hybrid_engine ................ enabled=False max_out_tokens=512 inference_tp_size=1 release_inference_cache=False pin_parameters=True tp_gather_partition_size=8
-[2023-04-14 08:46:13,192] [INFO] [config.py:957:print]   initial_dynamic_scale ........ 65536
-[2023-04-14 08:46:13,192] [INFO] [config.py:957:print]   load_universal_checkpoint .... False
-[2023-04-14 08:46:13,192] [INFO] [config.py:957:print]   loss_scale ................... 0
-[2023-04-14 08:46:13,192] [INFO] [config.py:957:print]   memory_breakdown ............. False
-[2023-04-14 08:46:13,192] [INFO] [config.py:957:print]   monitor_config ............... tensorboard=TensorBoardConfig(enabled=False, output_path='', job_name='DeepSpeedJobName') wandb=WandbConfig(enabled=False, group=None, team=None, project='deepspeed') csv_monitor=CSVConfig(enabled=False, output_path='', job_name='DeepSpeedJobName') enabled=False
-[2023-04-14 08:46:13,192] [INFO] [config.py:957:print]   nebula_config ................ {
+[2023-04-21 23:37:35,901] [INFO] [config.py:957:print]   fp16_auto_cast ............... False
+[2023-04-21 23:37:35,901] [INFO] [config.py:957:print]   fp16_enabled ................. True
+[2023-04-21 23:37:35,901] [INFO] [config.py:957:print]   fp16_master_weights_and_gradients  False
+[2023-04-21 23:37:35,901] [INFO] [config.py:957:print]   global_rank .................. 0
+[2023-04-21 23:37:35,901] [INFO] [config.py:957:print]   grad_accum_dtype ............. None
+[2023-04-21 23:37:35,901] [INFO] [config.py:957:print]   gradient_accumulation_steps .. 1
+[2023-04-21 23:37:35,901] [INFO] [config.py:957:print]   gradient_clipping ............ 1.0
+[2023-04-21 23:37:35,901] [INFO] [config.py:957:print]   gradient_predivide_factor .... 1.0
+[2023-04-21 23:37:35,901] [INFO] [config.py:957:print]   hybrid_engine ................ enabled=False max_out_tokens=512 inference_tp_size=1 release_inference_cache=False pin_parameters=True tp_gather_partition_size=8
+[2023-04-21 23:37:35,901] [INFO] [config.py:957:print]   initial_dynamic_scale ........ 65536
+[2023-04-21 23:37:35,901] [INFO] [config.py:957:print]   load_universal_checkpoint .... False
+[2023-04-21 23:37:35,901] [INFO] [config.py:957:print]   loss_scale ................... 0
+[2023-04-21 23:37:35,901] [INFO] [config.py:957:print]   memory_breakdown ............. False
+[2023-04-21 23:37:35,901] [INFO] [config.py:957:print]   monitor_config ............... tensorboard=TensorBoardConfig(enabled=False, output_path='', job_name='DeepSpeedJobName') wandb=WandbConfig(enabled=False, group=None, team=None, project='deepspeed') csv_monitor=CSVConfig(enabled=False, output_path='', job_name='DeepSpeedJobName') enabled=False
+[2023-04-21 23:37:35,901] [INFO] [config.py:957:print]   nebula_config ................ {
     "enabled": false, 
     "persistent_storage_path": null, 
     "persistent_time_interval": 100, 
@@ -1379,29 +981,29 @@ Time to load utils op: 0.0018343925476074219 seconds
     "enable_nebula_load": true, 
     "load_path": null
 }
-[2023-04-14 08:46:13,192] [INFO] [config.py:957:print]   optimizer_legacy_fusion ...... False
-[2023-04-14 08:46:13,192] [INFO] [config.py:957:print]   optimizer_name ............... None
-[2023-04-14 08:46:13,192] [INFO] [config.py:957:print]   optimizer_params ............. None
-[2023-04-14 08:46:13,192] [INFO] [config.py:957:print]   pipeline ..................... {'stages': 'auto', 'partition': 'best', 'seed_layers': False, 'activation_checkpoint_interval': 0}
-[2023-04-14 08:46:13,192] [INFO] [config.py:957:print]   pld_enabled .................. False
-[2023-04-14 08:46:13,192] [INFO] [config.py:957:print]   pld_params ................... False
-[2023-04-14 08:46:13,192] [INFO] [config.py:957:print]   prescale_gradients ........... False
-[2023-04-14 08:46:13,192] [INFO] [config.py:957:print]   scheduler_name ............... None
-[2023-04-14 08:46:13,192] [INFO] [config.py:957:print]   scheduler_params ............. None
-[2023-04-14 08:46:13,192] [INFO] [config.py:957:print]   sparse_attention ............. None
-[2023-04-14 08:46:13,192] [INFO] [config.py:957:print]   sparse_gradients_enabled ..... False
-[2023-04-14 08:46:13,192] [INFO] [config.py:957:print]   steps_per_print .............. 10
-[2023-04-14 08:46:13,192] [INFO] [config.py:957:print]   train_batch_size ............. 32
-[2023-04-14 08:46:13,192] [INFO] [config.py:957:print]   train_micro_batch_size_per_gpu  4
-[2023-04-14 08:46:13,192] [INFO] [config.py:957:print]   use_node_local_storage ....... False
-[2023-04-14 08:46:13,192] [INFO] [config.py:957:print]   wall_clock_breakdown ......... False
-[2023-04-14 08:46:13,192] [INFO] [config.py:957:print]   world_size ................... 8
-[2023-04-14 08:46:13,192] [INFO] [config.py:957:print]   zero_allow_untested_optimizer  False
-[2023-04-14 08:46:13,192] [INFO] [config.py:957:print]   zero_config .................. stage=0 contiguous_gradients=True reduce_scatter=True reduce_bucket_size=500,000,000 allgather_partitions=True allgather_bucket_size=500,000,000 overlap_comm=False load_from_fp32_weights=True elastic_checkpoint=False offload_param=DeepSpeedZeroOffloadParamConfig(device='none', nvme_path=None, buffer_count=5, buffer_size=100,000,000, max_in_cpu=1,000,000,000, pin_memory=False) offload_optimizer=None sub_group_size=1,000,000,000 cpu_offload_param=None cpu_offload_use_pin_memory=None cpu_offload=None prefetch_bucket_size=50,000,000 param_persistence_threshold=10000 model_persistence_threshold=sys.maxsize max_live_parameters=1,000,000,000 max_reuse_distance=1,000,000,000 gather_16bit_weights_on_model_save=False stage3_gather_fp16_weights_on_model_save=False ignore_unused_parameters=True legacy_stage1=False round_robin_gradients=False memory_efficient_linear=False
-[2023-04-14 08:46:13,192] [INFO] [config.py:957:print]   zero_enabled ................. False
-[2023-04-14 08:46:13,192] [INFO] [config.py:957:print]   zero_force_ds_cpu_optimizer .. True
-[2023-04-14 08:46:13,192] [INFO] [config.py:957:print]   zero_optimization_stage ...... 0
-[2023-04-14 08:46:13,192] [INFO] [config.py:943:print_user_config]   json = {
+[2023-04-21 23:37:35,901] [INFO] [config.py:957:print]   optimizer_legacy_fusion ...... False
+[2023-04-21 23:37:35,901] [INFO] [config.py:957:print]   optimizer_name ............... None
+[2023-04-21 23:37:35,901] [INFO] [config.py:957:print]   optimizer_params ............. None
+[2023-04-21 23:37:35,901] [INFO] [config.py:957:print]   pipeline ..................... {'stages': 'auto', 'partition': 'best', 'seed_layers': False, 'activation_checkpoint_interval': 0}
+[2023-04-21 23:37:35,901] [INFO] [config.py:957:print]   pld_enabled .................. False
+[2023-04-21 23:37:35,901] [INFO] [config.py:957:print]   pld_params ................... False
+[2023-04-21 23:37:35,901] [INFO] [config.py:957:print]   prescale_gradients ........... False
+[2023-04-21 23:37:35,901] [INFO] [config.py:957:print]   scheduler_name ............... None
+[2023-04-21 23:37:35,901] [INFO] [config.py:957:print]   scheduler_params ............. None
+[2023-04-21 23:37:35,901] [INFO] [config.py:957:print]   sparse_attention ............. None
+[2023-04-21 23:37:35,901] [INFO] [config.py:957:print]   sparse_gradients_enabled ..... False
+[2023-04-21 23:37:35,902] [INFO] [config.py:957:print]   steps_per_print .............. 10
+[2023-04-21 23:37:35,902] [INFO] [config.py:957:print]   train_batch_size ............. 32
+[2023-04-21 23:37:35,902] [INFO] [config.py:957:print]   train_micro_batch_size_per_gpu  4
+[2023-04-21 23:37:35,902] [INFO] [config.py:957:print]   use_node_local_storage ....... False
+[2023-04-21 23:37:35,902] [INFO] [config.py:957:print]   wall_clock_breakdown ......... False
+[2023-04-21 23:37:35,902] [INFO] [config.py:957:print]   world_size ................... 8
+[2023-04-21 23:37:35,902] [INFO] [config.py:957:print]   zero_allow_untested_optimizer  False
+[2023-04-21 23:37:35,902] [INFO] [config.py:957:print]   zero_config .................. stage=0 contiguous_gradients=True reduce_scatter=True reduce_bucket_size=500,000,000 allgather_partitions=True allgather_bucket_size=500,000,000 overlap_comm=False load_from_fp32_weights=True elastic_checkpoint=False offload_param=DeepSpeedZeroOffloadParamConfig(device='none', nvme_path=None, buffer_count=5, buffer_size=100,000,000, max_in_cpu=1,000,000,000, pin_memory=False) offload_optimizer=None sub_group_size=1,000,000,000 cpu_offload_param=None cpu_offload_use_pin_memory=None cpu_offload=None prefetch_bucket_size=50,000,000 param_persistence_threshold=10000 model_persistence_threshold=sys.maxsize max_live_parameters=1,000,000,000 max_reuse_distance=1,000,000,000 gather_16bit_weights_on_model_save=False stage3_gather_fp16_weights_on_model_save=False ignore_unused_parameters=True legacy_stage1=False round_robin_gradients=False memory_efficient_linear=False
+[2023-04-21 23:37:35,902] [INFO] [config.py:957:print]   zero_enabled ................. False
+[2023-04-21 23:37:35,902] [INFO] [config.py:957:print]   zero_force_ds_cpu_optimizer .. True
+[2023-04-21 23:37:35,902] [INFO] [config.py:957:print]   zero_optimization_stage ...... 0
+[2023-04-21 23:37:35,902] [INFO] [config.py:943:print_user_config]   json = {
     "train_batch_size": 32, 
     "train_micro_batch_size_per_gpu": 4, 
     "steps_per_print": 10, 
@@ -1423,9 +1025,34 @@ Time to load utils op: 0.0018343925476074219 seconds
 Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module utils, skipping build step...
 Loading extension module utils...
-Time to load utils op: 0.0015451908111572266 seconds
-*******************[end] Initialized Ref Model [end] (duration: 59.70s)*******************
-************************[start] Initializing Critic Model [start] ************************
+Time to load utils op: 0.0016591548919677734 seconds
+*******************[end] Initialized Ref Model [end] (duration: 25.47s)*******************
+*************************[start] Initializing EMA Model [start] **************************
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+No modifications detected for re-loaded extension module utils, skipping build step...
+Loading extension module utils...
+Time to load utils op: 0.0015752315521240234 seconds
+[2023-04-21 23:37:53,908] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed info: version=0.9.0, git-hash=unknown, git-branch=unknown
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+No modifications detected for re-loaded extension module utils, skipping build step...
+Loading extension module utils...
+Time to load utils op: 0.0014162063598632812 seconds
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+No modifications detected for re-loaded extension module utils, skipping build step...
+Loading extension module utils...
+Time to load utils op: 0.011510372161865234 seconds
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+No modifications detected for re-loaded extension module utils, skipping build step...
+Loading extension module utils...
+Time to load utils op: 0.0012710094451904297 seconds
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+No modifications detected for re-loaded extension module utils, skipping build step...
+Loading extension module utils...
+Time to load utils op: 0.0012392997741699219 seconds
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+No modifications detected for re-loaded extension module utils, skipping build step...
+Loading extension module utils...
+Time to load utils op: 0.001157522201538086 seconds
 huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
@@ -1434,7 +1061,154 @@ Installed CUDA version 11.4 does not match the version torch was compiled with 1
 Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module fused_adam, skipping build step...
 Loading extension module fused_adam...
-Time to load fused_adam op: 0.0030541419982910156 seconds
+Time to load fused_adam op: 0.0017504692077636719 seconds
+[2023-04-21 23:37:59,716] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed Flops Profiler Enabled: False
+[2023-04-21 23:37:59,717] [INFO] [config.py:953:print] DeepSpeedEngine configuration:
+[2023-04-21 23:37:59,717] [INFO] [config.py:957:print]   activation_checkpointing_config  {
+    "partition_activations": false, 
+    "contiguous_memory_optimization": false, 
+    "cpu_checkpointing": false, 
+    "number_checkpoints": null, 
+    "synchronize_checkpoint_boundary": false, 
+    "profile": false
+}
+[2023-04-21 23:37:59,718] [INFO] [config.py:957:print]   aio_config ................... {'block_size': 1048576, 'queue_depth': 8, 'thread_count': 1, 'single_submit': False, 'overlap_events': True}
+[2023-04-21 23:37:59,718] [INFO] [config.py:957:print]   amp_enabled .................. False
+[2023-04-21 23:37:59,718] [INFO] [config.py:957:print]   amp_params ................... False
+[2023-04-21 23:37:59,718] [INFO] [config.py:957:print]   autotuning_config ............ {
+    "enabled": false, 
+    "start_step": null, 
+    "end_step": null, 
+    "metric_path": null, 
+    "arg_mappings": null, 
+    "metric": "throughput", 
+    "model_info": null, 
+    "results_dir": "autotuning_results", 
+    "exps_dir": "autotuning_exps", 
+    "overwrite": true, 
+    "fast": true, 
+    "start_profile_step": 3, 
+    "end_profile_step": 5, 
+    "tuner_type": "gridsearch", 
+    "tuner_early_stopping": 5, 
+    "tuner_num_trials": 50, 
+    "model_info_path": null, 
+    "mp_size": 1, 
+    "max_train_batch_size": null, 
+    "min_train_batch_size": 1, 
+    "max_train_micro_batch_size_per_gpu": 1.024000e+03, 
+    "min_train_micro_batch_size_per_gpu": 1, 
+    "num_tuning_micro_batch_sizes": 3
+}
+[2023-04-21 23:37:59,718] [INFO] [config.py:957:print]   bfloat16_enabled ............. False
+[2023-04-21 23:37:59,718] [INFO] [config.py:957:print]   checkpoint_parallel_write_pipeline  False
+[2023-04-21 23:37:59,718] [INFO] [config.py:957:print]   checkpoint_tag_validation_enabled  True
+[2023-04-21 23:37:59,718] [INFO] [config.py:957:print]   checkpoint_tag_validation_fail  False
+[2023-04-21 23:37:59,718] [INFO] [config.py:957:print]   comms_config ................. <deepspeed.comm.config.DeepSpeedCommsConfig object at 0x7fb34bfbabb0>
+[2023-04-21 23:37:59,718] [INFO] [config.py:957:print]   communication_data_type ...... None
+[2023-04-21 23:37:59,718] [INFO] [config.py:957:print]   compression_config ........... {'weight_quantization': {'shared_parameters': {'enabled': False, 'quantizer_kernel': False, 'schedule_offset': 0, 'quantize_groups': 1, 'quantize_verbose': False, 'quantization_type': 'symmetric', 'quantize_weight_in_forward': False, 'rounding': 'nearest', 'fp16_mixed_quantize': False, 'quantize_change_ratio': 0.001}, 'different_groups': {}}, 'activation_quantization': {'shared_parameters': {'enabled': False, 'quantization_type': 'symmetric', 'range_calibration': 'dynamic', 'schedule_offset': 1000}, 'different_groups': {}}, 'sparse_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'row_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'head_pruning': {'shared_parameters': {'enabled': False, 'method': 'topk', 'schedule_offset': 1000}, 'different_groups': {}}, 'channel_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'layer_reduction': {'enabled': False}}
+[2023-04-21 23:37:59,718] [INFO] [config.py:957:print]   curriculum_enabled_legacy .... False
+[2023-04-21 23:37:59,718] [INFO] [config.py:957:print]   curriculum_params_legacy ..... False
+[2023-04-21 23:37:59,718] [INFO] [config.py:957:print]   data_efficiency_config ....... {'enabled': False, 'seed': 1234, 'data_sampling': {'enabled': False, 'num_epochs': 1000, 'num_workers': 0, 'curriculum_learning': {'enabled': False}}, 'data_routing': {'enabled': False, 'random_ltd': {'enabled': False, 'layer_token_lr_schedule': {'enabled': False}}}}
+[2023-04-21 23:37:59,718] [INFO] [config.py:957:print]   data_efficiency_enabled ...... False
+[2023-04-21 23:37:59,718] [INFO] [config.py:957:print]   dataloader_drop_last ......... False
+[2023-04-21 23:37:59,718] [INFO] [config.py:957:print]   disable_allgather ............ False
+[2023-04-21 23:37:59,718] [INFO] [config.py:957:print]   dump_state ................... False
+[2023-04-21 23:37:59,718] [INFO] [config.py:957:print]   dynamic_loss_scale_args ...... None
+[2023-04-21 23:37:59,718] [INFO] [config.py:957:print]   eigenvalue_enabled ........... False
+[2023-04-21 23:37:59,718] [INFO] [config.py:957:print]   eigenvalue_gas_boundary_resolution  1
+[2023-04-21 23:37:59,718] [INFO] [config.py:957:print]   eigenvalue_layer_name ........ bert.encoder.layer
+[2023-04-21 23:37:59,718] [INFO] [config.py:957:print]   eigenvalue_layer_num ......... 0
+[2023-04-21 23:37:59,718] [INFO] [config.py:957:print]   eigenvalue_max_iter .......... 100
+[2023-04-21 23:37:59,718] [INFO] [config.py:957:print]   eigenvalue_stability ......... 1e-06
+[2023-04-21 23:37:59,718] [INFO] [config.py:957:print]   eigenvalue_tol ............... 0.01
+[2023-04-21 23:37:59,718] [INFO] [config.py:957:print]   eigenvalue_verbose ........... False
+[2023-04-21 23:37:59,718] [INFO] [config.py:957:print]   elasticity_enabled ........... False
+[2023-04-21 23:37:59,718] [INFO] [config.py:957:print]   flops_profiler_config ........ {
+    "enabled": false, 
+    "profile_step": 1, 
+    "module_depth": -1, 
+    "top_modules": 1, 
+    "detailed": true, 
+    "output_file": null
+}
+[2023-04-21 23:37:59,718] [INFO] [config.py:957:print]   fp16_auto_cast ............... False
+[2023-04-21 23:37:59,718] [INFO] [config.py:957:print]   fp16_enabled ................. True
+[2023-04-21 23:37:59,718] [INFO] [config.py:957:print]   fp16_master_weights_and_gradients  False
+[2023-04-21 23:37:59,719] [INFO] [config.py:957:print]   global_rank .................. 0
+[2023-04-21 23:37:59,719] [INFO] [config.py:957:print]   grad_accum_dtype ............. None
+[2023-04-21 23:37:59,719] [INFO] [config.py:957:print]   gradient_accumulation_steps .. 1
+[2023-04-21 23:37:59,719] [INFO] [config.py:957:print]   gradient_clipping ............ 1.0
+[2023-04-21 23:37:59,719] [INFO] [config.py:957:print]   gradient_predivide_factor .... 1.0
+[2023-04-21 23:37:59,719] [INFO] [config.py:957:print]   hybrid_engine ................ enabled=False max_out_tokens=512 inference_tp_size=1 release_inference_cache=False pin_parameters=True tp_gather_partition_size=8
+[2023-04-21 23:37:59,719] [INFO] [config.py:957:print]   initial_dynamic_scale ........ 65536
+[2023-04-21 23:37:59,719] [INFO] [config.py:957:print]   load_universal_checkpoint .... False
+[2023-04-21 23:37:59,719] [INFO] [config.py:957:print]   loss_scale ................... 0
+[2023-04-21 23:37:59,719] [INFO] [config.py:957:print]   memory_breakdown ............. False
+[2023-04-21 23:37:59,719] [INFO] [config.py:957:print]   monitor_config ............... tensorboard=TensorBoardConfig(enabled=False, output_path='', job_name='DeepSpeedJobName') wandb=WandbConfig(enabled=False, group=None, team=None, project='deepspeed') csv_monitor=CSVConfig(enabled=False, output_path='', job_name='DeepSpeedJobName') enabled=False
+[2023-04-21 23:37:59,719] [INFO] [config.py:957:print]   nebula_config ................ {
+    "enabled": false, 
+    "persistent_storage_path": null, 
+    "persistent_time_interval": 100, 
+    "num_of_version_in_retention": 2, 
+    "enable_nebula_load": true, 
+    "load_path": null
+}
+[2023-04-21 23:37:59,719] [INFO] [config.py:957:print]   optimizer_legacy_fusion ...... False
+[2023-04-21 23:37:59,719] [INFO] [config.py:957:print]   optimizer_name ............... None
+[2023-04-21 23:37:59,719] [INFO] [config.py:957:print]   optimizer_params ............. None
+[2023-04-21 23:37:59,719] [INFO] [config.py:957:print]   pipeline ..................... {'stages': 'auto', 'partition': 'best', 'seed_layers': False, 'activation_checkpoint_interval': 0}
+[2023-04-21 23:37:59,719] [INFO] [config.py:957:print]   pld_enabled .................. False
+[2023-04-21 23:37:59,719] [INFO] [config.py:957:print]   pld_params ................... False
+[2023-04-21 23:37:59,719] [INFO] [config.py:957:print]   prescale_gradients ........... False
+[2023-04-21 23:37:59,719] [INFO] [config.py:957:print]   scheduler_name ............... None
+[2023-04-21 23:37:59,719] [INFO] [config.py:957:print]   scheduler_params ............. None
+[2023-04-21 23:37:59,719] [INFO] [config.py:957:print]   sparse_attention ............. None
+[2023-04-21 23:37:59,719] [INFO] [config.py:957:print]   sparse_gradients_enabled ..... False
+[2023-04-21 23:37:59,719] [INFO] [config.py:957:print]   steps_per_print .............. 10
+[2023-04-21 23:37:59,719] [INFO] [config.py:957:print]   train_batch_size ............. 32
+[2023-04-21 23:37:59,719] [INFO] [config.py:957:print]   train_micro_batch_size_per_gpu  4
+[2023-04-21 23:37:59,719] [INFO] [config.py:957:print]   use_node_local_storage ....... False
+[2023-04-21 23:37:59,719] [INFO] [config.py:957:print]   wall_clock_breakdown ......... False
+[2023-04-21 23:37:59,719] [INFO] [config.py:957:print]   world_size ................... 8
+[2023-04-21 23:37:59,719] [INFO] [config.py:957:print]   zero_allow_untested_optimizer  False
+[2023-04-21 23:37:59,719] [INFO] [config.py:957:print]   zero_config .................. stage=0 contiguous_gradients=True reduce_scatter=True reduce_bucket_size=500,000,000 allgather_partitions=True allgather_bucket_size=500,000,000 overlap_comm=False load_from_fp32_weights=True elastic_checkpoint=False offload_param=DeepSpeedZeroOffloadParamConfig(device='none', nvme_path=None, buffer_count=5, buffer_size=100,000,000, max_in_cpu=1,000,000,000, pin_memory=False) offload_optimizer=None sub_group_size=1,000,000,000 cpu_offload_param=None cpu_offload_use_pin_memory=None cpu_offload=None prefetch_bucket_size=50,000,000 param_persistence_threshold=10000 model_persistence_threshold=sys.maxsize max_live_parameters=1,000,000,000 max_reuse_distance=1,000,000,000 gather_16bit_weights_on_model_save=False stage3_gather_fp16_weights_on_model_save=False ignore_unused_parameters=True legacy_stage1=False round_robin_gradients=False memory_efficient_linear=False
+[2023-04-21 23:37:59,719] [INFO] [config.py:957:print]   zero_enabled ................. False
+[2023-04-21 23:37:59,719] [INFO] [config.py:957:print]   zero_force_ds_cpu_optimizer .. True
+[2023-04-21 23:37:59,719] [INFO] [config.py:957:print]   zero_optimization_stage ...... 0
+[2023-04-21 23:37:59,720] [INFO] [config.py:943:print_user_config]   json = {
+    "train_batch_size": 32, 
+    "train_micro_batch_size_per_gpu": 4, 
+    "steps_per_print": 10, 
+    "zero_optimization": {
+        "stage": 0, 
+        "stage3_param_persistence_threshold": 1.000000e+04, 
+        "offload_param": {
+            "device": "none"
+        }, 
+        "memory_efficient_linear": false
+    }, 
+    "fp16": {
+        "enabled": true
+    }, 
+    "gradient_clipping": 1.0, 
+    "prescale_gradients": false, 
+    "wall_clock_breakdown": false
+}
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+No modifications detected for re-loaded extension module utils, skipping build step...
+Loading extension module utils...
+Time to load utils op: 0.0014257431030273438 seconds
+*******************[end] Initialized EMA Model [end] (duration: 23.82s)*******************
+************************[start] Initializing Critic Model [start] ************************
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+No modifications detected for re-loaded extension module utils, skipping build step...
+Loading extension module utils...
+Time to load utils op: 0.0015196800231933594 seconds
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+No modifications detected for re-loaded extension module utils, skipping build step...
+Loading extension module utils...
+Time to load utils op: 0.0009224414825439453 seconds
 huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
@@ -1443,7 +1217,7 @@ Installed CUDA version 11.4 does not match the version torch was compiled with 1
 Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module fused_adam, skipping build step...
 Loading extension module fused_adam...
-Time to load fused_adam op: 0.0025348663330078125 seconds
+Time to load fused_adam op: 0.006412982940673828 seconds
 huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
@@ -1452,20 +1226,16 @@ Installed CUDA version 11.4 does not match the version torch was compiled with 1
 Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module fused_adam, skipping build step...
 Loading extension module fused_adam...
-Time to load fused_adam op: 0.0019736289978027344 seconds
+Time to load fused_adam op: 0.0021407604217529297 seconds
 huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
-huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
-To disable this warning, you can either:
 To disable this warning, you can either:
-	- Avoid using `tokenizers` before the fork if possible
 	- Avoid using `tokenizers` before the fork if possible
 	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
-	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
 Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
 Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module fused_adam, skipping build step...
 Loading extension module fused_adam...
-Time to load fused_adam op: 0.0029854774475097656 seconds
+Time to load fused_adam op: 0.0017254352569580078 seconds
 huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
@@ -1474,12 +1244,8 @@ Installed CUDA version 11.4 does not match the version torch was compiled with 1
 Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module fused_adam, skipping build step...
 Loading extension module fused_adam...
-Time to load fused_adam op: 0.0022940635681152344 seconds
-Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
-Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
-No modifications detected for re-loaded extension module fused_adam, skipping build step...
-Loading extension module fused_adam...
-Time to load fused_adam op: 0.0029518604278564453 seconds
+Time to load fused_adam op: 0.002038240432739258 seconds
+[2023-04-21 23:38:09,597] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed info: version=0.9.0, git-hash=unknown, git-branch=unknown
 huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
@@ -1488,15 +1254,11 @@ Installed CUDA version 11.4 does not match the version torch was compiled with 1
 Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module fused_adam, skipping build step...
 Loading extension module fused_adam...
-Time to load fused_adam op: 0.002866029739379883 seconds
-Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
-No modifications detected for re-loaded extension module utils, skipping build step...
-Loading extension module utils...
-Time to load utils op: 0.0012390613555908203 seconds
-Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
-No modifications detected for re-loaded extension module utils, skipping build step...
-Loading extension module utils...
-Time to load utils op: 0.003458738327026367 seconds
+Time to load fused_adam op: 0.002299070358276367 seconds
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
 huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
@@ -1505,94 +1267,102 @@ Installed CUDA version 11.4 does not match the version torch was compiled with 1
 Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module fused_adam, skipping build step...
 Loading extension module fused_adam...
-Time to load fused_adam op: 0.002884387969970703 seconds
-[2023-04-14 08:46:30,739] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed info: version=0.9.0, git-hash=unknown, git-branch=unknown
+Time to load fused_adam op: 0.0019500255584716797 seconds
+Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+No modifications detected for re-loaded extension module fused_adam, skipping build step...
+Loading extension module fused_adam...
+Time to load fused_adam op: 0.0018029212951660156 seconds
 Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module utils, skipping build step...
 Loading extension module utils...
-Time to load utils op: 0.5726070404052734 seconds
-[2023-04-14 08:46:34,252] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed Flops Profiler Enabled: False
-Rank: 6 partition count [8, 8] and sizes[(41365824, False), (33792, False)] 
-Rank: 7 partition count [8, 8] and sizes[(41365824, False), (33792, False)] 
-Rank: 1 partition count [8, 8] and sizes[(41365824, False), (33792, False)] 
+Time to load utils op: 0.002105236053466797 seconds
 Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module utils, skipping build step...
 Loading extension module utils...
-Time to load utils op: 0.0016791820526123047 seconds
-[2023-04-14 08:46:36,743] [INFO] [logging.py:96:log_dist] [Rank 0] Removing param_group that has no 'params' in the client Optimizer
-[2023-04-14 08:46:36,743] [INFO] [logging.py:96:log_dist] [Rank 0] Using client Optimizer as basic optimizer
+Time to load utils op: 0.001987457275390625 seconds
 Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module utils, skipping build step...
 Loading extension module utils...
-Time to load utils op: 0.002676725387573242 seconds
-[2023-04-14 08:46:36,762] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed Basic Optimizer = FusedAdam
-[2023-04-14 08:46:36,762] [INFO] [utils.py:51:is_zero_supported_optimizer] Checking ZeRO support for optimizer=FusedAdam type=<class 'deepspeed.ops.adam.fused_adam.FusedAdam'>
-[2023-04-14 08:46:36,763] [INFO] [logging.py:96:log_dist] [Rank 0] Creating torch.float16 ZeRO stage 2 optimizer
-[2023-04-14 08:46:36,763] [INFO] [stage_1_and_2.py:133:__init__] Reduce bucket size 500,000,000
-[2023-04-14 08:46:36,763] [INFO] [stage_1_and_2.py:134:__init__] Allgather bucket size 500,000,000
-[2023-04-14 08:46:36,763] [INFO] [stage_1_and_2.py:135:__init__] CPU Offload: False
-[2023-04-14 08:46:36,763] [INFO] [stage_1_and_2.py:136:__init__] Round robin gradient partitioning: False
+Time to load utils op: 0.0010986328125 seconds
+[2023-04-21 23:38:13,959] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed Flops Profiler Enabled: False
+[2023-04-21 23:38:13,961] [INFO] [logging.py:96:log_dist] [Rank 0] Removing param_group that has no 'params' in the client Optimizer
+[2023-04-21 23:38:13,961] [INFO] [logging.py:96:log_dist] [Rank 0] Using client Optimizer as basic optimizer
+[2023-04-21 23:38:13,976] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed Basic Optimizer = FusedAdam
+[2023-04-21 23:38:13,976] [INFO] [utils.py:51:is_zero_supported_optimizer] Checking ZeRO support for optimizer=FusedAdam type=<class 'deepspeed.ops.adam.fused_adam.FusedAdam'>
+[2023-04-21 23:38:13,976] [INFO] [logging.py:96:log_dist] [Rank 0] Creating torch.float16 ZeRO stage 2 optimizer
+[2023-04-21 23:38:13,977] [INFO] [stage_1_and_2.py:133:__init__] Reduce bucket size 500,000,000
+[2023-04-21 23:38:13,977] [INFO] [stage_1_and_2.py:134:__init__] Allgather bucket size 500,000,000
+[2023-04-21 23:38:13,977] [INFO] [stage_1_and_2.py:135:__init__] CPU Offload: False
+[2023-04-21 23:38:13,977] [INFO] [stage_1_and_2.py:136:__init__] Round robin gradient partitioning: False
 Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+No modifications detected for re-loaded extension module utils, skipping build step...
+Loading extension module utils...
+Time to load utils op: 0.0016143321990966797 seconds
 Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module utils, skipping build step...
 Loading extension module utils...
-Time to load utils op: 0.0006117820739746094 seconds
+Time to load utils op: 0.004909992218017578 seconds
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module utils, skipping build step...
 Loading extension module utils...
-Time to load utils op: 0.002028226852416992 seconds
+Time to load utils op: 0.001100301742553711 seconds
 Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module utils, skipping build step...
 Loading extension module utils...
-Time to load utils op: 0.0013179779052734375 seconds
+Time to load utils op: 0.00106048583984375 seconds
+Rank: 4 partition count [8, 8] and sizes[(41365824, False), (33792, False)] 
+Rank: 1 partition count [8, 8] and sizes[(41365824, False), (33792, False)] 
+Rank: 7 partition count [8, 8] and sizes[(41365824, False), (33792, False)] 
+Rank: 2 partition count [8, 8] and sizes[(41365824, False), (33792, False)] 
+Rank: 6 partition count [8, 8] and sizes[(41365824, False), (33792, False)] 
 Rank: 0 partition count [8, 8] and sizes[(41365824, False), (33792, False)] 
 Rank: 3 partition count [8, 8] and sizes[(41365824, False), (33792, False)] 
-Rank: 4 partition count [8, 8] and sizes[(41365824, False), (33792, False)] 
 Rank: 5 partition count [8, 8] and sizes[(41365824, False), (33792, False)] 
-Rank: 2 partition count [8, 8] and sizes[(41365824, False), (33792, False)] 
-Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
-
-No modifications detected for re-loaded extension module utils, skipping build step...
-Loading extension module utils...
-Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...Time to load utils op: 0.0013089179992675781 seconds
-
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module utils, skipping build step...
 Loading extension module utils...
-Time to load utils op: 0.0011861324310302734 seconds
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Time to load utils op: 0.0012159347534179688 seconds
 Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module utils, skipping build step...
 Loading extension module utils...
-Time to load utils op: 0.0008509159088134766 secondsUsing /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+No modifications detected for re-loaded extension module utils, skipping build step...
+Loading extension module utils...Time to load utils op: 0.001161813735961914 seconds
 
+Time to load utils op: 0.0007848739624023438 seconds
 No modifications detected for re-loaded extension module utils, skipping build step...
 Loading extension module utils...
-Time to load utils op: 0.0007336139678955078 seconds
 Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Time to load utils op: 0.0008122920989990234 seconds
 No modifications detected for re-loaded extension module utils, skipping build step...
 Loading extension module utils...
-Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...Time to load utils op: 0.0008528232574462891 seconds
-
+Time to load utils op: 0.0008475780487060547 seconds
 No modifications detected for re-loaded extension module utils, skipping build step...
 Loading extension module utils...
-Time to load utils op: 0.0011703968048095703 seconds
+Time to load utils op: 0.0010933876037597656 seconds
 No modifications detected for re-loaded extension module utils, skipping build step...
 Loading extension module utils...
-Time to load utils op: 0.0012271404266357422 seconds
-[2023-04-14 08:46:42,781] [INFO] [utils.py:785:see_memory_usage] Before initializing optimizer states
-[2023-04-14 08:46:42,782] [INFO] [utils.py:786:see_memory_usage] MA 8.1 GB         Max_MA 8.1 GB         CA 8.29 GB         Max_CA 8 GB 
-[2023-04-14 08:46:42,782] [INFO] [utils.py:793:see_memory_usage] CPU Virtual Memory:  used = 125.79 GB, percent = 12.5%
-[2023-04-14 08:46:43,491] [INFO] [utils.py:785:see_memory_usage] After initializing optimizer states
-[2023-04-14 08:46:43,491] [INFO] [utils.py:786:see_memory_usage] MA 8.41 GB         Max_MA 8.56 GB         CA 8.75 GB         Max_CA 9 GB 
-[2023-04-14 08:46:43,492] [INFO] [utils.py:793:see_memory_usage] CPU Virtual Memory:  used = 127.29 GB, percent = 12.6%
-[2023-04-14 08:46:43,492] [INFO] [stage_1_and_2.py:489:__init__] optimizer state initialized
-[2023-04-14 08:46:44,198] [INFO] [utils.py:785:see_memory_usage] After initializing ZeRO optimizer
-[2023-04-14 08:46:44,198] [INFO] [utils.py:786:see_memory_usage] MA 8.41 GB         Max_MA 8.41 GB         CA 8.75 GB         Max_CA 9 GB 
-[2023-04-14 08:46:44,199] [INFO] [utils.py:793:see_memory_usage] CPU Virtual Memory:  used = 129.3 GB, percent = 12.8%
-[2023-04-14 08:46:44,200] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed Final Optimizer = FusedAdam
-[2023-04-14 08:46:44,200] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed using client LR scheduler
-[2023-04-14 08:46:44,200] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed LR Scheduler = <torch.optim.lr_scheduler.LambdaLR object at 0x7f05908fb7c0>
-[2023-04-14 08:46:44,200] [INFO] [logging.py:96:log_dist] [Rank 0] step=0, skipped=0, lr=[0.0, 0.0], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 08:46:44,201] [INFO] [config.py:953:print] DeepSpeedEngine configuration:
-[2023-04-14 08:46:44,201] [INFO] [config.py:957:print]   activation_checkpointing_config  {
+Time to load utils op: 0.0012607574462890625 seconds
+[2023-04-21 23:38:23,773] [INFO] [utils.py:785:see_memory_usage] Before initializing optimizer states
+[2023-04-21 23:38:23,774] [INFO] [utils.py:786:see_memory_usage] MA 10.55 GB         Max_MA 10.55 GB         CA 10.94 GB         Max_CA 11 GB 
+[2023-04-21 23:38:23,774] [INFO] [utils.py:793:see_memory_usage] CPU Virtual Memory:  used = 89.44 GB, percent = 8.9%
+[2023-04-21 23:38:23,968] [INFO] [utils.py:785:see_memory_usage] After initializing optimizer states
+[2023-04-21 23:38:23,968] [INFO] [utils.py:786:see_memory_usage] MA 10.86 GB         Max_MA 11.01 GB         CA 11.41 GB         Max_CA 11 GB 
+[2023-04-21 23:38:23,969] [INFO] [utils.py:793:see_memory_usage] CPU Virtual Memory:  used = 89.93 GB, percent = 8.9%
+[2023-04-21 23:38:23,969] [INFO] [stage_1_and_2.py:489:__init__] optimizer state initialized
+[2023-04-21 23:38:24,165] [INFO] [utils.py:785:see_memory_usage] After initializing ZeRO optimizer
+[2023-04-21 23:38:24,166] [INFO] [utils.py:786:see_memory_usage] MA 10.86 GB         Max_MA 10.86 GB         CA 11.41 GB         Max_CA 11 GB 
+[2023-04-21 23:38:24,166] [INFO] [utils.py:793:see_memory_usage] CPU Virtual Memory:  used = 90.43 GB, percent = 9.0%
+[2023-04-21 23:38:24,167] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed Final Optimizer = FusedAdam
+[2023-04-21 23:38:24,168] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed using client LR scheduler
+[2023-04-21 23:38:24,168] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed LR Scheduler = <torch.optim.lr_scheduler.LambdaLR object at 0x7fb1f87ce670>
+[2023-04-21 23:38:24,168] [INFO] [logging.py:96:log_dist] [Rank 0] step=0, skipped=0, lr=[0.0, 0.0], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-21 23:38:24,168] [INFO] [config.py:953:print] DeepSpeedEngine configuration:
+[2023-04-21 23:38:24,168] [INFO] [config.py:957:print]   activation_checkpointing_config  {
     "partition_activations": false, 
     "contiguous_memory_optimization": false, 
     "cpu_checkpointing": false, 
@@ -1600,10 +1370,10 @@ Time to load utils op: 0.0012271404266357422 seconds
     "synchronize_checkpoint_boundary": false, 
     "profile": false
 }
-[2023-04-14 08:46:44,201] [INFO] [config.py:957:print]   aio_config ................... {'block_size': 1048576, 'queue_depth': 8, 'thread_count': 1, 'single_submit': False, 'overlap_events': True}
-[2023-04-14 08:46:44,201] [INFO] [config.py:957:print]   amp_enabled .................. False
-[2023-04-14 08:46:44,201] [INFO] [config.py:957:print]   amp_params ................... False
-[2023-04-14 08:46:44,201] [INFO] [config.py:957:print]   autotuning_config ............ {
+[2023-04-21 23:38:24,168] [INFO] [config.py:957:print]   aio_config ................... {'block_size': 1048576, 'queue_depth': 8, 'thread_count': 1, 'single_submit': False, 'overlap_events': True}
+[2023-04-21 23:38:24,168] [INFO] [config.py:957:print]   amp_enabled .................. False
+[2023-04-21 23:38:24,169] [INFO] [config.py:957:print]   amp_params ................... False
+[2023-04-21 23:38:24,169] [INFO] [config.py:957:print]   autotuning_config ............ {
     "enabled": false, 
     "start_step": null, 
     "end_step": null, 
@@ -1628,31 +1398,31 @@ Time to load utils op: 0.0012271404266357422 seconds
     "min_train_micro_batch_size_per_gpu": 1, 
     "num_tuning_micro_batch_sizes": 3
 }
-[2023-04-14 08:46:44,208] [INFO] [config.py:957:print]   bfloat16_enabled ............. False
-[2023-04-14 08:46:44,208] [INFO] [config.py:957:print]   checkpoint_parallel_write_pipeline  False
-[2023-04-14 08:46:44,208] [INFO] [config.py:957:print]   checkpoint_tag_validation_enabled  True
-[2023-04-14 08:46:44,209] [INFO] [config.py:957:print]   checkpoint_tag_validation_fail  False
-[2023-04-14 08:46:44,209] [INFO] [config.py:957:print]   comms_config ................. <deepspeed.comm.config.DeepSpeedCommsConfig object at 0x7f04d00c9ee0>
-[2023-04-14 08:46:44,209] [INFO] [config.py:957:print]   communication_data_type ...... None
-[2023-04-14 08:46:44,209] [INFO] [config.py:957:print]   compression_config ........... {'weight_quantization': {'shared_parameters': {'enabled': False, 'quantizer_kernel': False, 'schedule_offset': 0, 'quantize_groups': 1, 'quantize_verbose': False, 'quantization_type': 'symmetric', 'quantize_weight_in_forward': False, 'rounding': 'nearest', 'fp16_mixed_quantize': False, 'quantize_change_ratio': 0.001}, 'different_groups': {}}, 'activation_quantization': {'shared_parameters': {'enabled': False, 'quantization_type': 'symmetric', 'range_calibration': 'dynamic', 'schedule_offset': 1000}, 'different_groups': {}}, 'sparse_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'row_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'head_pruning': {'shared_parameters': {'enabled': False, 'method': 'topk', 'schedule_offset': 1000}, 'different_groups': {}}, 'channel_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'layer_reduction': {'enabled': False}}
-[2023-04-14 08:46:44,209] [INFO] [config.py:957:print]   curriculum_enabled_legacy .... False
-[2023-04-14 08:46:44,209] [INFO] [config.py:957:print]   curriculum_params_legacy ..... False
-[2023-04-14 08:46:44,209] [INFO] [config.py:957:print]   data_efficiency_config ....... {'enabled': False, 'seed': 1234, 'data_sampling': {'enabled': False, 'num_epochs': 1000, 'num_workers': 0, 'curriculum_learning': {'enabled': False}}, 'data_routing': {'enabled': False, 'random_ltd': {'enabled': False, 'layer_token_lr_schedule': {'enabled': False}}}}
-[2023-04-14 08:46:44,209] [INFO] [config.py:957:print]   data_efficiency_enabled ...... False
-[2023-04-14 08:46:44,209] [INFO] [config.py:957:print]   dataloader_drop_last ......... False
-[2023-04-14 08:46:44,209] [INFO] [config.py:957:print]   disable_allgather ............ False
-[2023-04-14 08:46:44,209] [INFO] [config.py:957:print]   dump_state ................... False
-[2023-04-14 08:46:44,209] [INFO] [config.py:957:print]   dynamic_loss_scale_args ...... {'init_scale': 65536, 'scale_window': 100, 'delayed_shift': 2, 'min_scale': 1}
-[2023-04-14 08:46:44,209] [INFO] [config.py:957:print]   eigenvalue_enabled ........... False
-[2023-04-14 08:46:44,209] [INFO] [config.py:957:print]   eigenvalue_gas_boundary_resolution  1
-[2023-04-14 08:46:44,209] [INFO] [config.py:957:print]   eigenvalue_layer_name ........ bert.encoder.layer
-[2023-04-14 08:46:44,209] [INFO] [config.py:957:print]   eigenvalue_layer_num ......... 0
-[2023-04-14 08:46:44,209] [INFO] [config.py:957:print]   eigenvalue_max_iter .......... 100
-[2023-04-14 08:46:44,209] [INFO] [config.py:957:print]   eigenvalue_stability ......... 1e-06
-[2023-04-14 08:46:44,209] [INFO] [config.py:957:print]   eigenvalue_tol ............... 0.01
-[2023-04-14 08:46:44,209] [INFO] [config.py:957:print]   eigenvalue_verbose ........... False
-[2023-04-14 08:46:44,209] [INFO] [config.py:957:print]   elasticity_enabled ........... False
-[2023-04-14 08:46:44,209] [INFO] [config.py:957:print]   flops_profiler_config ........ {
+[2023-04-21 23:38:24,169] [INFO] [config.py:957:print]   bfloat16_enabled ............. False
+[2023-04-21 23:38:24,169] [INFO] [config.py:957:print]   checkpoint_parallel_write_pipeline  False
+[2023-04-21 23:38:24,169] [INFO] [config.py:957:print]   checkpoint_tag_validation_enabled  True
+[2023-04-21 23:38:24,169] [INFO] [config.py:957:print]   checkpoint_tag_validation_fail  False
+[2023-04-21 23:38:24,169] [INFO] [config.py:957:print]   comms_config ................. <deepspeed.comm.config.DeepSpeedCommsConfig object at 0x7fb1f7ff0670>
+[2023-04-21 23:38:24,169] [INFO] [config.py:957:print]   communication_data_type ...... None
+[2023-04-21 23:38:24,169] [INFO] [config.py:957:print]   compression_config ........... {'weight_quantization': {'shared_parameters': {'enabled': False, 'quantizer_kernel': False, 'schedule_offset': 0, 'quantize_groups': 1, 'quantize_verbose': False, 'quantization_type': 'symmetric', 'quantize_weight_in_forward': False, 'rounding': 'nearest', 'fp16_mixed_quantize': False, 'quantize_change_ratio': 0.001}, 'different_groups': {}}, 'activation_quantization': {'shared_parameters': {'enabled': False, 'quantization_type': 'symmetric', 'range_calibration': 'dynamic', 'schedule_offset': 1000}, 'different_groups': {}}, 'sparse_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'row_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'head_pruning': {'shared_parameters': {'enabled': False, 'method': 'topk', 'schedule_offset': 1000}, 'different_groups': {}}, 'channel_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'layer_reduction': {'enabled': False}}
+[2023-04-21 23:38:24,169] [INFO] [config.py:957:print]   curriculum_enabled_legacy .... False
+[2023-04-21 23:38:24,169] [INFO] [config.py:957:print]   curriculum_params_legacy ..... False
+[2023-04-21 23:38:24,169] [INFO] [config.py:957:print]   data_efficiency_config ....... {'enabled': False, 'seed': 1234, 'data_sampling': {'enabled': False, 'num_epochs': 1000, 'num_workers': 0, 'curriculum_learning': {'enabled': False}}, 'data_routing': {'enabled': False, 'random_ltd': {'enabled': False, 'layer_token_lr_schedule': {'enabled': False}}}}
+[2023-04-21 23:38:24,169] [INFO] [config.py:957:print]   data_efficiency_enabled ...... False
+[2023-04-21 23:38:24,169] [INFO] [config.py:957:print]   dataloader_drop_last ......... False
+[2023-04-21 23:38:24,169] [INFO] [config.py:957:print]   disable_allgather ............ False
+[2023-04-21 23:38:24,169] [INFO] [config.py:957:print]   dump_state ................... False
+[2023-04-21 23:38:24,169] [INFO] [config.py:957:print]   dynamic_loss_scale_args ...... {'init_scale': 65536, 'scale_window': 100, 'delayed_shift': 2, 'min_scale': 1}
+[2023-04-21 23:38:24,169] [INFO] [config.py:957:print]   eigenvalue_enabled ........... False
+[2023-04-21 23:38:24,169] [INFO] [config.py:957:print]   eigenvalue_gas_boundary_resolution  1
+[2023-04-21 23:38:24,169] [INFO] [config.py:957:print]   eigenvalue_layer_name ........ bert.encoder.layer
+[2023-04-21 23:38:24,169] [INFO] [config.py:957:print]   eigenvalue_layer_num ......... 0
+[2023-04-21 23:38:24,169] [INFO] [config.py:957:print]   eigenvalue_max_iter .......... 100
+[2023-04-21 23:38:24,169] [INFO] [config.py:957:print]   eigenvalue_stability ......... 1e-06
+[2023-04-21 23:38:24,169] [INFO] [config.py:957:print]   eigenvalue_tol ............... 0.01
+[2023-04-21 23:38:24,169] [INFO] [config.py:957:print]   eigenvalue_verbose ........... False
+[2023-04-21 23:38:24,169] [INFO] [config.py:957:print]   elasticity_enabled ........... False
+[2023-04-21 23:38:24,169] [INFO] [config.py:957:print]   flops_profiler_config ........ {
     "enabled": false, 
     "profile_step": 1, 
     "module_depth": -1, 
@@ -1660,21 +1430,21 @@ Time to load utils op: 0.0012271404266357422 seconds
     "detailed": true, 
     "output_file": null
 }
-[2023-04-14 08:46:44,209] [INFO] [config.py:957:print]   fp16_auto_cast ............... False
-[2023-04-14 08:46:44,209] [INFO] [config.py:957:print]   fp16_enabled ................. True
-[2023-04-14 08:46:44,209] [INFO] [config.py:957:print]   fp16_master_weights_and_gradients  False
-[2023-04-14 08:46:44,209] [INFO] [config.py:957:print]   global_rank .................. 0
-[2023-04-14 08:46:44,209] [INFO] [config.py:957:print]   grad_accum_dtype ............. None
-[2023-04-14 08:46:44,209] [INFO] [config.py:957:print]   gradient_accumulation_steps .. 1
-[2023-04-14 08:46:44,209] [INFO] [config.py:957:print]   gradient_clipping ............ 1.0
-[2023-04-14 08:46:44,210] [INFO] [config.py:957:print]   gradient_predivide_factor .... 1.0
-[2023-04-14 08:46:44,210] [INFO] [config.py:957:print]   hybrid_engine ................ enabled=False max_out_tokens=512 inference_tp_size=1 release_inference_cache=False pin_parameters=True tp_gather_partition_size=8
-[2023-04-14 08:46:44,210] [INFO] [config.py:957:print]   initial_dynamic_scale ........ 65536
-[2023-04-14 08:46:44,210] [INFO] [config.py:957:print]   load_universal_checkpoint .... False
-[2023-04-14 08:46:44,210] [INFO] [config.py:957:print]   loss_scale ................... 0
-[2023-04-14 08:46:44,210] [INFO] [config.py:957:print]   memory_breakdown ............. False
-[2023-04-14 08:46:44,210] [INFO] [config.py:957:print]   monitor_config ............... tensorboard=TensorBoardConfig(enabled=False, output_path='', job_name='DeepSpeedJobName') wandb=WandbConfig(enabled=False, group=None, team=None, project='deepspeed') csv_monitor=CSVConfig(enabled=False, output_path='', job_name='DeepSpeedJobName') enabled=False
-[2023-04-14 08:46:44,210] [INFO] [config.py:957:print]   nebula_config ................ {
+[2023-04-21 23:38:24,169] [INFO] [config.py:957:print]   fp16_auto_cast ............... False
+[2023-04-21 23:38:24,169] [INFO] [config.py:957:print]   fp16_enabled ................. True
+[2023-04-21 23:38:24,169] [INFO] [config.py:957:print]   fp16_master_weights_and_gradients  False
+[2023-04-21 23:38:24,169] [INFO] [config.py:957:print]   global_rank .................. 0
+[2023-04-21 23:38:24,169] [INFO] [config.py:957:print]   grad_accum_dtype ............. None
+[2023-04-21 23:38:24,169] [INFO] [config.py:957:print]   gradient_accumulation_steps .. 1
+[2023-04-21 23:38:24,169] [INFO] [config.py:957:print]   gradient_clipping ............ 1.0
+[2023-04-21 23:38:24,169] [INFO] [config.py:957:print]   gradient_predivide_factor .... 1.0
+[2023-04-21 23:38:24,170] [INFO] [config.py:957:print]   hybrid_engine ................ enabled=False max_out_tokens=512 inference_tp_size=1 release_inference_cache=False pin_parameters=True tp_gather_partition_size=8
+[2023-04-21 23:38:24,170] [INFO] [config.py:957:print]   initial_dynamic_scale ........ 65536
+[2023-04-21 23:38:24,170] [INFO] [config.py:957:print]   load_universal_checkpoint .... False
+[2023-04-21 23:38:24,170] [INFO] [config.py:957:print]   loss_scale ................... 0
+[2023-04-21 23:38:24,170] [INFO] [config.py:957:print]   memory_breakdown ............. False
+[2023-04-21 23:38:24,170] [INFO] [config.py:957:print]   monitor_config ............... tensorboard=TensorBoardConfig(enabled=False, output_path='', job_name='DeepSpeedJobName') wandb=WandbConfig(enabled=False, group=None, team=None, project='deepspeed') csv_monitor=CSVConfig(enabled=False, output_path='', job_name='DeepSpeedJobName') enabled=False
+[2023-04-21 23:38:24,170] [INFO] [config.py:957:print]   nebula_config ................ {
     "enabled": false, 
     "persistent_storage_path": null, 
     "persistent_time_interval": 100, 
@@ -1682,29 +1452,29 @@ Time to load utils op: 0.0012271404266357422 seconds
     "enable_nebula_load": true, 
     "load_path": null
 }
-[2023-04-14 08:46:44,210] [INFO] [config.py:957:print]   optimizer_legacy_fusion ...... False
-[2023-04-14 08:46:44,210] [INFO] [config.py:957:print]   optimizer_name ............... None
-[2023-04-14 08:46:44,210] [INFO] [config.py:957:print]   optimizer_params ............. None
-[2023-04-14 08:46:44,210] [INFO] [config.py:957:print]   pipeline ..................... {'stages': 'auto', 'partition': 'best', 'seed_layers': False, 'activation_checkpoint_interval': 0}
-[2023-04-14 08:46:44,210] [INFO] [config.py:957:print]   pld_enabled .................. False
-[2023-04-14 08:46:44,210] [INFO] [config.py:957:print]   pld_params ................... False
-[2023-04-14 08:46:44,210] [INFO] [config.py:957:print]   prescale_gradients ........... False
-[2023-04-14 08:46:44,210] [INFO] [config.py:957:print]   scheduler_name ............... None
-[2023-04-14 08:46:44,210] [INFO] [config.py:957:print]   scheduler_params ............. None
-[2023-04-14 08:46:44,210] [INFO] [config.py:957:print]   sparse_attention ............. None
-[2023-04-14 08:46:44,210] [INFO] [config.py:957:print]   sparse_gradients_enabled ..... False
-[2023-04-14 08:46:44,210] [INFO] [config.py:957:print]   steps_per_print .............. 10
-[2023-04-14 08:46:44,210] [INFO] [config.py:957:print]   train_batch_size ............. 32
-[2023-04-14 08:46:44,210] [INFO] [config.py:957:print]   train_micro_batch_size_per_gpu  4
-[2023-04-14 08:46:44,210] [INFO] [config.py:957:print]   use_node_local_storage ....... False
-[2023-04-14 08:46:44,210] [INFO] [config.py:957:print]   wall_clock_breakdown ......... False
-[2023-04-14 08:46:44,210] [INFO] [config.py:957:print]   world_size ................... 8
-[2023-04-14 08:46:44,210] [INFO] [config.py:957:print]   zero_allow_untested_optimizer  False
-[2023-04-14 08:46:44,210] [INFO] [config.py:957:print]   zero_config .................. stage=2 contiguous_gradients=True reduce_scatter=True reduce_bucket_size=500,000,000 allgather_partitions=True allgather_bucket_size=500,000,000 overlap_comm=False load_from_fp32_weights=True elastic_checkpoint=False offload_param=DeepSpeedZeroOffloadParamConfig(device='none', nvme_path=None, buffer_count=5, buffer_size=100,000,000, max_in_cpu=1,000,000,000, pin_memory=False) offload_optimizer=DeepSpeedZeroOffloadOptimizerConfig(device='none', nvme_path=None, buffer_count=4, pin_memory=False, pipeline=False, pipeline_read=False, pipeline_write=False, fast_init=False) sub_group_size=1,000,000,000 cpu_offload_param=None cpu_offload_use_pin_memory=None cpu_offload=None prefetch_bucket_size=30000000 param_persistence_threshold=10000 model_persistence_threshold=sys.maxsize max_live_parameters=30000000 max_reuse_distance=1,000,000,000 gather_16bit_weights_on_model_save=False stage3_gather_fp16_weights_on_model_save=False ignore_unused_parameters=True legacy_stage1=False round_robin_gradients=False memory_efficient_linear=False
-[2023-04-14 08:46:44,210] [INFO] [config.py:957:print]   zero_enabled ................. True
-[2023-04-14 08:46:44,210] [INFO] [config.py:957:print]   zero_force_ds_cpu_optimizer .. True
-[2023-04-14 08:46:44,210] [INFO] [config.py:957:print]   zero_optimization_stage ...... 2
-[2023-04-14 08:46:44,210] [INFO] [config.py:943:print_user_config]   json = {
+[2023-04-21 23:38:24,170] [INFO] [config.py:957:print]   optimizer_legacy_fusion ...... False
+[2023-04-21 23:38:24,170] [INFO] [config.py:957:print]   optimizer_name ............... None
+[2023-04-21 23:38:24,170] [INFO] [config.py:957:print]   optimizer_params ............. None
+[2023-04-21 23:38:24,170] [INFO] [config.py:957:print]   pipeline ..................... {'stages': 'auto', 'partition': 'best', 'seed_layers': False, 'activation_checkpoint_interval': 0}
+[2023-04-21 23:38:24,170] [INFO] [config.py:957:print]   pld_enabled .................. False
+[2023-04-21 23:38:24,170] [INFO] [config.py:957:print]   pld_params ................... False
+[2023-04-21 23:38:24,170] [INFO] [config.py:957:print]   prescale_gradients ........... False
+[2023-04-21 23:38:24,170] [INFO] [config.py:957:print]   scheduler_name ............... None
+[2023-04-21 23:38:24,170] [INFO] [config.py:957:print]   scheduler_params ............. None
+[2023-04-21 23:38:24,170] [INFO] [config.py:957:print]   sparse_attention ............. None
+[2023-04-21 23:38:24,170] [INFO] [config.py:957:print]   sparse_gradients_enabled ..... False
+[2023-04-21 23:38:24,170] [INFO] [config.py:957:print]   steps_per_print .............. 10
+[2023-04-21 23:38:24,170] [INFO] [config.py:957:print]   train_batch_size ............. 32
+[2023-04-21 23:38:24,170] [INFO] [config.py:957:print]   train_micro_batch_size_per_gpu  4
+[2023-04-21 23:38:24,170] [INFO] [config.py:957:print]   use_node_local_storage ....... False
+[2023-04-21 23:38:24,170] [INFO] [config.py:957:print]   wall_clock_breakdown ......... False
+[2023-04-21 23:38:24,170] [INFO] [config.py:957:print]   world_size ................... 8
+[2023-04-21 23:38:24,170] [INFO] [config.py:957:print]   zero_allow_untested_optimizer  False
+[2023-04-21 23:38:24,170] [INFO] [config.py:957:print]   zero_config .................. stage=2 contiguous_gradients=True reduce_scatter=True reduce_bucket_size=500,000,000 allgather_partitions=True allgather_bucket_size=500,000,000 overlap_comm=False load_from_fp32_weights=True elastic_checkpoint=False offload_param=DeepSpeedZeroOffloadParamConfig(device='none', nvme_path=None, buffer_count=5, buffer_size=100,000,000, max_in_cpu=1,000,000,000, pin_memory=False) offload_optimizer=DeepSpeedZeroOffloadOptimizerConfig(device='none', nvme_path=None, buffer_count=4, pin_memory=False, pipeline=False, pipeline_read=False, pipeline_write=False, fast_init=False) sub_group_size=1,000,000,000 cpu_offload_param=None cpu_offload_use_pin_memory=None cpu_offload=None prefetch_bucket_size=30000000 param_persistence_threshold=10000 model_persistence_threshold=sys.maxsize max_live_parameters=30000000 max_reuse_distance=1,000,000,000 gather_16bit_weights_on_model_save=False stage3_gather_fp16_weights_on_model_save=False ignore_unused_parameters=True legacy_stage1=False round_robin_gradients=False memory_efficient_linear=False
+[2023-04-21 23:38:24,170] [INFO] [config.py:957:print]   zero_enabled ................. True
+[2023-04-21 23:38:24,170] [INFO] [config.py:957:print]   zero_force_ds_cpu_optimizer .. True
+[2023-04-21 23:38:24,170] [INFO] [config.py:957:print]   zero_optimization_stage ...... 2
+[2023-04-21 23:38:24,170] [INFO] [config.py:943:print_user_config]   json = {
     "train_batch_size": 32, 
     "train_micro_batch_size_per_gpu": 4, 
     "steps_per_print": 10, 
@@ -1730,6 +1500,7 @@ Time to load utils op: 0.0012271404266357422 seconds
     "wall_clock_breakdown": false, 
     "hybrid_engine": {
         "enabled": false, 
+        "max_out_tokens": 512, 
         "inference_tp_size": 1, 
         "release_inference_cache": false, 
         "pin_parameters": true, 
@@ -1739,29 +1510,25 @@ Time to load utils op: 0.0012271404266357422 seconds
 Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module utils, skipping build step...
 Loading extension module utils...
-Time to load utils op: 0.0017056465148925781 seconds
-*****************[end] Initialized Critic Model [end] (duration: 31.02s)******************
+Time to load utils op: 0.0009183883666992188 seconds
+*****************[end] Initialized Critic Model [end] (duration: 24.45s)******************
 ************************[start] Initializing Reward Model [start] ************************
-[2023-04-14 08:47:02,078] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed info: version=0.9.0, git-hash=unknown, git-branch=unknown
-Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+[2023-04-21 23:38:34,778] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed info: version=0.9.0, git-hash=unknown, git-branch=unknown
 Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module utils, skipping build step...
 Loading extension module utils...
-Time to load utils op: 1.1240429878234863 seconds
-No modifications detected for re-loaded extension module utils, skipping build step...
-Loading extension module utils...
-Time to load utils op: 0.42214012145996094 seconds
+Time to load utils op: 0.0014276504516601562 seconds
 Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module utils, skipping build step...
 Loading extension module utils...
-Time to load utils op: 0.002139568328857422 seconds
+Time to load utils op: 0.0013539791107177734 seconds
 Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module utils, skipping build step...
 Loading extension module utils...
-Time to load utils op: 0.0012676715850830078 seconds
-[2023-04-14 08:47:21,293] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed Flops Profiler Enabled: False
-[2023-04-14 08:47:21,295] [INFO] [config.py:953:print] DeepSpeedEngine configuration:
-[2023-04-14 08:47:21,295] [INFO] [config.py:957:print]   activation_checkpointing_config  {
+Time to load utils op: 0.0012841224670410156 seconds
+[2023-04-21 23:38:41,712] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed Flops Profiler Enabled: False
+[2023-04-21 23:38:41,713] [INFO] [config.py:953:print] DeepSpeedEngine configuration:
+[2023-04-21 23:38:41,713] [INFO] [config.py:957:print]   activation_checkpointing_config  {
     "partition_activations": false, 
     "contiguous_memory_optimization": false, 
     "cpu_checkpointing": false, 
@@ -1769,10 +1536,10 @@ Time to load utils op: 0.0012676715850830078 seconds
     "synchronize_checkpoint_boundary": false, 
     "profile": false
 }
-[2023-04-14 08:47:21,295] [INFO] [config.py:957:print]   aio_config ................... {'block_size': 1048576, 'queue_depth': 8, 'thread_count': 1, 'single_submit': False, 'overlap_events': True}
-[2023-04-14 08:47:21,295] [INFO] [config.py:957:print]   amp_enabled .................. False
-[2023-04-14 08:47:21,295] [INFO] [config.py:957:print]   amp_params ................... False
-[2023-04-14 08:47:21,295] [INFO] [config.py:957:print]   autotuning_config ............ {
+[2023-04-21 23:38:41,713] [INFO] [config.py:957:print]   aio_config ................... {'block_size': 1048576, 'queue_depth': 8, 'thread_count': 1, 'single_submit': False, 'overlap_events': True}
+[2023-04-21 23:38:41,713] [INFO] [config.py:957:print]   amp_enabled .................. False
+[2023-04-21 23:38:41,713] [INFO] [config.py:957:print]   amp_params ................... False
+[2023-04-21 23:38:41,714] [INFO] [config.py:957:print]   autotuning_config ............ {
     "enabled": false, 
     "start_step": null, 
     "end_step": null, 
@@ -1797,31 +1564,31 @@ Time to load utils op: 0.0012676715850830078 seconds
     "min_train_micro_batch_size_per_gpu": 1, 
     "num_tuning_micro_batch_sizes": 3
 }
-[2023-04-14 08:47:21,295] [INFO] [config.py:957:print]   bfloat16_enabled ............. False
-[2023-04-14 08:47:21,295] [INFO] [config.py:957:print]   checkpoint_parallel_write_pipeline  False
-[2023-04-14 08:47:21,295] [INFO] [config.py:957:print]   checkpoint_tag_validation_enabled  True
-[2023-04-14 08:47:21,295] [INFO] [config.py:957:print]   checkpoint_tag_validation_fail  False
-[2023-04-14 08:47:21,295] [INFO] [config.py:957:print]   comms_config ................. <deepspeed.comm.config.DeepSpeedCommsConfig object at 0x7f0501546370>
-[2023-04-14 08:47:21,295] [INFO] [config.py:957:print]   communication_data_type ...... None
-[2023-04-14 08:47:21,295] [INFO] [config.py:957:print]   compression_config ........... {'weight_quantization': {'shared_parameters': {'enabled': False, 'quantizer_kernel': False, 'schedule_offset': 0, 'quantize_groups': 1, 'quantize_verbose': False, 'quantization_type': 'symmetric', 'quantize_weight_in_forward': False, 'rounding': 'nearest', 'fp16_mixed_quantize': False, 'quantize_change_ratio': 0.001}, 'different_groups': {}}, 'activation_quantization': {'shared_parameters': {'enabled': False, 'quantization_type': 'symmetric', 'range_calibration': 'dynamic', 'schedule_offset': 1000}, 'different_groups': {}}, 'sparse_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'row_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'head_pruning': {'shared_parameters': {'enabled': False, 'method': 'topk', 'schedule_offset': 1000}, 'different_groups': {}}, 'channel_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'layer_reduction': {'enabled': False}}
-[2023-04-14 08:47:21,295] [INFO] [config.py:957:print]   curriculum_enabled_legacy .... False
-[2023-04-14 08:47:21,295] [INFO] [config.py:957:print]   curriculum_params_legacy ..... False
-[2023-04-14 08:47:21,295] [INFO] [config.py:957:print]   data_efficiency_config ....... {'enabled': False, 'seed': 1234, 'data_sampling': {'enabled': False, 'num_epochs': 1000, 'num_workers': 0, 'curriculum_learning': {'enabled': False}}, 'data_routing': {'enabled': False, 'random_ltd': {'enabled': False, 'layer_token_lr_schedule': {'enabled': False}}}}
-[2023-04-14 08:47:21,295] [INFO] [config.py:957:print]   data_efficiency_enabled ...... False
-[2023-04-14 08:47:21,295] [INFO] [config.py:957:print]   dataloader_drop_last ......... False
-[2023-04-14 08:47:21,295] [INFO] [config.py:957:print]   disable_allgather ............ False
-[2023-04-14 08:47:21,295] [INFO] [config.py:957:print]   dump_state ................... False
-[2023-04-14 08:47:21,295] [INFO] [config.py:957:print]   dynamic_loss_scale_args ...... None
-[2023-04-14 08:47:21,295] [INFO] [config.py:957:print]   eigenvalue_enabled ........... False
-[2023-04-14 08:47:21,296] [INFO] [config.py:957:print]   eigenvalue_gas_boundary_resolution  1
-[2023-04-14 08:47:21,296] [INFO] [config.py:957:print]   eigenvalue_layer_name ........ bert.encoder.layer
-[2023-04-14 08:47:21,296] [INFO] [config.py:957:print]   eigenvalue_layer_num ......... 0
-[2023-04-14 08:47:21,296] [INFO] [config.py:957:print]   eigenvalue_max_iter .......... 100
-[2023-04-14 08:47:21,296] [INFO] [config.py:957:print]   eigenvalue_stability ......... 1e-06
-[2023-04-14 08:47:21,296] [INFO] [config.py:957:print]   eigenvalue_tol ............... 0.01
-[2023-04-14 08:47:21,296] [INFO] [config.py:957:print]   eigenvalue_verbose ........... False
-[2023-04-14 08:47:21,296] [INFO] [config.py:957:print]   elasticity_enabled ........... False
-[2023-04-14 08:47:21,296] [INFO] [config.py:957:print]   flops_profiler_config ........ {
+[2023-04-21 23:38:41,714] [INFO] [config.py:957:print]   bfloat16_enabled ............. False
+[2023-04-21 23:38:41,714] [INFO] [config.py:957:print]   checkpoint_parallel_write_pipeline  False
+[2023-04-21 23:38:41,714] [INFO] [config.py:957:print]   checkpoint_tag_validation_enabled  True
+[2023-04-21 23:38:41,714] [INFO] [config.py:957:print]   checkpoint_tag_validation_fail  False
+[2023-04-21 23:38:41,714] [INFO] [config.py:957:print]   comms_config ................. <deepspeed.comm.config.DeepSpeedCommsConfig object at 0x7fb1f64d1af0>
+[2023-04-21 23:38:41,714] [INFO] [config.py:957:print]   communication_data_type ...... None
+[2023-04-21 23:38:41,714] [INFO] [config.py:957:print]   compression_config ........... {'weight_quantization': {'shared_parameters': {'enabled': False, 'quantizer_kernel': False, 'schedule_offset': 0, 'quantize_groups': 1, 'quantize_verbose': False, 'quantization_type': 'symmetric', 'quantize_weight_in_forward': False, 'rounding': 'nearest', 'fp16_mixed_quantize': False, 'quantize_change_ratio': 0.001}, 'different_groups': {}}, 'activation_quantization': {'shared_parameters': {'enabled': False, 'quantization_type': 'symmetric', 'range_calibration': 'dynamic', 'schedule_offset': 1000}, 'different_groups': {}}, 'sparse_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'row_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'head_pruning': {'shared_parameters': {'enabled': False, 'method': 'topk', 'schedule_offset': 1000}, 'different_groups': {}}, 'channel_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'layer_reduction': {'enabled': False}}
+[2023-04-21 23:38:41,714] [INFO] [config.py:957:print]   curriculum_enabled_legacy .... False
+[2023-04-21 23:38:41,714] [INFO] [config.py:957:print]   curriculum_params_legacy ..... False
+[2023-04-21 23:38:41,714] [INFO] [config.py:957:print]   data_efficiency_config ....... {'enabled': False, 'seed': 1234, 'data_sampling': {'enabled': False, 'num_epochs': 1000, 'num_workers': 0, 'curriculum_learning': {'enabled': False}}, 'data_routing': {'enabled': False, 'random_ltd': {'enabled': False, 'layer_token_lr_schedule': {'enabled': False}}}}
+[2023-04-21 23:38:41,714] [INFO] [config.py:957:print]   data_efficiency_enabled ...... False
+[2023-04-21 23:38:41,714] [INFO] [config.py:957:print]   dataloader_drop_last ......... False
+[2023-04-21 23:38:41,714] [INFO] [config.py:957:print]   disable_allgather ............ False
+[2023-04-21 23:38:41,714] [INFO] [config.py:957:print]   dump_state ................... False
+[2023-04-21 23:38:41,714] [INFO] [config.py:957:print]   dynamic_loss_scale_args ...... None
+[2023-04-21 23:38:41,714] [INFO] [config.py:957:print]   eigenvalue_enabled ........... False
+[2023-04-21 23:38:41,714] [INFO] [config.py:957:print]   eigenvalue_gas_boundary_resolution  1
+[2023-04-21 23:38:41,714] [INFO] [config.py:957:print]   eigenvalue_layer_name ........ bert.encoder.layer
+[2023-04-21 23:38:41,714] [INFO] [config.py:957:print]   eigenvalue_layer_num ......... 0
+[2023-04-21 23:38:41,714] [INFO] [config.py:957:print]   eigenvalue_max_iter .......... 100
+[2023-04-21 23:38:41,714] [INFO] [config.py:957:print]   eigenvalue_stability ......... 1e-06
+[2023-04-21 23:38:41,714] [INFO] [config.py:957:print]   eigenvalue_tol ............... 0.01
+[2023-04-21 23:38:41,714] [INFO] [config.py:957:print]   eigenvalue_verbose ........... False
+[2023-04-21 23:38:41,714] [INFO] [config.py:957:print]   elasticity_enabled ........... False
+[2023-04-21 23:38:41,714] [INFO] [config.py:957:print]   flops_profiler_config ........ {
     "enabled": false, 
     "profile_step": 1, 
     "module_depth": -1, 
@@ -1829,21 +1596,21 @@ Time to load utils op: 0.0012676715850830078 seconds
     "detailed": true, 
     "output_file": null
 }
-[2023-04-14 08:47:21,296] [INFO] [config.py:957:print]   fp16_auto_cast ............... False
-[2023-04-14 08:47:21,296] [INFO] [config.py:957:print]   fp16_enabled ................. True
-[2023-04-14 08:47:21,296] [INFO] [config.py:957:print]   fp16_master_weights_and_gradients  False
-[2023-04-14 08:47:21,296] [INFO] [config.py:957:print]   global_rank .................. 0
-[2023-04-14 08:47:21,296] [INFO] [config.py:957:print]   grad_accum_dtype ............. None
-[2023-04-14 08:47:21,296] [INFO] [config.py:957:print]   gradient_accumulation_steps .. 1
-[2023-04-14 08:47:21,296] [INFO] [config.py:957:print]   gradient_clipping ............ 1.0
-[2023-04-14 08:47:21,296] [INFO] [config.py:957:print]   gradient_predivide_factor .... 1.0
-[2023-04-14 08:47:21,296] [INFO] [config.py:957:print]   hybrid_engine ................ enabled=False max_out_tokens=512 inference_tp_size=1 release_inference_cache=False pin_parameters=True tp_gather_partition_size=8
-[2023-04-14 08:47:21,296] [INFO] [config.py:957:print]   initial_dynamic_scale ........ 65536
-[2023-04-14 08:47:21,296] [INFO] [config.py:957:print]   load_universal_checkpoint .... False
-[2023-04-14 08:47:21,296] [INFO] [config.py:957:print]   loss_scale ................... 0
-[2023-04-14 08:47:21,296] [INFO] [config.py:957:print]   memory_breakdown ............. False
-[2023-04-14 08:47:21,296] [INFO] [config.py:957:print]   monitor_config ............... tensorboard=TensorBoardConfig(enabled=False, output_path='', job_name='DeepSpeedJobName') wandb=WandbConfig(enabled=False, group=None, team=None, project='deepspeed') csv_monitor=CSVConfig(enabled=False, output_path='', job_name='DeepSpeedJobName') enabled=False
-[2023-04-14 08:47:21,296] [INFO] [config.py:957:print]   nebula_config ................ {
+[2023-04-21 23:38:41,714] [INFO] [config.py:957:print]   fp16_auto_cast ............... False
+[2023-04-21 23:38:41,714] [INFO] [config.py:957:print]   fp16_enabled ................. True
+[2023-04-21 23:38:41,714] [INFO] [config.py:957:print]   fp16_master_weights_and_gradients  False
+[2023-04-21 23:38:41,714] [INFO] [config.py:957:print]   global_rank .................. 0
+[2023-04-21 23:38:41,714] [INFO] [config.py:957:print]   grad_accum_dtype ............. None
+[2023-04-21 23:38:41,714] [INFO] [config.py:957:print]   gradient_accumulation_steps .. 1
+[2023-04-21 23:38:41,715] [INFO] [config.py:957:print]   gradient_clipping ............ 1.0
+[2023-04-21 23:38:41,715] [INFO] [config.py:957:print]   gradient_predivide_factor .... 1.0
+[2023-04-21 23:38:41,715] [INFO] [config.py:957:print]   hybrid_engine ................ enabled=False max_out_tokens=512 inference_tp_size=1 release_inference_cache=False pin_parameters=True tp_gather_partition_size=8
+[2023-04-21 23:38:41,715] [INFO] [config.py:957:print]   initial_dynamic_scale ........ 65536
+[2023-04-21 23:38:41,715] [INFO] [config.py:957:print]   load_universal_checkpoint .... False
+[2023-04-21 23:38:41,715] [INFO] [config.py:957:print]   loss_scale ................... 0
+[2023-04-21 23:38:41,715] [INFO] [config.py:957:print]   memory_breakdown ............. False
+[2023-04-21 23:38:41,715] [INFO] [config.py:957:print]   monitor_config ............... tensorboard=TensorBoardConfig(enabled=False, output_path='', job_name='DeepSpeedJobName') wandb=WandbConfig(enabled=False, group=None, team=None, project='deepspeed') csv_monitor=CSVConfig(enabled=False, output_path='', job_name='DeepSpeedJobName') enabled=False
+[2023-04-21 23:38:41,715] [INFO] [config.py:957:print]   nebula_config ................ {
     "enabled": false, 
     "persistent_storage_path": null, 
     "persistent_time_interval": 100, 
@@ -1851,29 +1618,29 @@ Time to load utils op: 0.0012676715850830078 seconds
     "enable_nebula_load": true, 
     "load_path": null
 }
-[2023-04-14 08:47:21,296] [INFO] [config.py:957:print]   optimizer_legacy_fusion ...... False
-[2023-04-14 08:47:21,296] [INFO] [config.py:957:print]   optimizer_name ............... None
-[2023-04-14 08:47:21,296] [INFO] [config.py:957:print]   optimizer_params ............. None
-[2023-04-14 08:47:21,296] [INFO] [config.py:957:print]   pipeline ..................... {'stages': 'auto', 'partition': 'best', 'seed_layers': False, 'activation_checkpoint_interval': 0}
-[2023-04-14 08:47:21,296] [INFO] [config.py:957:print]   pld_enabled .................. False
-[2023-04-14 08:47:21,296] [INFO] [config.py:957:print]   pld_params ................... False
-[2023-04-14 08:47:21,296] [INFO] [config.py:957:print]   prescale_gradients ........... False
-[2023-04-14 08:47:21,296] [INFO] [config.py:957:print]   scheduler_name ............... None
-[2023-04-14 08:47:21,296] [INFO] [config.py:957:print]   scheduler_params ............. None
-[2023-04-14 08:47:21,296] [INFO] [config.py:957:print]   sparse_attention ............. None
-[2023-04-14 08:47:21,296] [INFO] [config.py:957:print]   sparse_gradients_enabled ..... False
-[2023-04-14 08:47:21,296] [INFO] [config.py:957:print]   steps_per_print .............. 10
-[2023-04-14 08:47:21,296] [INFO] [config.py:957:print]   train_batch_size ............. 32
-[2023-04-14 08:47:21,296] [INFO] [config.py:957:print]   train_micro_batch_size_per_gpu  4
-[2023-04-14 08:47:21,296] [INFO] [config.py:957:print]   use_node_local_storage ....... False
-[2023-04-14 08:47:21,297] [INFO] [config.py:957:print]   wall_clock_breakdown ......... False
-[2023-04-14 08:47:21,297] [INFO] [config.py:957:print]   world_size ................... 8
-[2023-04-14 08:47:21,297] [INFO] [config.py:957:print]   zero_allow_untested_optimizer  False
-[2023-04-14 08:47:21,297] [INFO] [config.py:957:print]   zero_config .................. stage=0 contiguous_gradients=True reduce_scatter=True reduce_bucket_size=500,000,000 allgather_partitions=True allgather_bucket_size=500,000,000 overlap_comm=False load_from_fp32_weights=True elastic_checkpoint=False offload_param=DeepSpeedZeroOffloadParamConfig(device='none', nvme_path=None, buffer_count=5, buffer_size=100,000,000, max_in_cpu=1,000,000,000, pin_memory=False) offload_optimizer=None sub_group_size=1,000,000,000 cpu_offload_param=None cpu_offload_use_pin_memory=None cpu_offload=None prefetch_bucket_size=50,000,000 param_persistence_threshold=10000 model_persistence_threshold=sys.maxsize max_live_parameters=1,000,000,000 max_reuse_distance=1,000,000,000 gather_16bit_weights_on_model_save=False stage3_gather_fp16_weights_on_model_save=False ignore_unused_parameters=True legacy_stage1=False round_robin_gradients=False memory_efficient_linear=False
-[2023-04-14 08:47:21,297] [INFO] [config.py:957:print]   zero_enabled ................. False
-[2023-04-14 08:47:21,297] [INFO] [config.py:957:print]   zero_force_ds_cpu_optimizer .. True
-[2023-04-14 08:47:21,297] [INFO] [config.py:957:print]   zero_optimization_stage ...... 0
-[2023-04-14 08:47:21,297] [INFO] [config.py:943:print_user_config]   json = {
+[2023-04-21 23:38:41,715] [INFO] [config.py:957:print]   optimizer_legacy_fusion ...... False
+[2023-04-21 23:38:41,715] [INFO] [config.py:957:print]   optimizer_name ............... None
+[2023-04-21 23:38:41,715] [INFO] [config.py:957:print]   optimizer_params ............. None
+[2023-04-21 23:38:41,715] [INFO] [config.py:957:print]   pipeline ..................... {'stages': 'auto', 'partition': 'best', 'seed_layers': False, 'activation_checkpoint_interval': 0}
+[2023-04-21 23:38:41,715] [INFO] [config.py:957:print]   pld_enabled .................. False
+[2023-04-21 23:38:41,715] [INFO] [config.py:957:print]   pld_params ................... False
+[2023-04-21 23:38:41,715] [INFO] [config.py:957:print]   prescale_gradients ........... False
+[2023-04-21 23:38:41,715] [INFO] [config.py:957:print]   scheduler_name ............... None
+[2023-04-21 23:38:41,715] [INFO] [config.py:957:print]   scheduler_params ............. None
+[2023-04-21 23:38:41,715] [INFO] [config.py:957:print]   sparse_attention ............. None
+[2023-04-21 23:38:41,715] [INFO] [config.py:957:print]   sparse_gradients_enabled ..... False
+[2023-04-21 23:38:41,715] [INFO] [config.py:957:print]   steps_per_print .............. 10
+[2023-04-21 23:38:41,715] [INFO] [config.py:957:print]   train_batch_size ............. 32
+[2023-04-21 23:38:41,715] [INFO] [config.py:957:print]   train_micro_batch_size_per_gpu  4
+[2023-04-21 23:38:41,715] [INFO] [config.py:957:print]   use_node_local_storage ....... False
+[2023-04-21 23:38:41,715] [INFO] [config.py:957:print]   wall_clock_breakdown ......... False
+[2023-04-21 23:38:41,715] [INFO] [config.py:957:print]   world_size ................... 8
+[2023-04-21 23:38:41,715] [INFO] [config.py:957:print]   zero_allow_untested_optimizer  False
+[2023-04-21 23:38:41,715] [INFO] [config.py:957:print]   zero_config .................. stage=0 contiguous_gradients=True reduce_scatter=True reduce_bucket_size=500,000,000 allgather_partitions=True allgather_bucket_size=500,000,000 overlap_comm=False load_from_fp32_weights=True elastic_checkpoint=False offload_param=DeepSpeedZeroOffloadParamConfig(device='none', nvme_path=None, buffer_count=5, buffer_size=100,000,000, max_in_cpu=1,000,000,000, pin_memory=False) offload_optimizer=None sub_group_size=1,000,000,000 cpu_offload_param=None cpu_offload_use_pin_memory=None cpu_offload=None prefetch_bucket_size=50,000,000 param_persistence_threshold=10000 model_persistence_threshold=sys.maxsize max_live_parameters=1,000,000,000 max_reuse_distance=1,000,000,000 gather_16bit_weights_on_model_save=False stage3_gather_fp16_weights_on_model_save=False ignore_unused_parameters=True legacy_stage1=False round_robin_gradients=False memory_efficient_linear=False
+[2023-04-21 23:38:41,715] [INFO] [config.py:957:print]   zero_enabled ................. False
+[2023-04-21 23:38:41,715] [INFO] [config.py:957:print]   zero_force_ds_cpu_optimizer .. True
+[2023-04-21 23:38:41,715] [INFO] [config.py:957:print]   zero_optimization_stage ...... 0
+[2023-04-21 23:38:41,715] [INFO] [config.py:943:print_user_config]   json = {
     "train_batch_size": 32, 
     "train_micro_batch_size_per_gpu": 4, 
     "steps_per_print": 10, 
@@ -1895,35806 +1662,4175 @@ Time to load utils op: 0.0012676715850830078 seconds
 Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module utils, skipping build step...
 Loading extension module utils...
-Time to load utils op: 0.0011258125305175781 seconds
-*****************[end] Initialized Reward Model [end] (duration: 37.09s)******************
+Time to load utils op: 0.0010828971862792969 seconds
+*****************[end] Initialized Reward Model [end] (duration: 17.55s)******************
 ***** Running training *****
-Beginning of Epoch 1/1, Total Generation Batches 8260
+Beginning of Epoch 1/1, Total Generation Batches 954
 Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module utils, skipping build step...
 Loading extension module utils...
-Time to load utils op: 0.009740114212036133 seconds
+Time to load utils op: 0.0012369155883789062 seconds
 Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module utils, skipping build step...
 Loading extension module utils...
-Time to load utils op: 0.001196146011352539 seconds
+Time to load utils op: 0.0009238719940185547 seconds
 Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module utils, skipping build step...
 Loading extension module utils...
-Time to load utils op: 0.001905679702758789 seconds
+Time to load utils op: 0.0014069080352783203 seconds
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+No modifications detected for re-loaded extension module utils, skipping build step...
+Loading extension module utils...
+Time to load utils op: 0.001169443130493164 seconds
 ------------------------------------------------------
-Free memory : 28.318542 (GigaBytes)  
+Free memory : 25.666199 (GigaBytes)  
 Total memory: 39.586121 (GigaBytes)  
 Requested memory: 0.515625 (GigaBytes) 
 Setting maximum total tokens (input + output) to 512 
-WorkSpace: 0x7efdf6000000 
+WorkSpace: 0x7fa71e000000 
 ------------------------------------------------------
-[2023-04-14 08:47:24,971] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
-[2023-04-14 08:47:25,055] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
-epoch: 0|step: 0|ppo_ep: 1|act_loss: 0.08221435546875|cri_loss: 0.051910400390625|unsuper_loss: 0.0
-average reward score: 0.74267578125
--------------------------------------------------------------------------------------
-|E2E latency=3.40s |Gather latency=0.00s (0.00%) |Generate time=2.68s (79.04%) |Training time=0.62s (18.39%) |Others=0.09 (2.57%)|CurSamplesPerSec=9.42 |AvgSamplesPerSec=9.42
-[2023-04-14 08:47:27,082] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
-[2023-04-14 08:47:27,167] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
-epoch: 0|step: 1|ppo_ep: 1|act_loss: -0.0277862548828125|cri_loss: -0.0082244873046875|unsuper_loss: 0.0
-average reward score: 1.08203125
--------------------------------------------------------------------------------------
-|E2E latency=2.11s |Gather latency=0.00s (0.00%) |Generate time=1.64s (77.71%) |Training time=0.38s (18.12%) |Others=0.09 (4.17%)|CurSamplesPerSec=15.15 |AvgSamplesPerSec=11.62
-[2023-04-14 08:47:29,187] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, reducing to 16384
-[2023-04-14 08:47:29,272] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, reducing to 16384
-epoch: 0|step: 2|ppo_ep: 1|act_loss: 0.009063720703125|cri_loss: 0.0110931396484375|unsuper_loss: 0.0
-average reward score: 0.4833984375
--------------------------------------------------------------------------------------
-|E2E latency=2.11s |Gather latency=0.00s (0.00%) |Generate time=1.60s (76.06%) |Training time=0.41s (19.68%) |Others=0.09 (4.26%)|CurSamplesPerSec=15.20 |AvgSamplesPerSec=12.61
-[2023-04-14 08:47:31,435] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 16384, reducing to 8192
-epoch: 0|step: 3|ppo_ep: 1|act_loss: 0.374755859375|cri_loss: 0.230712890625|unsuper_loss: 0.0
-average reward score: 0.4599609375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.85%) |Training time=0.45s (20.99%) |Others=0.09 (4.17%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=13.09
-epoch: 0|step: 4|ppo_ep: 1|act_loss: 0.25146484375|cri_loss: 0.15087890625|unsuper_loss: 0.0
-average reward score: 0.5869140625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.82%) |Training time=0.44s (20.62%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=13.41
-epoch: 0|step: 5|ppo_ep: 1|act_loss: -0.0999755859375|cri_loss: -0.04248046875|unsuper_loss: 0.0
-average reward score: 1.0810546875
--------------------------------------------------------------------------------------
-|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.79s (75.75%) |Training time=0.47s (20.04%) |Others=0.10 (4.21%)|CurSamplesPerSec=13.54 |AvgSamplesPerSec=13.43
-epoch: 0|step: 6|ppo_ep: 1|act_loss: -0.05633544921875|cri_loss: -0.0228271484375|unsuper_loss: 0.0
-average reward score: 1.267578125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.19%) |Training time=0.43s (20.08%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=13.64
-epoch: 0|step: 7|ppo_ep: 1|act_loss: 0.14599609375|cri_loss: 0.0970458984375|unsuper_loss: 0.0
-average reward score: 0.63232421875
--------------------------------------------------------------------------------------
-|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.83%) |Training time=0.44s (20.56%) |Others=0.10 (4.61%)|CurSamplesPerSec=15.00 |AvgSamplesPerSec=13.79
-epoch: 0|step: 8|ppo_ep: 1|act_loss: -0.1309814453125|cri_loss: -0.061859130859375|unsuper_loss: 0.0
-average reward score: 1.03125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.23%) |Training time=0.45s (21.08%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=13.91
-[2023-04-14 08:47:44,382] [INFO] [logging.py:96:log_dist] [Rank 0] step=10, skipped=3, lr=[6.755000000000001e-07, 6.755000000000001e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 08:47:44,400] [INFO] [timer.py:199:stop] epoch=0/micro_step=10/global_step=10, RunningAvgSamplesPerSec=113.50591974216755, CurrSamplesPerSec=110.00055566710786, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 08:47:44,493] [INFO] [logging.py:96:log_dist] [Rank 0] step=10, skipped=4, lr=[3.0000000000000004e-07, 3.0000000000000004e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 9|ppo_ep: 1|act_loss: 0.1016845703125|cri_loss: 0.0560302734375|unsuper_loss: 0.0
-average reward score: 0.6484375
--------------------------------------------------------------------------------------
-|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.16%) |Training time=0.45s (21.26%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.01
-epoch: 0|step: 10|ppo_ep: 1|act_loss: -0.150390625|cri_loss: -0.06866455078125|unsuper_loss: 0.0
-average reward score: 0.9638671875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.93%) |Training time=0.46s (21.39%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.09
-epoch: 0|step: 11|ppo_ep: 1|act_loss: -0.00390625|cri_loss: 0.007171630859375|unsuper_loss: 0.0
-average reward score: 0.94287109375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.15%) |Training time=0.45s (21.25%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.16
-epoch: 0|step: 12|ppo_ep: 1|act_loss: -0.043121337890625|cri_loss: -0.01629638671875|unsuper_loss: 0.0
-average reward score: 1.5361328125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.16%) |Training time=0.45s (21.16%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.22
-epoch: 0|step: 13|ppo_ep: 1|act_loss: 0.06817626953125|cri_loss: 0.04638671875|unsuper_loss: 0.0
-average reward score: 1.15234375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.95%) |Training time=0.46s (21.47%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.27
-epoch: 0|step: 14|ppo_ep: 1|act_loss: -0.041015625|cri_loss: -0.0144195556640625|unsuper_loss: 0.0
-average reward score: 1.4755859375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.02%) |Training time=0.46s (21.29%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.31
-epoch: 0|step: 15|ppo_ep: 1|act_loss: -0.01495361328125|cri_loss: -0.003643035888671875|unsuper_loss: 0.0
-average reward score: 1.29296875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.40%) |Training time=0.48s (22.04%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.33
-epoch: 0|step: 16|ppo_ep: 1|act_loss: 0.165283203125|cri_loss: 0.108154296875|unsuper_loss: 0.0
-average reward score: 1.4296875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.28%) |Training time=0.48s (21.95%) |Others=0.10 (4.77%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.36
-epoch: 0|step: 17|ppo_ep: 1|act_loss: -0.022979736328125|cri_loss: -0.0068511962890625|unsuper_loss: 0.0
-average reward score: 1.6962890625
--------------------------------------------------------------------------------------
-|E2E latency=3.46s |Gather latency=0.00s (0.00%) |Generate time=1.59s (46.04%) |Training time=0.48s (13.84%) |Others=1.39 (40.12%)|CurSamplesPerSec=9.25 |AvgSamplesPerSec=13.93
-epoch: 0|step: 18|ppo_ep: 1|act_loss: -0.06494140625|cri_loss: -0.02099609375|unsuper_loss: 0.0
-average reward score: 1.404296875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.66%) |Training time=0.45s (20.69%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=13.98
-[2023-04-14 08:48:07,384] [INFO] [logging.py:96:log_dist] [Rank 0] step=20, skipped=3, lr=[1.6405000000000002e-06, 1.6405000000000002e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 08:48:07,402] [INFO] [timer.py:199:stop] epoch=0/micro_step=20/global_step=20, RunningAvgSamplesPerSec=109.13579418396058, CurrSamplesPerSec=98.81244018293347, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 08:48:07,495] [INFO] [logging.py:96:log_dist] [Rank 0] step=20, skipped=4, lr=[8.000000000000001e-07, 8.000000000000001e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 19|ppo_ep: 1|act_loss: 0.1060791015625|cri_loss: 0.056182861328125|unsuper_loss: 0.0
-average reward score: 1.443359375
--------------------------------------------------------------------------------------
-|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.75s (74.88%) |Training time=0.49s (20.82%) |Others=0.10 (4.30%)|CurSamplesPerSec=13.69 |AvgSamplesPerSec=13.96
-[2023-04-14 08:48:09,536] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 16384, reducing to 8192
-epoch: 0|step: 20|ppo_ep: 1|act_loss: 0.0240478515625|cri_loss: 0.0189361572265625|unsuper_loss: 0.0
-average reward score: 1.62109375
--------------------------------------------------------------------------------------
-|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.13%) |Training time=0.45s (21.13%) |Others=0.10 (4.74%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.01
-epoch: 0|step: 21|ppo_ep: 1|act_loss: -0.0139617919921875|cri_loss: 0.00112152099609375|unsuper_loss: 0.0
-average reward score: 1.5078125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.64%) |Training time=0.48s (21.81%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.04
-epoch: 0|step: 22|ppo_ep: 1|act_loss: 0.06494140625|cri_loss: 0.0443115234375|unsuper_loss: 0.0
-average reward score: 1.67578125
--------------------------------------------------------------------------------------
-|E2E latency=2.87s |Gather latency=0.00s (0.00%) |Generate time=1.58s (55.15%) |Training time=0.47s (16.54%) |Others=0.81 (28.32%)|CurSamplesPerSec=11.15 |AvgSamplesPerSec=13.88
-epoch: 0|step: 23|ppo_ep: 1|act_loss: 0.06280517578125|cri_loss: 0.04296875|unsuper_loss: 0.0
-average reward score: 1.5107421875
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.40%) |Training time=0.48s (22.04%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=13.91
-epoch: 0|step: 24|ppo_ep: 1|act_loss: 0.11566162109375|cri_loss: 0.0645751953125|unsuper_loss: 0.0
-average reward score: 1.412109375
--------------------------------------------------------------------------------------
-|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.68s (74.14%) |Training time=0.49s (21.43%) |Others=0.10 (4.43%)|CurSamplesPerSec=14.13 |AvgSamplesPerSec=13.92
-epoch: 0|step: 25|ppo_ep: 1|act_loss: 0.04132080078125|cri_loss: 0.030853271484375|unsuper_loss: 0.0
-average reward score: 2.07421875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.15%) |Training time=0.48s (22.28%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=13.95
-epoch: 0|step: 26|ppo_ep: 1|act_loss: 0.0771484375|cri_loss: 0.043548583984375|unsuper_loss: 0.0
-average reward score: 2.001953125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.28%) |Training time=0.48s (22.04%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=13.98
-epoch: 0|step: 27|ppo_ep: 1|act_loss: 0.052703857421875|cri_loss: 0.030303955078125|unsuper_loss: 0.0
-average reward score: 1.6484375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.17%) |Training time=0.48s (22.14%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.01
-epoch: 0|step: 28|ppo_ep: 1|act_loss: -0.023284912109375|cri_loss: -0.00701904296875|unsuper_loss: 0.0
-average reward score: 1.5791015625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.55%) |Training time=0.47s (21.82%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.04
-[2023-04-14 08:48:29,822] [INFO] [logging.py:96:log_dist] [Rank 0] step=30, skipped=4, lr=[2.5090000000000005e-06, 2.5090000000000005e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 08:48:29,840] [INFO] [timer.py:199:stop] epoch=0/micro_step=30/global_step=30, RunningAvgSamplesPerSec=106.47952620942749, CurrSamplesPerSec=99.61674735645987, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 08:48:29,933] [INFO] [logging.py:96:log_dist] [Rank 0] step=30, skipped=4, lr=[1.3e-06, 1.3e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 29|ppo_ep: 1|act_loss: -0.11663818359375|cri_loss: -0.05499267578125|unsuper_loss: 0.0
-average reward score: 1.8896484375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.10%) |Training time=0.48s (22.37%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.06
-epoch: 0|step: 30|ppo_ep: 1|act_loss: -0.067626953125|cri_loss: -0.02947998046875|unsuper_loss: 0.0
-average reward score: 1.865234375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.19%) |Training time=0.48s (22.18%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.08
-epoch: 0|step: 31|ppo_ep: 1|act_loss: -0.127685546875|cri_loss: -0.048828125|unsuper_loss: 0.0
-average reward score: 1.484375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.27%) |Training time=0.48s (22.18%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.11
-epoch: 0|step: 32|ppo_ep: 1|act_loss: 0.05084228515625|cri_loss: 0.0467529296875|unsuper_loss: 0.0
-average reward score: 1.5107421875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.87%) |Training time=0.46s (21.43%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.13
-epoch: 0|step: 33|ppo_ep: 1|act_loss: -0.0036468505859375|cri_loss: 0.000675201416015625|unsuper_loss: 0.0
-average reward score: 1.853515625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.07%) |Training time=0.46s (21.35%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.15
-epoch: 0|step: 34|ppo_ep: 1|act_loss: 0.03912353515625|cri_loss: 0.0289306640625|unsuper_loss: 0.0
-average reward score: 2.162109375
--------------------------------------------------------------------------------------
-|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.04%) |Training time=0.48s (20.58%) |Others=0.10 (4.38%)|CurSamplesPerSec=13.73 |AvgSamplesPerSec=14.14
-epoch: 0|step: 35|ppo_ep: 1|act_loss: 0.00030517578125|cri_loss: 0.0166015625|unsuper_loss: 0.0
-average reward score: 2.197265625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.23%) |Training time=0.48s (22.20%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.15
-epoch: 0|step: 36|ppo_ep: 1|act_loss: 0.02532958984375|cri_loss: 0.0178680419921875|unsuper_loss: 0.0
-average reward score: 2.248046875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.45%) |Training time=0.47s (21.90%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.17
-epoch: 0|step: 37|ppo_ep: 1|act_loss: 0.0909423828125|cri_loss: 0.1270751953125|unsuper_loss: 0.0
-average reward score: 2.359375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.38%) |Training time=0.48s (22.07%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.19
-epoch: 0|step: 38|ppo_ep: 1|act_loss: 0.1304931640625|cri_loss: 0.0841064453125|unsuper_loss: 0.0
-average reward score: 2.462890625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.54%) |Training time=0.47s (21.79%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.20
-[2023-04-14 08:48:51,574] [INFO] [logging.py:96:log_dist] [Rank 0] step=40, skipped=4, lr=[3.474e-06, 3.474e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 08:48:51,593] [INFO] [timer.py:199:stop] epoch=0/micro_step=40/global_step=40, RunningAvgSamplesPerSec=105.52605461942416, CurrSamplesPerSec=102.21813606772618, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 08:48:51,686] [INFO] [logging.py:96:log_dist] [Rank 0] step=40, skipped=4, lr=[1.8000000000000001e-06, 1.8000000000000001e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 39|ppo_ep: 1|act_loss: 0.02679443359375|cri_loss: 0.0269775390625|unsuper_loss: 0.0
-average reward score: 2.24609375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.30%) |Training time=0.48s (22.05%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.22
-epoch: 0|step: 40|ppo_ep: 1|act_loss: 0.11529541015625|cri_loss: 0.06890869140625|unsuper_loss: 0.0
-average reward score: 2.169921875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.29%) |Training time=0.48s (22.07%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.23
-epoch: 0|step: 41|ppo_ep: 1|act_loss: 0.003875732421875|cri_loss: 0.0196533203125|unsuper_loss: 0.0
-average reward score: 2.478515625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.43%) |Training time=0.48s (22.01%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.25
-epoch: 0|step: 42|ppo_ep: 1|act_loss: 0.06878662109375|cri_loss: 0.043701171875|unsuper_loss: 0.0
-average reward score: 2.244140625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.45%) |Training time=0.47s (21.89%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.26
-epoch: 0|step: 43|ppo_ep: 1|act_loss: 0.039947509765625|cri_loss: 0.03765869140625|unsuper_loss: 0.0
-average reward score: 2.173828125
--------------------------------------------------------------------------------------
-|E2E latency=2.42s |Gather latency=0.00s (0.00%) |Generate time=1.58s (65.32%) |Training time=0.47s (19.58%) |Others=0.37 (15.10%)|CurSamplesPerSec=13.20 |AvgSamplesPerSec=14.23
-epoch: 0|step: 44|ppo_ep: 1|act_loss: -0.0477294921875|cri_loss: -0.0135498046875|unsuper_loss: 0.0
-average reward score: 2.2265625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.42%) |Training time=0.47s (21.88%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.25
-epoch: 0|step: 45|ppo_ep: 1|act_loss: 0.006374359130859375|cri_loss: 0.0103607177734375|unsuper_loss: 0.0
-average reward score: 2.35546875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.43%) |Training time=0.47s (22.02%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.26
-epoch: 0|step: 46|ppo_ep: 1|act_loss: -0.04522705078125|cri_loss: -0.01043701171875|unsuper_loss: 0.0
-average reward score: 2.5859375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.21%) |Training time=0.48s (22.17%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.27
-epoch: 0|step: 47|ppo_ep: 1|act_loss: -0.21044921875|cri_loss: -0.07177734375|unsuper_loss: 0.0
-average reward score: 1.8818359375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.62%) |Training time=0.47s (21.80%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.28
-epoch: 0|step: 48|ppo_ep: 1|act_loss: -0.071044921875|cri_loss: -0.0281982421875|unsuper_loss: 0.0
-average reward score: 1.77734375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.25%) |Training time=0.44s (20.25%) |Others=0.12 (5.50%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.29
-[2023-04-14 08:49:13,586] [INFO] [logging.py:96:log_dist] [Rank 0] step=50, skipped=4, lr=[4.439e-06, 4.439e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 08:49:13,604] [INFO] [timer.py:199:stop] epoch=0/micro_step=50/global_step=50, RunningAvgSamplesPerSec=105.16244786155492, CurrSamplesPerSec=102.06640283983927, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 08:49:13,697] [INFO] [logging.py:96:log_dist] [Rank 0] step=50, skipped=4, lr=[2.3000000000000004e-06, 2.3000000000000004e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 49|ppo_ep: 1|act_loss: -0.0042724609375|cri_loss: 0.0188140869140625|unsuper_loss: 0.0
-average reward score: 2.279296875
--------------------------------------------------------------------------------------
-|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.25%) |Training time=0.48s (20.51%) |Others=0.10 (4.24%)|CurSamplesPerSec=13.79 |AvgSamplesPerSec=14.28
-epoch: 0|step: 50|ppo_ep: 1|act_loss: 0.0303955078125|cri_loss: 0.0217437744140625|unsuper_loss: 0.0
-average reward score: 1.8505859375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.13%) |Training time=0.48s (22.24%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.29
-epoch: 0|step: 51|ppo_ep: 1|act_loss: 0.0499267578125|cri_loss: 0.031097412109375|unsuper_loss: 0.0
-average reward score: 1.7294921875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.35%) |Training time=0.47s (21.99%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.30
-epoch: 0|step: 52|ppo_ep: 1|act_loss: 0.080322265625|cri_loss: 0.04913330078125|unsuper_loss: 0.0
-average reward score: 2.24609375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.98%) |Training time=0.49s (22.40%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.31
-epoch: 0|step: 53|ppo_ep: 1|act_loss: -0.0433349609375|cri_loss: -0.0122833251953125|unsuper_loss: 0.0
-average reward score: 2.27734375
--------------------------------------------------------------------------------------
-|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.68s (74.28%) |Training time=0.48s (21.29%) |Others=0.10 (4.43%)|CurSamplesPerSec=14.18 |AvgSamplesPerSec=14.31
-epoch: 0|step: 54|ppo_ep: 1|act_loss: -0.0277557373046875|cri_loss: -0.0034637451171875|unsuper_loss: 0.0
-average reward score: 2.482421875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.48%) |Training time=0.47s (21.87%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.32
-epoch: 0|step: 55|ppo_ep: 1|act_loss: -0.006175994873046875|cri_loss: 0.000583648681640625|unsuper_loss: 0.0
-average reward score: 2.197265625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.10%) |Training time=0.48s (22.33%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.33
-epoch: 0|step: 56|ppo_ep: 1|act_loss: 0.05126953125|cri_loss: 0.0340576171875|unsuper_loss: 0.0
-average reward score: 1.505859375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.53%) |Training time=0.47s (21.83%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.33
-epoch: 0|step: 57|ppo_ep: 1|act_loss: 0.0333251953125|cri_loss: 0.02850341796875|unsuper_loss: 0.0
-average reward score: 1.888671875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.16%) |Training time=0.48s (22.19%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.34
-epoch: 0|step: 58|ppo_ep: 1|act_loss: 0.0157318115234375|cri_loss: 0.017181396484375|unsuper_loss: 0.0
-average reward score: 2.6796875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.23%) |Training time=0.48s (22.10%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.35
-[2023-04-14 08:49:35,261] [INFO] [logging.py:96:log_dist] [Rank 0] step=60, skipped=4, lr=[5.404000000000001e-06, 5.404000000000001e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 08:49:35,280] [INFO] [timer.py:199:stop] epoch=0/micro_step=60/global_step=60, RunningAvgSamplesPerSec=104.51481421567632, CurrSamplesPerSec=102.19540958042981, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 08:49:35,373] [INFO] [logging.py:96:log_dist] [Rank 0] step=60, skipped=4, lr=[2.8000000000000003e-06, 2.8000000000000003e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 59|ppo_ep: 1|act_loss: 0.1995849609375|cri_loss: 0.1263427734375|unsuper_loss: 0.0
-average reward score: 2.279296875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.29%) |Training time=0.48s (22.07%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.36
-epoch: 0|step: 60|ppo_ep: 1|act_loss: 0.0227508544921875|cri_loss: 0.0230255126953125|unsuper_loss: 0.0
-average reward score: 2.01953125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.24%) |Training time=0.48s (22.09%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.37
-epoch: 0|step: 61|ppo_ep: 1|act_loss: -0.019378662109375|cri_loss: 0.0001220703125|unsuper_loss: 0.0
-average reward score: 2.421875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.40%) |Training time=0.47s (21.88%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.37
-epoch: 0|step: 62|ppo_ep: 1|act_loss: 0.0308380126953125|cri_loss: 0.02142333984375|unsuper_loss: 0.0
-average reward score: 2.513671875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.23%) |Training time=0.48s (22.13%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.38
-epoch: 0|step: 63|ppo_ep: 1|act_loss: 0.01335906982421875|cri_loss: 0.0110626220703125|unsuper_loss: 0.0
-average reward score: 2.2421875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.14%) |Training time=0.46s (21.16%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.39
-epoch: 0|step: 64|ppo_ep: 1|act_loss: 0.07464599609375|cri_loss: 0.04296875|unsuper_loss: 0.0
-average reward score: 2.30859375
--------------------------------------------------------------------------------------
-|E2E latency=2.76s |Gather latency=0.00s (0.00%) |Generate time=1.75s (63.68%) |Training time=0.46s (16.86%) |Others=0.54 (19.46%)|CurSamplesPerSec=11.61 |AvgSamplesPerSec=14.34
-epoch: 0|step: 65|ppo_ep: 1|act_loss: -0.02947998046875|cri_loss: 0.002777099609375|unsuper_loss: 0.0
-average reward score: 2.33984375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.38%) |Training time=0.47s (21.80%) |Others=0.10 (4.82%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.34
-epoch: 0|step: 66|ppo_ep: 1|act_loss: 0.08624267578125|cri_loss: 0.05859375|unsuper_loss: 0.0
-average reward score: 2.13671875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.07%) |Training time=0.48s (22.27%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.35
-epoch: 0|step: 67|ppo_ep: 1|act_loss: 0.03778076171875|cri_loss: 0.02117919921875|unsuper_loss: 0.0
-average reward score: 1.986328125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.43%) |Training time=0.47s (21.91%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.36
-epoch: 0|step: 68|ppo_ep: 1|act_loss: 0.0061492919921875|cri_loss: 0.01113128662109375|unsuper_loss: 0.0
-average reward score: 2.46875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.36%) |Training time=0.48s (22.02%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.36
-[2023-04-14 08:49:57,426] [INFO] [logging.py:96:log_dist] [Rank 0] step=70, skipped=4, lr=[6.369000000000001e-06, 6.369000000000001e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 08:49:57,445] [INFO] [timer.py:199:stop] epoch=0/micro_step=70/global_step=70, RunningAvgSamplesPerSec=104.34243082596494, CurrSamplesPerSec=101.61912549440034, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 08:49:57,538] [INFO] [logging.py:96:log_dist] [Rank 0] step=70, skipped=4, lr=[3.3000000000000006e-06, 3.3000000000000006e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 69|ppo_ep: 1|act_loss: 0.04266357421875|cri_loss: 0.057037353515625|unsuper_loss: 0.0
-average reward score: 2.65234375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.20%) |Training time=0.48s (22.18%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.37
-epoch: 0|step: 70|ppo_ep: 1|act_loss: -0.05322265625|cri_loss: 0.0018310546875|unsuper_loss: 0.0
-average reward score: 2.640625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.09%) |Training time=0.48s (22.26%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.38
-epoch: 0|step: 71|ppo_ep: 1|act_loss: -0.072509765625|cri_loss: -0.027374267578125|unsuper_loss: 0.0
-average reward score: 2.64453125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.35%) |Training time=0.47s (22.01%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.38
-epoch: 0|step: 72|ppo_ep: 1|act_loss: 0.0211181640625|cri_loss: 0.0151519775390625|unsuper_loss: 0.0
-average reward score: 1.9931640625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.33%) |Training time=0.47s (22.03%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.39
-epoch: 0|step: 73|ppo_ep: 1|act_loss: -0.056396484375|cri_loss: -0.0245513916015625|unsuper_loss: 0.0
-average reward score: 2.205078125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.36%) |Training time=0.47s (22.00%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.39
-epoch: 0|step: 74|ppo_ep: 1|act_loss: -0.10888671875|cri_loss: -0.047149658203125|unsuper_loss: 0.0
-average reward score: 2.083984375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.63%) |Training time=0.47s (21.71%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.40
-epoch: 0|step: 75|ppo_ep: 1|act_loss: -0.0101776123046875|cri_loss: 2.288818359375e-05|unsuper_loss: 0.0
-average reward score: 2.140625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.23%) |Training time=0.48s (22.16%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.41
-epoch: 0|step: 76|ppo_ep: 1|act_loss: 0.1962890625|cri_loss: 0.11920166015625|unsuper_loss: 0.0
-average reward score: 2.0234375
--------------------------------------------------------------------------------------
-|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.60s (69.30%) |Training time=0.47s (20.44%) |Others=0.24 (10.26%)|CurSamplesPerSec=13.87 |AvgSamplesPerSec=14.40
-epoch: 0|step: 77|ppo_ep: 1|act_loss: 0.2744140625|cri_loss: 0.161865234375|unsuper_loss: 0.0
-average reward score: 2.255859375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.50%) |Training time=0.47s (21.85%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.40
-epoch: 0|step: 78|ppo_ep: 1|act_loss: 0.05914306640625|cri_loss: 0.03167724609375|unsuper_loss: 0.0
-average reward score: 1.486328125
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.15%) |Training time=0.49s (22.20%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.52 |AvgSamplesPerSec=14.41
-[2023-04-14 08:50:19,193] [INFO] [logging.py:96:log_dist] [Rank 0] step=80, skipped=4, lr=[7.3340000000000004e-06, 7.3340000000000004e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 08:50:19,212] [INFO] [timer.py:199:stop] epoch=0/micro_step=80/global_step=80, RunningAvgSamplesPerSec=104.23033362572767, CurrSamplesPerSec=106.49301658358083, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 08:50:19,304] [INFO] [logging.py:96:log_dist] [Rank 0] step=80, skipped=4, lr=[3.8000000000000005e-06, 3.8000000000000005e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 79|ppo_ep: 1|act_loss: 0.04840087890625|cri_loss: 0.0286712646484375|unsuper_loss: 0.0
-average reward score: 2.51953125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.87%) |Training time=0.46s (21.51%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.41
-epoch: 0|step: 80|ppo_ep: 1|act_loss: -0.1134033203125|cri_loss: -0.039398193359375|unsuper_loss: 0.0
-average reward score: 2.19140625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.75%) |Training time=0.46s (21.58%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.42
-epoch: 0|step: 81|ppo_ep: 1|act_loss: -0.018402099609375|cri_loss: -0.006984710693359375|unsuper_loss: 0.0
-average reward score: 1.7470703125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.72%) |Training time=0.47s (21.67%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.42
-epoch: 0|step: 82|ppo_ep: 1|act_loss: 0.0022106170654296875|cri_loss: 0.00585174560546875|unsuper_loss: 0.0
-average reward score: 2.59375
--------------------------------------------------------------------------------------
-|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.61s (72.33%) |Training time=0.48s (21.58%) |Others=0.14 (6.08%)|CurSamplesPerSec=14.34 |AvgSamplesPerSec=14.42
-epoch: 0|step: 83|ppo_ep: 1|act_loss: -0.027191162109375|cri_loss: -0.01220703125|unsuper_loss: 0.0
-average reward score: 2.3984375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.68%) |Training time=0.47s (21.72%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.43
-epoch: 0|step: 84|ppo_ep: 1|act_loss: 0.021575927734375|cri_loss: 0.0121307373046875|unsuper_loss: 0.0
-average reward score: 2.166015625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.64%) |Training time=0.47s (21.68%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.43
-epoch: 0|step: 85|ppo_ep: 1|act_loss: -0.00457763671875|cri_loss: 0.0264892578125|unsuper_loss: 0.0
-average reward score: 2.62890625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.69%) |Training time=0.47s (21.74%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.43
-epoch: 0|step: 86|ppo_ep: 1|act_loss: 0.05242919921875|cri_loss: 0.030517578125|unsuper_loss: 0.0
-average reward score: 2.1796875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.61%) |Training time=0.47s (21.71%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.44
-epoch: 0|step: 87|ppo_ep: 1|act_loss: -0.063720703125|cri_loss: -0.02862548828125|unsuper_loss: 0.0
-average reward score: 2.689453125
--------------------------------------------------------------------------------------
-|E2E latency=3.10s |Gather latency=0.00s (0.00%) |Generate time=1.59s (51.29%) |Training time=0.47s (15.14%) |Others=1.04 (33.57%)|CurSamplesPerSec=10.32 |AvgSamplesPerSec=14.37
-epoch: 0|step: 88|ppo_ep: 1|act_loss: 0.04443359375|cri_loss: 0.0270843505859375|unsuper_loss: 0.0
-average reward score: 2.146484375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.49%) |Training time=0.47s (21.85%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.38
-[2023-04-14 08:50:41,764] [INFO] [logging.py:96:log_dist] [Rank 0] step=90, skipped=4, lr=[8.299000000000001e-06, 8.299000000000001e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 08:50:41,783] [INFO] [timer.py:199:stop] epoch=0/micro_step=90/global_step=90, RunningAvgSamplesPerSec=104.24126059063528, CurrSamplesPerSec=104.1532635552079, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 08:50:41,875] [INFO] [logging.py:96:log_dist] [Rank 0] step=90, skipped=4, lr=[4.3e-06, 4.3e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 89|ppo_ep: 1|act_loss: 0.021820068359375|cri_loss: 0.01277923583984375|unsuper_loss: 0.0
-average reward score: 2.080078125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.67%) |Training time=0.47s (21.79%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.38
-epoch: 0|step: 90|ppo_ep: 1|act_loss: 0.04132080078125|cri_loss: 0.025177001953125|unsuper_loss: 0.0
-average reward score: 2.509765625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.80%) |Training time=0.49s (22.61%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.39
-epoch: 0|step: 91|ppo_ep: 1|act_loss: -0.03411865234375|cri_loss: -0.014068603515625|unsuper_loss: 0.0
-average reward score: 2.0859375
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.56%) |Training time=0.50s (22.97%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.39
-epoch: 0|step: 92|ppo_ep: 1|act_loss: 0.0838623046875|cri_loss: 0.04376220703125|unsuper_loss: 0.0
-average reward score: 1.935546875
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.47%) |Training time=0.50s (22.94%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.39
-epoch: 0|step: 93|ppo_ep: 1|act_loss: 0.052032470703125|cri_loss: 0.028717041015625|unsuper_loss: 0.0
-average reward score: 1.8779296875
--------------------------------------------------------------------------------------
-|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.63s (69.96%) |Training time=0.59s (25.52%) |Others=0.11 (4.52%)|CurSamplesPerSec=13.73 |AvgSamplesPerSec=14.38
-epoch: 0|step: 94|ppo_ep: 1|act_loss: 0.02972412109375|cri_loss: 0.0212554931640625|unsuper_loss: 0.0
-average reward score: 2.41796875
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.25%) |Training time=0.51s (23.21%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.52 |AvgSamplesPerSec=14.39
-epoch: 0|step: 95|ppo_ep: 1|act_loss: -0.09625244140625|cri_loss: -0.038818359375|unsuper_loss: 0.0
-average reward score: 2.189453125
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.46%) |Training time=0.50s (23.04%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.39
-epoch: 0|step: 96|ppo_ep: 1|act_loss: -0.003040313720703125|cri_loss: 0.001705169677734375|unsuper_loss: 0.0
-average reward score: 2.337890625
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.65%) |Training time=0.50s (22.75%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.39
-epoch: 0|step: 97|ppo_ep: 1|act_loss: 0.00817108154296875|cri_loss: 0.006500244140625|unsuper_loss: 0.0
-average reward score: 1.982421875
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.65%) |Training time=0.50s (22.78%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.39
-epoch: 0|step: 98|ppo_ep: 1|act_loss: 0.0372314453125|cri_loss: 0.0206298828125|unsuper_loss: 0.0
-average reward score: 2.24609375
--------------------------------------------------------------------------------------
-|E2E latency=2.80s |Gather latency=0.00s (0.00%) |Generate time=1.58s (56.59%) |Training time=0.50s (17.98%) |Others=0.71 (25.43%)|CurSamplesPerSec=11.43 |AvgSamplesPerSec=14.36
-[2023-04-14 08:51:04,421] [INFO] [logging.py:96:log_dist] [Rank 0] step=100, skipped=4, lr=[9.264e-06, 9.264e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 08:51:04,439] [INFO] [timer.py:199:stop] epoch=0/micro_step=100/global_step=100, RunningAvgSamplesPerSec=102.92779009185206, CurrSamplesPerSec=92.65775833740642, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 08:51:04,532] [INFO] [logging.py:96:log_dist] [Rank 0] step=100, skipped=4, lr=[4.800000000000001e-06, 4.800000000000001e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 99|ppo_ep: 1|act_loss: 0.050079345703125|cri_loss: 0.0273284912109375|unsuper_loss: 0.0
-average reward score: 2.841796875
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.36%) |Training time=0.51s (23.17%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.36
-epoch: 0|step: 100|ppo_ep: 1|act_loss: -0.035552978515625|cri_loss: -0.0124969482421875|unsuper_loss: 0.0
-average reward score: 2.423828125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.53%) |Training time=0.50s (22.88%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.36
-epoch: 0|step: 101|ppo_ep: 1|act_loss: 0.02716064453125|cri_loss: 0.019012451171875|unsuper_loss: 0.0
-average reward score: 2.265625
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.32%) |Training time=0.51s (23.11%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.36
-epoch: 0|step: 102|ppo_ep: 1|act_loss: 0.03790283203125|cri_loss: 0.020172119140625|unsuper_loss: 0.0
-average reward score: 1.931640625
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.43%) |Training time=0.50s (23.01%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.37
-epoch: 0|step: 103|ppo_ep: 1|act_loss: -0.005924224853515625|cri_loss: -0.000888824462890625|unsuper_loss: 0.0
-average reward score: 2.119140625
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.53%) |Training time=0.51s (23.04%) |Others=0.10 (4.44%)|CurSamplesPerSec=14.48 |AvgSamplesPerSec=14.37
-epoch: 0|step: 104|ppo_ep: 1|act_loss: -0.02667236328125|cri_loss: 0.0159912109375|unsuper_loss: 0.0
-average reward score: 2.685546875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.51%) |Training time=0.50s (22.91%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.37
-epoch: 0|step: 105|ppo_ep: 1|act_loss: 0.0069122314453125|cri_loss: 0.00909423828125|unsuper_loss: 0.0
-average reward score: 3.01171875
--------------------------------------------------------------------------------------
-|E2E latency=3.44s |Gather latency=0.00s (0.00%) |Generate time=1.58s (46.01%) |Training time=0.50s (14.67%) |Others=1.35 (39.32%)|CurSamplesPerSec=9.30 |AvgSamplesPerSec=14.30
-epoch: 0|step: 106|ppo_ep: 1|act_loss: -0.02008056640625|cri_loss: -0.00409698486328125|unsuper_loss: 0.0
-average reward score: 4.37109375
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.17%) |Training time=0.51s (23.28%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.30
-epoch: 0|step: 107|ppo_ep: 1|act_loss: 0.0693359375|cri_loss: 0.03973388671875|unsuper_loss: 0.0
-average reward score: 2.705078125
--------------------------------------------------------------------------------------
-|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.77s (74.59%) |Training time=0.50s (21.21%) |Others=0.10 (4.20%)|CurSamplesPerSec=13.49 |AvgSamplesPerSec=14.29
-epoch: 0|step: 108|ppo_ep: 1|act_loss: 0.0755615234375|cri_loss: 0.046112060546875|unsuper_loss: 0.0
-average reward score: 3.279296875
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.24%) |Training time=0.51s (23.12%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.53 |AvgSamplesPerSec=14.29
-[2023-04-14 08:51:27,825] [INFO] [logging.py:96:log_dist] [Rank 0] step=110, skipped=4, lr=[9.649987126724682e-06, 9.649987126724682e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 08:51:27,843] [INFO] [timer.py:199:stop] epoch=0/micro_step=110/global_step=110, RunningAvgSamplesPerSec=101.84287411563196, CurrSamplesPerSec=84.43894398574668, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 08:51:27,936] [INFO] [logging.py:96:log_dist] [Rank 0] step=110, skipped=4, lr=[4.999993329909162e-06, 4.999993329909162e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 109|ppo_ep: 1|act_loss: 0.0775146484375|cri_loss: 0.044769287109375|unsuper_loss: 0.0
-average reward score: 3.353515625
--------------------------------------------------------------------------------------
-|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.60s (71.41%) |Training time=0.54s (24.22%) |Others=0.10 (4.36%)|CurSamplesPerSec=14.31 |AvgSamplesPerSec=14.29
-epoch: 0|step: 110|ppo_ep: 1|act_loss: 0.1475830078125|cri_loss: 0.1015625|unsuper_loss: 0.0
-average reward score: 3.5390625
--------------------------------------------------------------------------------------
-|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.62s (71.42%) |Training time=0.53s (23.33%) |Others=0.12 (5.25%)|CurSamplesPerSec=14.11 |AvgSamplesPerSec=14.29
-epoch: 0|step: 111|ppo_ep: 1|act_loss: -0.0105133056640625|cri_loss: -0.00122833251953125|unsuper_loss: 0.0
-average reward score: 3.443359375
--------------------------------------------------------------------------------------
-|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.65s (72.47%) |Training time=0.53s (23.06%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.02 |AvgSamplesPerSec=14.29
-epoch: 0|step: 112|ppo_ep: 1|act_loss: -0.0516357421875|cri_loss: -0.0228424072265625|unsuper_loss: 0.0
-average reward score: 3.765625
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.59s (71.88%) |Training time=0.52s (23.59%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.50 |AvgSamplesPerSec=14.29
-epoch: 0|step: 113|ppo_ep: 1|act_loss: -0.0172882080078125|cri_loss: 0.0036468505859375|unsuper_loss: 0.0
-average reward score: 3.423828125
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.59s (71.68%) |Training time=0.53s (23.89%) |Others=0.10 (4.43%)|CurSamplesPerSec=14.46 |AvgSamplesPerSec=14.29
-epoch: 0|step: 114|ppo_ep: 1|act_loss: 0.12249755859375|cri_loss: 0.0792236328125|unsuper_loss: 0.0
-average reward score: 2.9453125
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.01%) |Training time=0.52s (23.45%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.52 |AvgSamplesPerSec=14.29
-epoch: 0|step: 115|ppo_ep: 1|act_loss: 0.03033447265625|cri_loss: 0.023590087890625|unsuper_loss: 0.0
-average reward score: 3.22265625
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.05%) |Training time=0.52s (23.51%) |Others=0.10 (4.43%)|CurSamplesPerSec=14.46 |AvgSamplesPerSec=14.30
-epoch: 0|step: 116|ppo_ep: 1|act_loss: 0.004199981689453125|cri_loss: 0.00466156005859375|unsuper_loss: 0.0
-average reward score: 3.578125
--------------------------------------------------------------------------------------
-|E2E latency=2.08s |Gather latency=0.00s (0.00%) |Generate time=1.59s (76.49%) |Training time=0.39s (18.63%) |Others=0.10 (4.88%)|CurSamplesPerSec=15.41 |AvgSamplesPerSec=14.30
-epoch: 0|step: 117|ppo_ep: 1|act_loss: 0.0142974853515625|cri_loss: 0.00982666015625|unsuper_loss: 0.0
-average reward score: 3.892578125
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.63%) |Training time=0.50s (22.89%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.31
-epoch: 0|step: 118|ppo_ep: 1|act_loss: 0.09619140625|cri_loss: 0.05419921875|unsuper_loss: 0.0
-average reward score: 3.646484375
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.78%) |Training time=0.50s (22.64%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.31
-[2023-04-14 08:51:50,104] [INFO] [logging.py:96:log_dist] [Rank 0] step=120, skipped=4, lr=[9.649908456957608e-06, 9.649908456957608e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 08:51:50,123] [INFO] [timer.py:199:stop] epoch=0/micro_step=120/global_step=120, RunningAvgSamplesPerSec=101.11741645963838, CurrSamplesPerSec=93.20669082868693, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 08:51:50,215] [INFO] [logging.py:96:log_dist] [Rank 0] step=120, skipped=4, lr=[4.999952568371817e-06, 4.999952568371817e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 119|ppo_ep: 1|act_loss: 0.18896484375|cri_loss: 0.116455078125|unsuper_loss: 0.0
-average reward score: 3.484375
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.48%) |Training time=0.51s (23.06%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.31
-epoch: 0|step: 120|ppo_ep: 1|act_loss: -0.066162109375|cri_loss: -0.029693603515625|unsuper_loss: 0.0
-average reward score: 3.103515625
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.44%) |Training time=0.50s (23.01%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.31
-epoch: 0|step: 121|ppo_ep: 1|act_loss: -0.02349853515625|cri_loss: -0.00815582275390625|unsuper_loss: 0.0
-average reward score: 3.55859375
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.30%) |Training time=0.51s (23.15%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.32
-epoch: 0|step: 122|ppo_ep: 1|act_loss: 0.03350830078125|cri_loss: 0.0233154296875|unsuper_loss: 0.0
-average reward score: 3.19921875
--------------------------------------------------------------------------------------
-|E2E latency=3.18s |Gather latency=0.00s (0.00%) |Generate time=1.75s (55.13%) |Training time=0.52s (16.37%) |Others=0.91 (28.50%)|CurSamplesPerSec=10.07 |AvgSamplesPerSec=14.27
-epoch: 0|step: 123|ppo_ep: 1|act_loss: -0.057373046875|cri_loss: -0.022796630859375|unsuper_loss: 0.0
-average reward score: 3.578125
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.49%) |Training time=0.50s (22.68%) |Others=0.11 (4.82%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.27
-epoch: 0|step: 124|ppo_ep: 1|act_loss: 0.162109375|cri_loss: 0.1400146484375|unsuper_loss: 0.0
-average reward score: 4.0703125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.00%) |Training time=0.49s (22.40%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.27
-epoch: 0|step: 125|ppo_ep: 1|act_loss: 0.07275390625|cri_loss: 0.042236328125|unsuper_loss: 0.0
-average reward score: 4.015625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.89%) |Training time=0.49s (22.46%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.28
-epoch: 0|step: 126|ppo_ep: 1|act_loss: -0.01470184326171875|cri_loss: -0.003173828125|unsuper_loss: 0.0
-average reward score: 3.93359375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.01%) |Training time=0.49s (22.36%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.28
-epoch: 0|step: 127|ppo_ep: 1|act_loss: 0.09210205078125|cri_loss: 0.05419921875|unsuper_loss: 0.0
-average reward score: 3.919921875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.68%) |Training time=0.50s (22.76%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.28
-epoch: 0|step: 128|ppo_ep: 1|act_loss: 0.0026092529296875|cri_loss: 0.01064300537109375|unsuper_loss: 0.0
-average reward score: 3.953125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.71%) |Training time=0.49s (22.66%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.29
-[2023-04-14 08:52:12,913] [INFO] [logging.py:96:log_dist] [Rank 0] step=130, skipped=4, lr=[9.649758270407744e-06, 9.649758270407744e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 08:52:12,931] [INFO] [timer.py:199:stop] epoch=0/micro_step=130/global_step=130, RunningAvgSamplesPerSec=100.66527352211149, CurrSamplesPerSec=96.38733686515383, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 08:52:13,024] [INFO] [logging.py:96:log_dist] [Rank 0] step=130, skipped=4, lr=[4.999874751506603e-06, 4.999874751506603e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 129|ppo_ep: 1|act_loss: 0.0672607421875|cri_loss: 0.040771484375|unsuper_loss: 0.0
-average reward score: 4.2265625
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.90%) |Training time=0.49s (22.53%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.29
-epoch: 0|step: 130|ppo_ep: 1|act_loss: -0.0712890625|cri_loss: -0.033447265625|unsuper_loss: 0.0
-average reward score: 4.2265625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.99%) |Training time=0.49s (22.39%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.29
-epoch: 0|step: 131|ppo_ep: 1|act_loss: -0.0435791015625|cri_loss: -0.015869140625|unsuper_loss: 0.0
-average reward score: 4.5234375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.73%) |Training time=0.50s (22.74%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.30
-epoch: 0|step: 132|ppo_ep: 1|act_loss: 0.0189971923828125|cri_loss: 0.0142822265625|unsuper_loss: 0.0
-average reward score: 3.80859375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.03%) |Training time=0.49s (22.35%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.30
-epoch: 0|step: 133|ppo_ep: 1|act_loss: 0.0258636474609375|cri_loss: 0.017120361328125|unsuper_loss: 0.0
-average reward score: 4.05078125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.54%) |Training time=0.50s (22.89%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.30
-epoch: 0|step: 134|ppo_ep: 1|act_loss: -0.128662109375|cri_loss: -0.00958251953125|unsuper_loss: 0.0
-average reward score: 4.80078125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.77%) |Training time=0.49s (22.54%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.30
-epoch: 0|step: 135|ppo_ep: 1|act_loss: -0.119384765625|cri_loss: -0.05499267578125|unsuper_loss: 0.0
-average reward score: 3.833984375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.76%) |Training time=0.49s (22.65%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.31
-epoch: 0|step: 136|ppo_ep: 1|act_loss: -0.12493896484375|cri_loss: -0.05194091796875|unsuper_loss: 0.0
-average reward score: 3.76171875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.74%) |Training time=0.49s (22.60%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.31
-epoch: 0|step: 137|ppo_ep: 1|act_loss: 0.04376220703125|cri_loss: 0.0254669189453125|unsuper_loss: 0.0
-average reward score: 4.1484375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.81%) |Training time=0.49s (22.38%) |Others=0.10 (4.81%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.31
-epoch: 0|step: 138|ppo_ep: 1|act_loss: -0.00145721435546875|cri_loss: 0.002796173095703125|unsuper_loss: 0.0
-average reward score: 4.17578125
--------------------------------------------------------------------------------------
-|E2E latency=2.38s |Gather latency=0.00s (0.00%) |Generate time=1.65s (69.33%) |Training time=0.61s (25.81%) |Others=0.12 (4.85%)|CurSamplesPerSec=13.46 |AvgSamplesPerSec=14.31
-[2023-04-14 08:52:35,041] [INFO] [logging.py:96:log_dist] [Rank 0] step=140, skipped=4, lr=[9.649536569301218e-06, 9.649536569301218e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 08:52:35,060] [INFO] [timer.py:199:stop] epoch=0/micro_step=140/global_step=140, RunningAvgSamplesPerSec=99.93906583355329, CurrSamplesPerSec=69.75915377003102, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 08:52:35,152] [INFO] [logging.py:96:log_dist] [Rank 0] step=140, skipped=4, lr=[4.9997598804669524e-06, 4.9997598804669524e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 139|ppo_ep: 1|act_loss: -0.0291748046875|cri_loss: -0.00913238525390625|unsuper_loss: 0.0
-average reward score: 4.02734375
--------------------------------------------------------------------------------------
-|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.60s (68.45%) |Training time=0.64s (27.28%) |Others=0.10 (4.27%)|CurSamplesPerSec=13.71 |AvgSamplesPerSec=14.30
-epoch: 0|step: 140|ppo_ep: 1|act_loss: 0.0887451171875|cri_loss: 0.0537109375|unsuper_loss: 0.0
-average reward score: 4.5234375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.62%) |Training time=0.50s (22.80%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.30
-epoch: 0|step: 141|ppo_ep: 1|act_loss: -0.0107574462890625|cri_loss: -0.003376007080078125|unsuper_loss: 0.0
-average reward score: 4.21484375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.87%) |Training time=0.49s (22.62%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.31
-epoch: 0|step: 142|ppo_ep: 1|act_loss: -0.044647216796875|cri_loss: -0.01788330078125|unsuper_loss: 0.0
-average reward score: 5.0234375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.71%) |Training time=0.49s (22.69%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.31
-epoch: 0|step: 143|ppo_ep: 1|act_loss: 0.0205841064453125|cri_loss: 0.013397216796875|unsuper_loss: 0.0
-average reward score: 3.9609375
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.21%) |Training time=0.51s (23.10%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.57 |AvgSamplesPerSec=14.31
-epoch: 0|step: 144|ppo_ep: 1|act_loss: -0.0972900390625|cri_loss: -0.045196533203125|unsuper_loss: 0.0
-average reward score: 4.68359375
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.58s (71.70%) |Training time=0.52s (23.78%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.50 |AvgSamplesPerSec=14.31
-epoch: 0|step: 145|ppo_ep: 1|act_loss: -0.054412841796875|cri_loss: -0.023834228515625|unsuper_loss: 0.0
-average reward score: 5.06640625
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.58s (71.83%) |Training time=0.52s (23.63%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.54 |AvgSamplesPerSec=14.31
-epoch: 0|step: 146|ppo_ep: 1|act_loss: 0.040130615234375|cri_loss: 0.0330810546875|unsuper_loss: 0.0
-average reward score: 4.31640625
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.13%) |Training time=0.51s (23.32%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.56 |AvgSamplesPerSec=14.32
-epoch: 0|step: 147|ppo_ep: 1|act_loss: -0.01995849609375|cri_loss: -0.001068115234375|unsuper_loss: 0.0
-average reward score: 4.13671875
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.58s (71.60%) |Training time=0.53s (23.86%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.50 |AvgSamplesPerSec=14.32
-epoch: 0|step: 148|ppo_ep: 1|act_loss: 0.0291748046875|cri_loss: 0.0229949951171875|unsuper_loss: 0.0
-average reward score: 5.375
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.05%) |Training time=0.51s (23.41%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.57 |AvgSamplesPerSec=14.32
-[2023-04-14 08:52:56,981] [INFO] [logging.py:96:log_dist] [Rank 0] step=150, skipped=4, lr=[9.649243356924173e-06, 9.649243356924173e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 08:52:57,000] [INFO] [timer.py:199:stop] epoch=0/micro_step=150/global_step=150, RunningAvgSamplesPerSec=99.35793580311184, CurrSamplesPerSec=91.16621508445346, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 08:52:57,092] [INFO] [logging.py:96:log_dist] [Rank 0] step=150, skipped=4, lr=[4.99960795695553e-06, 4.99960795695553e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 149|ppo_ep: 1|act_loss: -0.01128387451171875|cri_loss: 0.0003204345703125|unsuper_loss: 0.0
-average reward score: 5.21484375
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.14%) |Training time=0.51s (23.33%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.53 |AvgSamplesPerSec=14.32
-epoch: 0|step: 150|ppo_ep: 1|act_loss: 0.0914306640625|cri_loss: 0.050262451171875|unsuper_loss: 0.0
-average reward score: 5.7265625
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.35%) |Training time=0.51s (23.07%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.32
-epoch: 0|step: 151|ppo_ep: 1|act_loss: 0.03863525390625|cri_loss: 0.022003173828125|unsuper_loss: 0.0
-average reward score: 4.93359375
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.25%) |Training time=0.51s (23.04%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.56 |AvgSamplesPerSec=14.32
-epoch: 0|step: 152|ppo_ep: 1|act_loss: -0.0318603515625|cri_loss: -0.01226806640625|unsuper_loss: 0.0
-average reward score: 5.48828125
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.73%) |Training time=0.50s (22.70%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.33
-epoch: 0|step: 153|ppo_ep: 1|act_loss: 0.026123046875|cri_loss: 0.0231781005859375|unsuper_loss: 0.0
-average reward score: 6.06640625
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.58%) |Training time=0.50s (22.94%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.33
-epoch: 0|step: 154|ppo_ep: 1|act_loss: -0.1361083984375|cri_loss: -0.059722900390625|unsuper_loss: 0.0
-average reward score: 6.58984375
--------------------------------------------------------------------------------------
-|E2E latency=2.38s |Gather latency=0.00s (0.00%) |Generate time=1.77s (74.35%) |Training time=0.51s (21.45%) |Others=0.10 (4.20%)|CurSamplesPerSec=13.46 |AvgSamplesPerSec=14.32
-epoch: 0|step: 155|ppo_ep: 1|act_loss: 0.053131103515625|cri_loss: 0.0291748046875|unsuper_loss: 0.0
-average reward score: 6.1796875
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.24%) |Training time=0.51s (23.20%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.32
-epoch: 0|step: 156|ppo_ep: 1|act_loss: 0.0271148681640625|cri_loss: 0.018280029296875|unsuper_loss: 0.0
-average reward score: 5.79296875
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.59s (71.74%) |Training time=0.53s (23.75%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.47 |AvgSamplesPerSec=14.32
-epoch: 0|step: 157|ppo_ep: 1|act_loss: 0.05975341796875|cri_loss: 0.036865234375|unsuper_loss: 0.0
-average reward score: 6.02734375
--------------------------------------------------------------------------------------
-|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.59s (68.33%) |Training time=0.50s (21.58%) |Others=0.23 (10.09%)|CurSamplesPerSec=13.76 |AvgSamplesPerSec=14.32
-epoch: 0|step: 158|ppo_ep: 1|act_loss: 0.025848388671875|cri_loss: 0.0191802978515625|unsuper_loss: 0.0
-average reward score: 5.90625
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.08%) |Training time=0.51s (23.36%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.57 |AvgSamplesPerSec=14.32
-[2023-04-14 08:53:19,249] [INFO] [logging.py:96:log_dist] [Rank 0] step=160, skipped=4, lr=[9.648878637622726e-06, 9.648878637622726e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 08:53:19,268] [INFO] [timer.py:199:stop] epoch=0/micro_step=160/global_step=160, RunningAvgSamplesPerSec=98.90795954004966, CurrSamplesPerSec=93.22086815920744, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 08:53:19,360] [INFO] [logging.py:96:log_dist] [Rank 0] step=160, skipped=4, lr=[4.99941898322421e-06, 4.99941898322421e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 159|ppo_ep: 1|act_loss: 0.0384521484375|cri_loss: 0.0228424072265625|unsuper_loss: 0.0
-average reward score: 7.49609375
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.33%) |Training time=0.51s (23.12%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.32
-epoch: 0|step: 160|ppo_ep: 1|act_loss: 0.0286102294921875|cri_loss: 0.020538330078125|unsuper_loss: 0.0
-average reward score: 5.2734375
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.16%) |Training time=0.51s (23.28%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.32
-epoch: 0|step: 161|ppo_ep: 1|act_loss: -0.0103302001953125|cri_loss: -0.00080108642578125|unsuper_loss: 0.0
-average reward score: 5.40234375
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.35%) |Training time=0.51s (23.17%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.33
-epoch: 0|step: 162|ppo_ep: 1|act_loss: 0.14404296875|cri_loss: 0.08203125|unsuper_loss: 0.0
-average reward score: 6.02734375
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.39%) |Training time=0.50s (23.04%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.33
-epoch: 0|step: 163|ppo_ep: 1|act_loss: 0.0390625|cri_loss: 0.0228118896484375|unsuper_loss: 0.0
-average reward score: 5.38671875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.72%) |Training time=0.50s (22.77%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.33
-epoch: 0|step: 164|ppo_ep: 1|act_loss: -0.00506591796875|cri_loss: 0.0015716552734375|unsuper_loss: 0.0
-average reward score: 5.30859375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.39%) |Training time=0.50s (23.05%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.33
-epoch: 0|step: 165|ppo_ep: 1|act_loss: -0.0205078125|cri_loss: -0.007106781005859375|unsuper_loss: 0.0
-average reward score: 5.0546875
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.27%) |Training time=0.51s (23.17%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.33
-epoch: 0|step: 166|ppo_ep: 1|act_loss: 0.03875732421875|cri_loss: 0.021942138671875|unsuper_loss: 0.0
-average reward score: 5.3671875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.58%) |Training time=0.50s (22.84%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.34
-epoch: 0|step: 167|ppo_ep: 1|act_loss: -0.048583984375|cri_loss: -0.0202789306640625|unsuper_loss: 0.0
-average reward score: 4.83984375
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.05%) |Training time=0.49s (22.47%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.34
-epoch: 0|step: 168|ppo_ep: 1|act_loss: 0.0002899169921875|cri_loss: 0.0030384063720703125|unsuper_loss: 0.0
-average reward score: 5.38671875
--------------------------------------------------------------------------------------
-|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.61s (72.58%) |Training time=0.50s (22.31%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.39 |AvgSamplesPerSec=14.34
-[2023-04-14 08:53:41,327] [INFO] [logging.py:96:log_dist] [Rank 0] step=170, skipped=4, lr=[9.648442416802894e-06, 9.648442416802894e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 08:53:41,345] [INFO] [timer.py:199:stop] epoch=0/micro_step=170/global_step=170, RunningAvgSamplesPerSec=98.61371514796177, CurrSamplesPerSec=93.73256923212298, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 08:53:41,442] [INFO] [logging.py:96:log_dist] [Rank 0] step=170, skipped=4, lr=[4.999192962074038e-06, 4.999192962074038e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 169|ppo_ep: 1|act_loss: 0.0283203125|cri_loss: 0.018218994140625|unsuper_loss: 0.0
-average reward score: 5.25
--------------------------------------------------------------------------------------
-|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.75s (74.29%) |Training time=0.50s (21.39%) |Others=0.10 (4.31%)|CurSamplesPerSec=13.58 |AvgSamplesPerSec=14.33
-epoch: 0|step: 170|ppo_ep: 1|act_loss: -0.010101318359375|cri_loss: -0.0031566619873046875|unsuper_loss: 0.0
-average reward score: 5.140625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.32%) |Training time=0.48s (22.03%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.34
-epoch: 0|step: 171|ppo_ep: 1|act_loss: -0.04736328125|cri_loss: -0.0209808349609375|unsuper_loss: 0.0
-average reward score: 5.2578125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.32%) |Training time=0.48s (22.03%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.34
-epoch: 0|step: 172|ppo_ep: 1|act_loss: -0.06488037109375|cri_loss: -0.029296875|unsuper_loss: 0.0
-average reward score: 5.10546875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.40%) |Training time=0.47s (21.95%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.34
-epoch: 0|step: 173|ppo_ep: 1|act_loss: -0.0254974365234375|cri_loss: -0.0096588134765625|unsuper_loss: 0.0
-average reward score: 5.57421875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.91%) |Training time=0.49s (22.49%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.34
-epoch: 0|step: 174|ppo_ep: 1|act_loss: -0.0384521484375|cri_loss: -0.0167694091796875|unsuper_loss: 0.0
-average reward score: 5.47265625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.74%) |Training time=0.47s (21.59%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.35
-epoch: 0|step: 175|ppo_ep: 1|act_loss: -0.004512786865234375|cri_loss: -0.000179290771484375|unsuper_loss: 0.0
-average reward score: 5.0390625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.47%) |Training time=0.47s (21.89%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.35
-epoch: 0|step: 176|ppo_ep: 1|act_loss: 0.031982421875|cri_loss: 0.019287109375|unsuper_loss: 0.0
-average reward score: 5.171875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.47%) |Training time=0.47s (21.90%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.35
-epoch: 0|step: 177|ppo_ep: 1|act_loss: 0.0201873779296875|cri_loss: 0.01238250732421875|unsuper_loss: 0.0
-average reward score: 5.04296875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.72%) |Training time=0.47s (21.62%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.36
-epoch: 0|step: 178|ppo_ep: 1|act_loss: 0.0927734375|cri_loss: 0.04998779296875|unsuper_loss: 0.0
-average reward score: 5.0625
--------------------------------------------------------------------------------------
-|E2E latency=2.40s |Gather latency=0.00s (0.00%) |Generate time=1.58s (66.01%) |Training time=0.49s (20.57%) |Others=0.32 (13.42%)|CurSamplesPerSec=13.36 |AvgSamplesPerSec=14.35
-[2023-04-14 08:54:03,202] [INFO] [logging.py:96:log_dist] [Rank 0] step=180, skipped=4, lr=[9.647934700930525e-06, 9.647934700930525e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 08:54:03,220] [INFO] [timer.py:199:stop] epoch=0/micro_step=180/global_step=180, RunningAvgSamplesPerSec=98.71604805374989, CurrSamplesPerSec=87.31243836243797, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 08:54:03,313] [INFO] [logging.py:96:log_dist] [Rank 0] step=180, skipped=4, lr=[4.998929896855195e-06, 4.998929896855195e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 179|ppo_ep: 1|act_loss: 0.01641845703125|cri_loss: 0.0104217529296875|unsuper_loss: 0.0
-average reward score: 5.8515625
--------------------------------------------------------------------------------------
-|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.59s (71.49%) |Training time=0.53s (23.88%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.42 |AvgSamplesPerSec=14.35
-epoch: 0|step: 180|ppo_ep: 1|act_loss: -0.001728057861328125|cri_loss: 0.001316070556640625|unsuper_loss: 0.0
-average reward score: 5.23046875
--------------------------------------------------------------------------------------
-|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.03%) |Training time=0.50s (22.31%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.40 |AvgSamplesPerSec=14.35
-epoch: 0|step: 181|ppo_ep: 1|act_loss: -0.0086212158203125|cri_loss: -0.0017242431640625|unsuper_loss: 0.0
-average reward score: 5.5390625
--------------------------------------------------------------------------------------
-|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.65s (73.73%) |Training time=0.48s (21.66%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.32 |AvgSamplesPerSec=14.35
-epoch: 0|step: 182|ppo_ep: 1|act_loss: 0.02783203125|cri_loss: 0.0161590576171875|unsuper_loss: 0.0
-average reward score: 5.4609375
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.40%) |Training time=0.44s (19.93%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.35
-epoch: 0|step: 183|ppo_ep: 1|act_loss: -0.01708984375|cri_loss: -0.005275726318359375|unsuper_loss: 0.0
-average reward score: 5.3515625
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.05%) |Training time=0.50s (22.47%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.51 |AvgSamplesPerSec=14.35
-epoch: 0|step: 184|ppo_ep: 1|act_loss: -0.0195159912109375|cri_loss: -0.008209228515625|unsuper_loss: 0.0
-average reward score: 4.60546875
--------------------------------------------------------------------------------------
-|E2E latency=2.38s |Gather latency=0.00s (0.00%) |Generate time=1.82s (76.48%) |Training time=0.46s (19.21%) |Others=0.10 (4.32%)|CurSamplesPerSec=13.47 |AvgSamplesPerSec=14.35
-epoch: 0|step: 185|ppo_ep: 1|act_loss: -0.00017547607421875|cri_loss: 0.0018596649169921875|unsuper_loss: 0.0
-average reward score: 5.6640625
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.65s (74.95%) |Training time=0.45s (20.43%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.50 |AvgSamplesPerSec=14.35
-[2023-04-14 08:54:18,825] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 16384, but hysteresis is 2. Reducing hysteresis to 1
-[2023-04-14 08:54:18,911] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 16384, but hysteresis is 2. Reducing hysteresis to 1
-epoch: 0|step: 186|ppo_ep: 1|act_loss: -0.01111602783203125|cri_loss: -0.004123687744140625|unsuper_loss: 0.0
-average reward score: 4.5703125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.65s (76.23%) |Training time=0.42s (19.42%) |Others=0.09 (4.35%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.35
-epoch: 0|step: 187|ppo_ep: 1|act_loss: -0.0054168701171875|cri_loss: -0.000263214111328125|unsuper_loss: 0.0
-average reward score: 4.9609375
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.84%) |Training time=0.47s (21.53%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.56 |AvgSamplesPerSec=14.35
-epoch: 0|step: 188|ppo_ep: 1|act_loss: 0.01232147216796875|cri_loss: 0.01074981689453125|unsuper_loss: 0.0
-average reward score: 6.3125
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.84%) |Training time=0.47s (21.45%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.35
-[2023-04-14 08:54:25,388] [INFO] [logging.py:96:log_dist] [Rank 0] step=190, skipped=5, lr=[9.647416634573466e-06, 9.647416634573466e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 08:54:25,406] [INFO] [timer.py:199:stop] epoch=0/micro_step=190/global_step=190, RunningAvgSamplesPerSec=99.07856090082578, CurrSamplesPerSec=102.86956528403809, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 08:54:25,499] [INFO] [logging.py:96:log_dist] [Rank 0] step=190, skipped=5, lr=[4.998661468690914e-06, 4.998661468690914e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 189|ppo_ep: 1|act_loss: -0.072509765625|cri_loss: -0.030487060546875|unsuper_loss: 0.0
-average reward score: 6.27734375
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.93%) |Training time=0.47s (21.60%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.57 |AvgSamplesPerSec=14.35
-epoch: 0|step: 190|ppo_ep: 1|act_loss: -0.04168701171875|cri_loss: -0.017730712890625|unsuper_loss: 0.0
-average reward score: 5.90625
--------------------------------------------------------------------------------------
-|E2E latency=2.84s |Gather latency=0.00s (0.00%) |Generate time=1.62s (57.15%) |Training time=0.47s (16.46%) |Others=0.75 (26.39%)|CurSamplesPerSec=11.28 |AvgSamplesPerSec=14.33
-epoch: 0|step: 191|ppo_ep: 1|act_loss: 0.0439453125|cri_loss: 0.028289794921875|unsuper_loss: 0.0
-average reward score: 6.421875
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.23%) |Training time=0.46s (21.21%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.33
-epoch: 0|step: 192|ppo_ep: 1|act_loss: 0.000202178955078125|cri_loss: 0.003143310546875|unsuper_loss: 0.0
-average reward score: 5.27734375
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.94%) |Training time=0.47s (21.45%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.34
-epoch: 0|step: 193|ppo_ep: 1|act_loss: 0.0016498565673828125|cri_loss: 0.00395965576171875|unsuper_loss: 0.0
-average reward score: 6.703125
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.34%) |Training time=0.46s (21.02%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.34
-epoch: 0|step: 194|ppo_ep: 1|act_loss: -0.04425048828125|cri_loss: -0.016937255859375|unsuper_loss: 0.0
-average reward score: 6.85546875
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.45%) |Training time=0.46s (20.94%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.34
-epoch: 0|step: 195|ppo_ep: 1|act_loss: -0.0312347412109375|cri_loss: -0.0128936767578125|unsuper_loss: 0.0
-average reward score: 6.02734375
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.63%) |Training time=0.46s (20.86%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.34
-epoch: 0|step: 196|ppo_ep: 1|act_loss: 0.04901123046875|cri_loss: 0.030487060546875|unsuper_loss: 0.0
-average reward score: 6.328125
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.66s (75.86%) |Training time=0.43s (19.50%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.34
-epoch: 0|step: 197|ppo_ep: 1|act_loss: 0.0048675537109375|cri_loss: 0.004283905029296875|unsuper_loss: 0.0
-average reward score: 7.109375
--------------------------------------------------------------------------------------
-|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.70s (73.37%) |Training time=0.51s (22.19%) |Others=0.10 (4.44%)|CurSamplesPerSec=13.83 |AvgSamplesPerSec=14.34
-epoch: 0|step: 198|ppo_ep: 1|act_loss: 0.027496337890625|cri_loss: 0.01763916015625|unsuper_loss: 0.0
-average reward score: 6.359375
--------------------------------------------------------------------------------------
-|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.62s (72.68%) |Training time=0.47s (20.95%) |Others=0.14 (6.38%)|CurSamplesPerSec=14.33 |AvgSamplesPerSec=14.34
-[2023-04-14 08:54:48,213] [INFO] [logging.py:96:log_dist] [Rank 0] step=200, skipped=5, lr=[9.646773099710006e-06, 9.646773099710006e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 08:54:48,231] [INFO] [timer.py:199:stop] epoch=0/micro_step=200/global_step=200, RunningAvgSamplesPerSec=99.47785048462806, CurrSamplesPerSec=113.295647388129, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 08:54:48,323] [INFO] [logging.py:96:log_dist] [Rank 0] step=200, skipped=5, lr=[4.998328030937827e-06, 4.998328030937827e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 199|ppo_ep: 1|act_loss: -0.10589599609375|cri_loss: -0.049896240234375|unsuper_loss: 0.0
-average reward score: 6.046875
--------------------------------------------------------------------------------------
-|E2E latency=2.30s |Gather latency=0.00s (0.00%) |Generate time=1.76s (76.27%) |Training time=0.45s (19.35%) |Others=0.10 (4.38%)|CurSamplesPerSec=13.90 |AvgSamplesPerSec=14.34
-epoch: 0|step: 200|ppo_ep: 1|act_loss: 0.00897216796875|cri_loss: 0.0068206787109375|unsuper_loss: 0.0
-average reward score: 5.69140625
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.10%) |Training time=0.47s (21.29%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.34
-epoch: 0|step: 201|ppo_ep: 1|act_loss: 9.5367431640625e-07|cri_loss: 0.0008797645568847656|unsuper_loss: 0.0
-average reward score: 5.99609375
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.91%) |Training time=0.47s (21.50%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.34
-epoch: 0|step: 202|ppo_ep: 1|act_loss: -0.0102386474609375|cri_loss: -0.003673553466796875|unsuper_loss: 0.0
-average reward score: 5.32421875
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.84%) |Training time=0.47s (21.48%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.34
-epoch: 0|step: 203|ppo_ep: 1|act_loss: -0.0355224609375|cri_loss: -0.014923095703125|unsuper_loss: 0.0
-average reward score: 4.9296875
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.86%) |Training time=0.48s (21.66%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.34
-epoch: 0|step: 204|ppo_ep: 1|act_loss: -0.04193115234375|cri_loss: -0.0184326171875|unsuper_loss: 0.0
-average reward score: 5.953125
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.88%) |Training time=0.47s (21.52%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.34
-epoch: 0|step: 205|ppo_ep: 1|act_loss: 0.03240966796875|cri_loss: 0.0181121826171875|unsuper_loss: 0.0
-average reward score: 5.54296875
--------------------------------------------------------------------------------------
-|E2E latency=2.62s |Gather latency=0.00s (0.00%) |Generate time=1.63s (62.21%) |Training time=0.47s (17.92%) |Others=0.52 (19.87%)|CurSamplesPerSec=12.23 |AvgSamplesPerSec=14.33
-epoch: 0|step: 206|ppo_ep: 1|act_loss: 0.05206298828125|cri_loss: 0.0300445556640625|unsuper_loss: 0.0
-average reward score: 5.921875
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.11%) |Training time=0.47s (21.30%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.33
-epoch: 0|step: 207|ppo_ep: 1|act_loss: -0.00235748291015625|cri_loss: 0.000972747802734375|unsuper_loss: 0.0
-average reward score: 5.6015625
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.94%) |Training time=0.47s (21.55%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.33
-epoch: 0|step: 208|ppo_ep: 1|act_loss: 0.000396728515625|cri_loss: 0.006542205810546875|unsuper_loss: 0.0
-average reward score: 6.625
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.83%) |Training time=0.47s (21.59%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.33
-[2023-04-14 08:55:10,546] [INFO] [logging.py:96:log_dist] [Rank 0] step=210, skipped=5, lr=[9.646058094537316e-06, 9.646058094537316e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 08:55:10,564] [INFO] [timer.py:199:stop] epoch=0/micro_step=210/global_step=210, RunningAvgSamplesPerSec=99.69265405081147, CurrSamplesPerSec=104.76069635587082, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 08:55:10,657] [INFO] [logging.py:96:log_dist] [Rank 0] step=210, skipped=5, lr=[4.997957561936433e-06, 4.997957561936433e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 209|ppo_ep: 1|act_loss: -0.060089111328125|cri_loss: -0.027008056640625|unsuper_loss: 0.0
-average reward score: 5.8125
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.15%) |Training time=0.47s (21.36%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.34
-epoch: 0|step: 210|ppo_ep: 1|act_loss: -0.004329681396484375|cri_loss: 0.000911712646484375|unsuper_loss: 0.0
-average reward score: 6.90625
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.43%) |Training time=0.48s (21.94%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.51 |AvgSamplesPerSec=14.34
-epoch: 0|step: 211|ppo_ep: 1|act_loss: -0.03790283203125|cri_loss: -0.016845703125|unsuper_loss: 0.0
-average reward score: 7.890625
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.05%) |Training time=0.45s (20.46%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.34
-epoch: 0|step: 212|ppo_ep: 1|act_loss: -0.022186279296875|cri_loss: -0.0086212158203125|unsuper_loss: 0.0
-average reward score: 6.6640625
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.65s (74.46%) |Training time=0.46s (20.99%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.47 |AvgSamplesPerSec=14.34
-epoch: 0|step: 213|ppo_ep: 1|act_loss: 0.007564544677734375|cri_loss: 0.007236480712890625|unsuper_loss: 0.0
-average reward score: 5.8203125
--------------------------------------------------------------------------------------
-|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.79s (76.07%) |Training time=0.47s (19.74%) |Others=0.10 (4.18%)|CurSamplesPerSec=13.58 |AvgSamplesPerSec=14.33
-epoch: 0|step: 214|ppo_ep: 1|act_loss: 0.022003173828125|cri_loss: 0.01690673828125|unsuper_loss: 0.0
-average reward score: 6.3515625
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.98%) |Training time=0.47s (21.41%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.34
-epoch: 0|step: 215|ppo_ep: 1|act_loss: -0.10302734375|cri_loss: -0.040618896484375|unsuper_loss: 0.0
-average reward score: 5.890625
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.26%) |Training time=0.46s (21.16%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.56 |AvgSamplesPerSec=14.34
-epoch: 0|step: 216|ppo_ep: 1|act_loss: -0.0308685302734375|cri_loss: -0.0083160400390625|unsuper_loss: 0.0
-average reward score: 5.8828125
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.06%) |Training time=0.46s (21.18%) |Others=0.10 (4.76%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.34
-epoch: 0|step: 217|ppo_ep: 1|act_loss: 0.0677490234375|cri_loss: 0.042144775390625|unsuper_loss: 0.0
-average reward score: 5.3671875
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.99%) |Training time=0.47s (21.39%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.34
-epoch: 0|step: 218|ppo_ep: 1|act_loss: 0.1029052734375|cri_loss: 0.0599365234375|unsuper_loss: 0.0
-average reward score: 6.484375
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.53%) |Training time=0.48s (21.86%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.34
-[2023-04-14 08:55:32,679] [INFO] [logging.py:96:log_dist] [Rank 0] step=220, skipped=5, lr=[9.645271629653494e-06, 9.645271629653494e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 08:55:32,901] [INFO] [timer.py:199:stop] epoch=0/micro_step=220/global_step=220, RunningAvgSamplesPerSec=99.60717426750885, CurrSamplesPerSec=60.76466806591054, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 08:55:32,994] [INFO] [logging.py:96:log_dist] [Rank 0] step=220, skipped=5, lr=[4.9975500671779765e-06, 4.9975500671779765e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 219|ppo_ep: 1|act_loss: 0.13671875|cri_loss: 0.07745361328125|unsuper_loss: 0.0
-average reward score: 6.9453125
--------------------------------------------------------------------------------------
-|E2E latency=2.41s |Gather latency=0.00s (0.00%) |Generate time=1.62s (67.15%) |Training time=0.69s (28.68%) |Others=0.10 (4.17%)|CurSamplesPerSec=13.30 |AvgSamplesPerSec=14.34
-epoch: 0|step: 220|ppo_ep: 1|act_loss: 0.0733642578125|cri_loss: 0.04412841796875|unsuper_loss: 0.0
-average reward score: 7.16796875
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.43%) |Training time=0.46s (20.97%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.57 |AvgSamplesPerSec=14.34
-[2023-04-14 08:55:37,263] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 16384, reducing to 8192
-epoch: 0|step: 221|ppo_ep: 1|act_loss: -0.236328125|cri_loss: -0.09283447265625|unsuper_loss: 0.0
-average reward score: 5.28125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.44%) |Training time=0.43s (19.93%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.34
-epoch: 0|step: 222|ppo_ep: 1|act_loss: -0.042388916015625|cri_loss: -0.016998291015625|unsuper_loss: 0.0
-average reward score: 6.92578125
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.33%) |Training time=0.46s (21.08%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.57 |AvgSamplesPerSec=14.34
-epoch: 0|step: 223|ppo_ep: 1|act_loss: -0.0716552734375|cri_loss: -0.0293731689453125|unsuper_loss: 0.0
-average reward score: 5.84375
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.29%) |Training time=0.47s (21.22%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.34
-epoch: 0|step: 224|ppo_ep: 1|act_loss: 0.0211181640625|cri_loss: 0.01495361328125|unsuper_loss: 0.0
-average reward score: 7.453125
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.36%) |Training time=0.46s (21.05%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.34
-epoch: 0|step: 225|ppo_ep: 1|act_loss: 0.139404296875|cri_loss: 0.08172607421875|unsuper_loss: 0.0
-average reward score: 6.4296875
--------------------------------------------------------------------------------------
-|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.64s (73.61%) |Training time=0.47s (21.21%) |Others=0.12 (5.18%)|CurSamplesPerSec=14.36 |AvgSamplesPerSec=14.34
-epoch: 0|step: 226|ppo_ep: 1|act_loss: 0.04437255859375|cri_loss: 0.02532958984375|unsuper_loss: 0.0
-average reward score: 4.3671875
--------------------------------------------------------------------------------------
-|E2E latency=2.30s |Gather latency=0.00s (0.00%) |Generate time=1.76s (76.61%) |Training time=0.44s (18.98%) |Others=0.10 (4.41%)|CurSamplesPerSec=13.94 |AvgSamplesPerSec=14.34
-epoch: 0|step: 227|ppo_ep: 1|act_loss: -0.06201171875|cri_loss: -0.0190582275390625|unsuper_loss: 0.0
-average reward score: 5.0546875
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.41%) |Training time=0.49s (22.11%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.53 |AvgSamplesPerSec=14.34
-epoch: 0|step: 228|ppo_ep: 1|act_loss: -0.092041015625|cri_loss: -0.03985595703125|unsuper_loss: 0.0
-average reward score: 5.140625
--------------------------------------------------------------------------------------
-|E2E latency=2.30s |Gather latency=0.00s (0.00%) |Generate time=1.62s (70.56%) |Training time=0.49s (21.35%) |Others=0.19 (8.08%)|CurSamplesPerSec=13.92 |AvgSamplesPerSec=14.34
-[2023-04-14 08:55:55,060] [INFO] [logging.py:96:log_dist] [Rank 0] step=230, skipped=6, lr=[9.64450272281792e-06, 9.64450272281792e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 08:55:55,078] [INFO] [timer.py:199:stop] epoch=0/micro_step=230/global_step=230, RunningAvgSamplesPerSec=99.88061376855845, CurrSamplesPerSec=101.70929986715883, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 08:55:55,171] [INFO] [logging.py:96:log_dist] [Rank 0] step=230, skipped=5, lr=[4.997105552702513e-06, 4.997105552702513e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 229|ppo_ep: 1|act_loss: 0.03997802734375|cri_loss: 0.023681640625|unsuper_loss: 0.0
-average reward score: 5.828125
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.66%) |Training time=0.48s (21.68%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.52 |AvgSamplesPerSec=14.34
-epoch: 0|step: 230|ppo_ep: 1|act_loss: 0.036895751953125|cri_loss: 0.02520751953125|unsuper_loss: 0.0
-average reward score: 4.91015625
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.57%) |Training time=0.48s (21.84%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.51 |AvgSamplesPerSec=14.34
-epoch: 0|step: 231|ppo_ep: 1|act_loss: 0.041748046875|cri_loss: 0.02581787109375|unsuper_loss: 0.0
-average reward score: 4.92578125
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.91%) |Training time=0.47s (21.52%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.56 |AvgSamplesPerSec=14.34
-epoch: 0|step: 232|ppo_ep: 1|act_loss: 0.1104736328125|cri_loss: 0.0606689453125|unsuper_loss: 0.0
-average reward score: 5.625
--------------------------------------------------------------------------------------
-|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.63s (73.38%) |Training time=0.49s (22.09%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.43 |AvgSamplesPerSec=14.34
-epoch: 0|step: 233|ppo_ep: 1|act_loss: 0.022430419921875|cri_loss: 0.0141143798828125|unsuper_loss: 0.0
-average reward score: 4.78125
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.80%) |Training time=0.48s (21.73%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.34
-epoch: 0|step: 234|ppo_ep: 1|act_loss: -0.097412109375|cri_loss: -0.044830322265625|unsuper_loss: 0.0
-average reward score: 5.31640625
--------------------------------------------------------------------------------------
-|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.62s (69.95%) |Training time=0.48s (20.82%) |Others=0.21 (9.22%)|CurSamplesPerSec=13.81 |AvgSamplesPerSec=14.34
-epoch: 0|step: 235|ppo_ep: 1|act_loss: 0.1146240234375|cri_loss: 0.08062744140625|unsuper_loss: 0.0
-average reward score: 4.8515625
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.78%) |Training time=0.48s (21.63%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.34
-epoch: 0|step: 236|ppo_ep: 1|act_loss: 0.098876953125|cri_loss: 0.0562744140625|unsuper_loss: 0.0
-average reward score: 4.9140625
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.00%) |Training time=0.47s (21.42%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.34
-epoch: 0|step: 237|ppo_ep: 1|act_loss: 0.246826171875|cri_loss: 0.1407470703125|unsuper_loss: 0.0
-average reward score: 4.5625
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.42%) |Training time=0.49s (22.03%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.48 |AvgSamplesPerSec=14.34
-epoch: 0|step: 238|ppo_ep: 1|act_loss: -0.043609619140625|cri_loss: -0.010223388671875|unsuper_loss: 0.0
-average reward score: 4.8359375
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.62%) |Training time=0.48s (21.80%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.57 |AvgSamplesPerSec=14.34
-[2023-04-14 08:56:17,202] [INFO] [logging.py:96:log_dist] [Rank 0] step=240, skipped=6, lr=[9.643580517474126e-06, 9.643580517474126e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 08:56:17,221] [INFO] [timer.py:199:stop] epoch=0/micro_step=240/global_step=240, RunningAvgSamplesPerSec=99.92462431230227, CurrSamplesPerSec=100.35225418814385, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 08:56:17,314] [INFO] [logging.py:96:log_dist] [Rank 0] step=240, skipped=5, lr=[4.996624025098819e-06, 4.996624025098819e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 239|ppo_ep: 1|act_loss: -0.1341552734375|cri_loss: -0.050537109375|unsuper_loss: 0.0
-average reward score: 4.5546875
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.61%) |Training time=0.48s (21.84%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.50 |AvgSamplesPerSec=14.34
-epoch: 0|step: 240|ppo_ep: 1|act_loss: -0.0472412109375|cri_loss: -0.0107421875|unsuper_loss: 0.0
-average reward score: 4.12109375
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.17%) |Training time=0.46s (20.69%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.46 |AvgSamplesPerSec=14.34
-epoch: 0|step: 241|ppo_ep: 1|act_loss: -0.02374267578125|cri_loss: -0.00562286376953125|unsuper_loss: 0.0
-average reward score: 3.810546875
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.20%) |Training time=0.47s (21.24%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.48 |AvgSamplesPerSec=14.35
-epoch: 0|step: 242|ppo_ep: 1|act_loss: 0.135498046875|cri_loss: 0.0750732421875|unsuper_loss: 0.0
-average reward score: 3.974609375
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.54%) |Training time=0.48s (21.88%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.56 |AvgSamplesPerSec=14.35
-epoch: 0|step: 243|ppo_ep: 1|act_loss: 0.01165008544921875|cri_loss: 0.016204833984375|unsuper_loss: 0.0
-average reward score: 4.26953125
--------------------------------------------------------------------------------------
-|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.81s (76.71%) |Training time=0.45s (19.00%) |Others=0.10 (4.28%)|CurSamplesPerSec=13.53 |AvgSamplesPerSec=14.34
-epoch: 0|step: 244|ppo_ep: 1|act_loss: 0.11224365234375|cri_loss: 0.061309814453125|unsuper_loss: 0.0
-average reward score: 4.0625
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.63s (73.90%) |Training time=0.47s (21.52%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.53 |AvgSamplesPerSec=14.34
-epoch: 0|step: 245|ppo_ep: 1|act_loss: 0.07666015625|cri_loss: 0.047027587890625|unsuper_loss: 0.0
-average reward score: 4.1640625
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.58%) |Training time=0.48s (21.85%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.34
-epoch: 0|step: 246|ppo_ep: 1|act_loss: -0.057037353515625|cri_loss: -0.0242919921875|unsuper_loss: 0.0
-average reward score: 4.9375
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.49%) |Training time=0.48s (21.94%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.52 |AvgSamplesPerSec=14.34
-epoch: 0|step: 247|ppo_ep: 1|act_loss: -0.036651611328125|cri_loss: -0.016265869140625|unsuper_loss: 0.0
-average reward score: 4.28125
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.86%) |Training time=0.48s (21.66%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.57 |AvgSamplesPerSec=14.35
-epoch: 0|step: 248|ppo_ep: 1|act_loss: 0.031280517578125|cri_loss: 0.019378662109375|unsuper_loss: 0.0
-average reward score: 4.2421875
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.43%) |Training time=0.49s (22.01%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.52 |AvgSamplesPerSec=14.35
-[2023-04-14 08:56:39,389] [INFO] [logging.py:96:log_dist] [Rank 0] step=250, skipped=6, lr=[9.64258688914287e-06, 9.64258688914287e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 08:56:39,927] [INFO] [timer.py:199:stop] epoch=0/micro_step=250/global_step=250, RunningAvgSamplesPerSec=99.39271434606478, CurrSamplesPerSec=38.278583366131066, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 08:56:40,020] [INFO] [logging.py:96:log_dist] [Rank 0] step=250, skipped=5, lr=[4.996105491504296e-06, 4.996105491504296e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 249|ppo_ep: 1|act_loss: 0.09033203125|cri_loss: 0.050537109375|unsuper_loss: 0.0
-average reward score: 4.4609375
--------------------------------------------------------------------------------------
-|E2E latency=2.72s |Gather latency=0.00s (0.00%) |Generate time=1.62s (59.51%) |Training time=1.00s (36.79%) |Others=0.10 (3.71%)|CurSamplesPerSec=11.78 |AvgSamplesPerSec=14.33
-epoch: 0|step: 250|ppo_ep: 1|act_loss: -0.1090087890625|cri_loss: -0.0499267578125|unsuper_loss: 0.0
-average reward score: 4.80078125
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.45%) |Training time=0.48s (21.98%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.33
-epoch: 0|step: 251|ppo_ep: 1|act_loss: -0.18017578125|cri_loss: -0.0765380859375|unsuper_loss: 0.0
-average reward score: 4.68359375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.78%) |Training time=0.45s (20.68%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.34
-epoch: 0|step: 252|ppo_ep: 1|act_loss: -0.17724609375|cri_loss: -0.08209228515625|unsuper_loss: 0.0
-average reward score: 4.9375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.97%) |Training time=0.47s (21.42%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.34
-epoch: 0|step: 253|ppo_ep: 1|act_loss: -0.12353515625|cri_loss: -0.0531005859375|unsuper_loss: 0.0
-average reward score: 5.26171875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.17%) |Training time=0.46s (21.22%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.34
-epoch: 0|step: 254|ppo_ep: 1|act_loss: 0.2373046875|cri_loss: 0.14013671875|unsuper_loss: 0.0
-average reward score: 4.77734375
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.13%) |Training time=0.44s (20.23%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.34
-epoch: 0|step: 255|ppo_ep: 1|act_loss: -0.028411865234375|cri_loss: -0.00667572021484375|unsuper_loss: 0.0
-average reward score: 4.2421875
--------------------------------------------------------------------------------------
-|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.76s (77.44%) |Training time=0.41s (18.18%) |Others=0.10 (4.38%)|CurSamplesPerSec=14.11 |AvgSamplesPerSec=14.34
-epoch: 0|step: 256|ppo_ep: 1|act_loss: 0.0565185546875|cri_loss: 0.037017822265625|unsuper_loss: 0.0
-average reward score: 4.72265625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.97%) |Training time=0.42s (19.35%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.34
-epoch: 0|step: 257|ppo_ep: 1|act_loss: 0.1236572265625|cri_loss: 0.067138671875|unsuper_loss: 0.0
-average reward score: 4.89453125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.65s (76.08%) |Training time=0.42s (19.34%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.34
-epoch: 0|step: 258|ppo_ep: 1|act_loss: 0.1390380859375|cri_loss: 0.0894775390625|unsuper_loss: 0.0
-average reward score: 4.5078125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.31%) |Training time=0.43s (20.04%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.34
-[2023-04-14 08:57:01,938] [INFO] [logging.py:96:log_dist] [Rank 0] step=260, skipped=6, lr=[9.64152185255212e-06, 9.64152185255212e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 08:57:01,956] [INFO] [timer.py:199:stop] epoch=0/micro_step=260/global_step=260, RunningAvgSamplesPerSec=99.89679140432641, CurrSamplesPerSec=114.06243881213967, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 08:57:02,050] [INFO] [logging.py:96:log_dist] [Rank 0] step=260, skipped=5, lr=[4.9955499596048615e-06, 4.9955499596048615e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 259|ppo_ep: 1|act_loss: 0.0908203125|cri_loss: 0.06024169921875|unsuper_loss: 0.0
-average reward score: 4.9296875
--------------------------------------------------------------------------------------
-|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.80s (76.75%) |Training time=0.45s (19.04%) |Others=0.10 (4.21%)|CurSamplesPerSec=13.67 |AvgSamplesPerSec=14.34
-epoch: 0|step: 260|ppo_ep: 1|act_loss: 0.124267578125|cri_loss: 0.065673828125|unsuper_loss: 0.0
-average reward score: 5.125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.42%) |Training time=0.46s (20.96%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.34
-epoch: 0|step: 261|ppo_ep: 1|act_loss: 0.03094482421875|cri_loss: 0.02093505859375|unsuper_loss: 0.0
-average reward score: 4.8515625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.58%) |Training time=0.45s (20.79%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.34
-epoch: 0|step: 262|ppo_ep: 1|act_loss: -0.03643798828125|cri_loss: -0.0055389404296875|unsuper_loss: 0.0
-average reward score: 4.578125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.85%) |Training time=0.44s (20.49%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.35
-epoch: 0|step: 263|ppo_ep: 1|act_loss: -0.098388671875|cri_loss: -0.0452880859375|unsuper_loss: 0.0
-average reward score: 4.0703125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.06%) |Training time=0.44s (20.24%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.35
-epoch: 0|step: 264|ppo_ep: 1|act_loss: -0.1561279296875|cri_loss: -0.04656982421875|unsuper_loss: 0.0
-average reward score: 4.61328125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.59%) |Training time=0.45s (20.75%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.35
-epoch: 0|step: 265|ppo_ep: 1|act_loss: -0.072021484375|cri_loss: -0.0289306640625|unsuper_loss: 0.0
-average reward score: 4.859375
--------------------------------------------------------------------------------------
-|E2E latency=2.72s |Gather latency=0.00s (0.00%) |Generate time=1.63s (60.04%) |Training time=0.45s (16.66%) |Others=0.63 (23.30%)|CurSamplesPerSec=11.76 |AvgSamplesPerSec=14.34
-epoch: 0|step: 266|ppo_ep: 1|act_loss: 0.00478363037109375|cri_loss: 0.006679534912109375|unsuper_loss: 0.0
-average reward score: 4.609375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.65%) |Training time=0.45s (20.69%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.34
-epoch: 0|step: 267|ppo_ep: 1|act_loss: 0.04937744140625|cri_loss: 0.029876708984375|unsuper_loss: 0.0
-average reward score: 4.15625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.83%) |Training time=0.45s (20.63%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.34
-epoch: 0|step: 268|ppo_ep: 1|act_loss: 0.050994873046875|cri_loss: 0.0362548828125|unsuper_loss: 0.0
-average reward score: 4.35546875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.38%) |Training time=0.43s (19.99%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.34
-[2023-04-14 08:57:24,192] [INFO] [logging.py:96:log_dist] [Rank 0] step=270, skipped=6, lr=[9.640385423488292e-06, 9.640385423488292e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 08:57:24,211] [INFO] [timer.py:199:stop] epoch=0/micro_step=270/global_step=270, RunningAvgSamplesPerSec=100.33253846211156, CurrSamplesPerSec=118.06418606287716, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 08:57:24,304] [INFO] [logging.py:96:log_dist] [Rank 0] step=270, skipped=5, lr=[4.99495743763484e-06, 4.99495743763484e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 269|ppo_ep: 1|act_loss: -0.050506591796875|cri_loss: -0.022216796875|unsuper_loss: 0.0
-average reward score: 5.25390625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.39%) |Training time=0.43s (20.00%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.34
-epoch: 0|step: 270|ppo_ep: 1|act_loss: 0.007083892822265625|cri_loss: 0.00634765625|unsuper_loss: 0.0
-average reward score: 5.26171875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.63%) |Training time=0.43s (19.72%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.34
-epoch: 0|step: 271|ppo_ep: 1|act_loss: -0.02874755859375|cri_loss: -0.01068115234375|unsuper_loss: 0.0
-average reward score: 5.3828125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.24%) |Training time=0.43s (19.78%) |Others=0.11 (4.98%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.35
-epoch: 0|step: 272|ppo_ep: 1|act_loss: 0.158203125|cri_loss: 0.0850830078125|unsuper_loss: 0.0
-average reward score: 4.30859375
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.42%) |Training time=0.46s (20.89%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.35
-epoch: 0|step: 273|ppo_ep: 1|act_loss: 0.04718017578125|cri_loss: 0.0275421142578125|unsuper_loss: 0.0
-average reward score: 4.33203125
--------------------------------------------------------------------------------------
-|E2E latency=2.38s |Gather latency=0.00s (0.00%) |Generate time=1.83s (76.95%) |Training time=0.45s (18.83%) |Others=0.10 (4.22%)|CurSamplesPerSec=13.47 |AvgSamplesPerSec=14.34
-epoch: 0|step: 274|ppo_ep: 1|act_loss: -0.04803466796875|cri_loss: -0.0196685791015625|unsuper_loss: 0.0
-average reward score: 5.3671875
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.56%) |Training time=0.45s (20.64%) |Others=0.11 (4.80%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.34
-epoch: 0|step: 275|ppo_ep: 1|act_loss: 0.01389312744140625|cri_loss: 0.0127716064453125|unsuper_loss: 0.0
-average reward score: 4.5859375
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.63s (73.88%) |Training time=0.48s (21.61%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.50 |AvgSamplesPerSec=14.34
-epoch: 0|step: 276|ppo_ep: 1|act_loss: -0.10394287109375|cri_loss: -0.044525146484375|unsuper_loss: 0.0
-average reward score: 4.4609375
--------------------------------------------------------------------------------------
-|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.63s (69.59%) |Training time=0.47s (20.00%) |Others=0.24 (10.40%)|CurSamplesPerSec=13.67 |AvgSamplesPerSec=14.34
-epoch: 0|step: 277|ppo_ep: 1|act_loss: -0.0570068359375|cri_loss: -0.0216827392578125|unsuper_loss: 0.0
-average reward score: 4.35546875
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.06%) |Training time=0.45s (20.43%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.57 |AvgSamplesPerSec=14.34
-epoch: 0|step: 278|ppo_ep: 1|act_loss: -0.020050048828125|cri_loss: -0.0082244873046875|unsuper_loss: 0.0
-average reward score: 4.79296875
--------------------------------------------------------------------------------------
-|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.64s (73.66%) |Training time=0.48s (21.65%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.40 |AvgSamplesPerSec=14.34
-[2023-04-14 08:57:46,450] [INFO] [logging.py:96:log_dist] [Rank 0] step=280, skipped=6, lr=[9.639177618796e-06, 9.639177618796e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 08:57:46,468] [INFO] [timer.py:199:stop] epoch=0/micro_step=280/global_step=280, RunningAvgSamplesPerSec=100.64003665757109, CurrSamplesPerSec=108.39001360752977, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 08:57:46,561] [INFO] [logging.py:96:log_dist] [Rank 0] step=280, skipped=5, lr=[4.994327934376836e-06, 4.994327934376836e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 279|ppo_ep: 1|act_loss: 0.05682373046875|cri_loss: 0.0301666259765625|unsuper_loss: 0.0
-average reward score: 4.76953125
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.56%) |Training time=0.46s (20.92%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.34
-epoch: 0|step: 280|ppo_ep: 1|act_loss: 0.1710205078125|cri_loss: 0.101806640625|unsuper_loss: 0.0
-average reward score: 5.171875
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.41%) |Training time=0.46s (20.99%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.34
-epoch: 0|step: 281|ppo_ep: 1|act_loss: 0.1187744140625|cri_loss: 0.06683349609375|unsuper_loss: 0.0
-average reward score: 5.09765625
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.63s (73.98%) |Training time=0.47s (21.51%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.53 |AvgSamplesPerSec=14.35
-epoch: 0|step: 282|ppo_ep: 1|act_loss: 0.095703125|cri_loss: 0.05072021484375|unsuper_loss: 0.0
-average reward score: 5.14453125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.53%) |Training time=0.46s (20.85%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.35
-epoch: 0|step: 283|ppo_ep: 1|act_loss: -0.037811279296875|cri_loss: -0.017578125|unsuper_loss: 0.0
-average reward score: 4.9453125
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.13%) |Training time=0.47s (21.31%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.47 |AvgSamplesPerSec=14.35
-epoch: 0|step: 284|ppo_ep: 1|act_loss: 0.009124755859375|cri_loss: 0.00556182861328125|unsuper_loss: 0.0
-average reward score: 5.03125
--------------------------------------------------------------------------------------
-|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.73s (75.48%) |Training time=0.46s (20.10%) |Others=0.10 (4.41%)|CurSamplesPerSec=13.97 |AvgSamplesPerSec=14.35
-epoch: 0|step: 285|ppo_ep: 1|act_loss: -0.05279541015625|cri_loss: -0.0239105224609375|unsuper_loss: 0.0
-average reward score: 5.234375
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.66s (75.38%) |Training time=0.44s (20.00%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.35
-epoch: 0|step: 286|ppo_ep: 1|act_loss: -0.0899658203125|cri_loss: -0.0411376953125|unsuper_loss: 0.0
-average reward score: 5.1796875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.85%) |Training time=0.42s (19.49%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.35
-epoch: 0|step: 287|ppo_ep: 1|act_loss: -0.02752685546875|cri_loss: -0.01082611083984375|unsuper_loss: 0.0
-average reward score: 4.94140625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.65s (76.18%) |Training time=0.42s (19.26%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.35
-epoch: 0|step: 288|ppo_ep: 1|act_loss: -0.000614166259765625|cri_loss: 0.00179290771484375|unsuper_loss: 0.0
-average reward score: 4.75390625
--------------------------------------------------------------------------------------
-|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.80s (77.21%) |Training time=0.43s (18.46%) |Others=0.10 (4.33%)|CurSamplesPerSec=13.70 |AvgSamplesPerSec=14.35
-[2023-04-14 08:58:08,558] [INFO] [logging.py:96:log_dist] [Rank 0] step=290, skipped=6, lr=[9.637898456377828e-06, 9.637898456377828e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 08:58:08,576] [INFO] [timer.py:199:stop] epoch=0/micro_step=290/global_step=290, RunningAvgSamplesPerSec=101.01194688929871, CurrSamplesPerSec=112.96647476685857, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 08:58:08,668] [INFO] [logging.py:96:log_dist] [Rank 0] step=290, skipped=5, lr=[4.993661459161605e-06, 4.993661459161605e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 289|ppo_ep: 1|act_loss: -0.08251953125|cri_loss: -0.0379638671875|unsuper_loss: 0.0
-average reward score: 4.84375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.91%) |Training time=0.45s (20.57%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.35
-epoch: 0|step: 290|ppo_ep: 1|act_loss: 0.0092010498046875|cri_loss: 0.008026123046875|unsuper_loss: 0.0
-average reward score: 5.48828125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.73%) |Training time=0.45s (20.64%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.35
-epoch: 0|step: 291|ppo_ep: 1|act_loss: 0.030975341796875|cri_loss: 0.016845703125|unsuper_loss: 0.0
-average reward score: 5.55859375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.66%) |Training time=0.45s (20.78%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.35
-epoch: 0|step: 292|ppo_ep: 1|act_loss: 0.0404052734375|cri_loss: 0.0223846435546875|unsuper_loss: 0.0
-average reward score: 5.3828125
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.07%) |Training time=0.44s (20.30%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.35
-epoch: 0|step: 293|ppo_ep: 1|act_loss: -0.0026226043701171875|cri_loss: 0.0005626678466796875|unsuper_loss: 0.0
-average reward score: 4.88671875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.89%) |Training time=0.45s (20.56%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.35
-epoch: 0|step: 294|ppo_ep: 1|act_loss: 0.1007080078125|cri_loss: 0.056976318359375|unsuper_loss: 0.0
-average reward score: 5.7421875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.53%) |Training time=0.45s (20.84%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.35
-epoch: 0|step: 295|ppo_ep: 1|act_loss: -0.01436614990234375|cri_loss: -0.00609588623046875|unsuper_loss: 0.0
-average reward score: 5.13671875
--------------------------------------------------------------------------------------
-|E2E latency=3.40s |Gather latency=0.00s (0.00%) |Generate time=1.62s (47.50%) |Training time=0.45s (13.18%) |Others=1.34 (39.33%)|CurSamplesPerSec=9.40 |AvgSamplesPerSec=14.33
-epoch: 0|step: 296|ppo_ep: 1|act_loss: -0.000537872314453125|cri_loss: 0.003475189208984375|unsuper_loss: 0.0
-average reward score: 4.97265625
--------------------------------------------------------------------------------------
-|E2E latency=2.12s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.95%) |Training time=0.39s (18.28%) |Others=0.10 (4.77%)|CurSamplesPerSec=15.12 |AvgSamplesPerSec=14.33
-epoch: 0|step: 297|ppo_ep: 1|act_loss: -0.042449951171875|cri_loss: -0.017486572265625|unsuper_loss: 0.0
-average reward score: 5.71484375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.76%) |Training time=0.45s (20.70%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.33
-epoch: 0|step: 298|ppo_ep: 1|act_loss: -0.001461029052734375|cri_loss: 0.00197601318359375|unsuper_loss: 0.0
-average reward score: 6.12109375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.55%) |Training time=0.45s (20.81%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.33
-[2023-04-14 08:58:31,449] [INFO] [logging.py:96:log_dist] [Rank 0] step=300, skipped=6, lr=[9.636547955194047e-06, 9.636547955194047e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 08:58:31,467] [INFO] [timer.py:199:stop] epoch=0/micro_step=300/global_step=300, RunningAvgSamplesPerSec=101.42191353477254, CurrSamplesPerSec=116.11825887144846, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 08:58:31,561] [INFO] [logging.py:96:log_dist] [Rank 0] step=300, skipped=5, lr=[4.9929580218679195e-06, 4.9929580218679195e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 299|ppo_ep: 1|act_loss: -0.037841796875|cri_loss: -0.017974853515625|unsuper_loss: 0.0
-average reward score: 6.19140625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.10%) |Training time=0.44s (20.32%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.34
-epoch: 0|step: 300|ppo_ep: 1|act_loss: 0.1055908203125|cri_loss: 0.0635986328125|unsuper_loss: 0.0
-average reward score: 6.1328125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.65%) |Training time=0.43s (19.69%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.34
-epoch: 0|step: 301|ppo_ep: 1|act_loss: 0.01062774658203125|cri_loss: 0.00878143310546875|unsuper_loss: 0.0
-average reward score: 4.91796875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.96%) |Training time=0.44s (20.39%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.34
-epoch: 0|step: 302|ppo_ep: 1|act_loss: 0.02557373046875|cri_loss: 0.0182647705078125|unsuper_loss: 0.0
-average reward score: 5.859375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.99%) |Training time=0.44s (20.36%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.34
-epoch: 0|step: 303|ppo_ep: 1|act_loss: -0.0107421875|cri_loss: -0.004383087158203125|unsuper_loss: 0.0
-average reward score: 5.640625
--------------------------------------------------------------------------------------
-|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.80s (77.17%) |Training time=0.43s (18.60%) |Others=0.10 (4.23%)|CurSamplesPerSec=13.73 |AvgSamplesPerSec=14.34
-epoch: 0|step: 304|ppo_ep: 1|act_loss: -0.0123443603515625|cri_loss: -0.00382232666015625|unsuper_loss: 0.0
-average reward score: 5.46875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.97%) |Training time=0.44s (20.37%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.34
-epoch: 0|step: 305|ppo_ep: 1|act_loss: -0.01494598388671875|cri_loss: -0.0056304931640625|unsuper_loss: 0.0
-average reward score: 5.9453125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.88%) |Training time=0.44s (20.49%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.34
-epoch: 0|step: 306|ppo_ep: 1|act_loss: -0.00437164306640625|cri_loss: -0.0005321502685546875|unsuper_loss: 0.0
-average reward score: 6.4765625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.67%) |Training time=0.45s (20.71%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.34
-epoch: 0|step: 307|ppo_ep: 1|act_loss: -0.009765625|cri_loss: -0.004009246826171875|unsuper_loss: 0.0
-average reward score: 5.921875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.28%) |Training time=0.44s (20.17%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.34
-epoch: 0|step: 308|ppo_ep: 1|act_loss: 0.039825439453125|cri_loss: 0.025970458984375|unsuper_loss: 0.0
-average reward score: 5.578125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.18%) |Training time=0.43s (20.05%) |Others=0.10 (4.76%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.34
-[2023-04-14 08:58:53,301] [INFO] [logging.py:96:log_dist] [Rank 0] step=310, skipped=6, lr=[9.635126135262344e-06, 9.635126135262344e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 08:58:53,319] [INFO] [timer.py:199:stop] epoch=0/micro_step=310/global_step=310, RunningAvgSamplesPerSec=101.82524227057964, CurrSamplesPerSec=112.93795796099022, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 08:58:53,412] [INFO] [logging.py:96:log_dist] [Rank 0] step=310, skipped=5, lr=[4.9922176329224145e-06, 4.9922176329224145e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 309|ppo_ep: 1|act_loss: -0.0081024169921875|cri_loss: -0.00330352783203125|unsuper_loss: 0.0
-average reward score: 5.76953125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.77%) |Training time=0.45s (20.61%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.35
-[2023-04-14 08:58:55,462] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 8192, reducing to 4096
-epoch: 0|step: 310|ppo_ep: 1|act_loss: 0.0209503173828125|cri_loss: 0.0167999267578125|unsuper_loss: 0.0
-average reward score: 5.25
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.57%) |Training time=0.42s (19.76%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.35
-epoch: 0|step: 311|ppo_ep: 1|act_loss: 0.05865478515625|cri_loss: 0.036773681640625|unsuper_loss: 0.0
-average reward score: 6.4453125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.01%) |Training time=0.44s (20.45%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.35
-epoch: 0|step: 312|ppo_ep: 1|act_loss: -0.0080413818359375|cri_loss: -0.001312255859375|unsuper_loss: 0.0
-average reward score: 6.2578125
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.67s (75.84%) |Training time=0.43s (19.60%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.53 |AvgSamplesPerSec=14.35
-epoch: 0|step: 313|ppo_ep: 1|act_loss: 0.00653076171875|cri_loss: 0.0057525634765625|unsuper_loss: 0.0
-average reward score: 7.109375
--------------------------------------------------------------------------------------
-|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.73s (76.11%) |Training time=0.44s (19.49%) |Others=0.10 (4.41%)|CurSamplesPerSec=14.07 |AvgSamplesPerSec=14.35
-epoch: 0|step: 314|ppo_ep: 1|act_loss: 0.0157012939453125|cri_loss: 0.0095367431640625|unsuper_loss: 0.0
-average reward score: 6.78515625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.07%) |Training time=0.44s (20.28%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.35
-epoch: 0|step: 315|ppo_ep: 1|act_loss: -0.037750244140625|cri_loss: -0.016204833984375|unsuper_loss: 0.0
-average reward score: 6.21875
--------------------------------------------------------------------------------------
-|E2E latency=2.49s |Gather latency=0.00s (0.00%) |Generate time=1.64s (66.05%) |Training time=0.43s (17.14%) |Others=0.42 (16.82%)|CurSamplesPerSec=12.85 |AvgSamplesPerSec=14.34
-epoch: 0|step: 316|ppo_ep: 1|act_loss: 0.03277587890625|cri_loss: 0.0176544189453125|unsuper_loss: 0.0
-average reward score: 5.11328125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.60%) |Training time=0.43s (19.71%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.35
-epoch: 0|step: 317|ppo_ep: 1|act_loss: -0.0408935546875|cri_loss: -0.0183868408203125|unsuper_loss: 0.0
-average reward score: 5.609375
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.78%) |Training time=0.45s (20.72%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.35
-epoch: 0|step: 318|ppo_ep: 1|act_loss: 0.0999755859375|cri_loss: 0.059661865234375|unsuper_loss: 0.0
-average reward score: 6.859375
--------------------------------------------------------------------------------------
-|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.78s (76.09%) |Training time=0.46s (19.62%) |Others=0.10 (4.29%)|CurSamplesPerSec=13.67 |AvgSamplesPerSec=14.34
-[2023-04-14 08:59:15,615] [INFO] [logging.py:96:log_dist] [Rank 0] step=320, skipped=7, lr=[9.63378553719082e-06, 9.63378553719082e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 08:59:15,633] [INFO] [timer.py:199:stop] epoch=0/micro_step=320/global_step=320, RunningAvgSamplesPerSec=102.20252850967381, CurrSamplesPerSec=109.19005671926405, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 08:59:15,725] [INFO] [logging.py:96:log_dist] [Rank 0] step=320, skipped=5, lr=[4.991440303299444e-06, 4.991440303299444e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 319|ppo_ep: 1|act_loss: 0.01959228515625|cri_loss: 0.01177978515625|unsuper_loss: 0.0
-average reward score: 5.95703125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.51%) |Training time=0.46s (20.98%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.35
-epoch: 0|step: 320|ppo_ep: 1|act_loss: -0.0213623046875|cri_loss: -0.009521484375|unsuper_loss: 0.0
-average reward score: 6.09765625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.15%) |Training time=0.46s (21.24%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.35
-epoch: 0|step: 321|ppo_ep: 1|act_loss: 0.0077362060546875|cri_loss: 0.006229400634765625|unsuper_loss: 0.0
-average reward score: 6.9609375
--------------------------------------------------------------------------------------
-|E2E latency=2.45s |Gather latency=0.00s (0.00%) |Generate time=1.62s (66.12%) |Training time=0.46s (18.88%) |Others=0.37 (15.00%)|CurSamplesPerSec=13.07 |AvgSamplesPerSec=14.34
-epoch: 0|step: 322|ppo_ep: 1|act_loss: 0.031463623046875|cri_loss: 0.0181884765625|unsuper_loss: 0.0
-average reward score: 5.71484375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.11%) |Training time=0.46s (21.28%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.34
-epoch: 0|step: 323|ppo_ep: 1|act_loss: -0.0440673828125|cri_loss: -0.02008056640625|unsuper_loss: 0.0
-average reward score: 6.68359375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.13%) |Training time=0.46s (21.25%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.34
-epoch: 0|step: 324|ppo_ep: 1|act_loss: -0.05035400390625|cri_loss: -0.023223876953125|unsuper_loss: 0.0
-average reward score: 5.9375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.15%) |Training time=0.46s (21.23%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.35
-epoch: 0|step: 325|ppo_ep: 1|act_loss: 0.00405120849609375|cri_loss: 0.004222869873046875|unsuper_loss: 0.0
-average reward score: 5.7734375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.21%) |Training time=0.46s (21.28%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.35
-epoch: 0|step: 326|ppo_ep: 1|act_loss: -0.024139404296875|cri_loss: -0.00982666015625|unsuper_loss: 0.0
-average reward score: 6.15234375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.41%) |Training time=0.45s (20.94%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.35
-epoch: 0|step: 327|ppo_ep: 1|act_loss: 0.01953125|cri_loss: 0.0111846923828125|unsuper_loss: 0.0
-average reward score: 5.8515625
--------------------------------------------------------------------------------------
-|E2E latency=2.77s |Gather latency=0.00s (0.00%) |Generate time=1.62s (58.42%) |Training time=0.47s (16.78%) |Others=0.69 (24.80%)|CurSamplesPerSec=11.55 |AvgSamplesPerSec=14.34
-epoch: 0|step: 328|ppo_ep: 1|act_loss: 0.0179443359375|cri_loss: 0.0124053955078125|unsuper_loss: 0.0
-average reward score: 5.7734375
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.81%) |Training time=0.45s (20.57%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.34
-[2023-04-14 08:59:38,260] [INFO] [logging.py:96:log_dist] [Rank 0] step=330, skipped=7, lr=[9.632228270572594e-06, 9.632228270572594e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 08:59:38,278] [INFO] [timer.py:199:stop] epoch=0/micro_step=330/global_step=330, RunningAvgSamplesPerSec=102.38817781321211, CurrSamplesPerSec=120.18967042650056, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 08:59:38,371] [INFO] [logging.py:96:log_dist] [Rank 0] step=330, skipped=5, lr=[4.990626044520905e-06, 4.990626044520905e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 329|ppo_ep: 1|act_loss: -0.00534820556640625|cri_loss: 0.001178741455078125|unsuper_loss: 0.0
-average reward score: 5.6953125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.66%) |Training time=0.43s (19.71%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.34
-epoch: 0|step: 330|ppo_ep: 1|act_loss: -0.015716552734375|cri_loss: -0.00428009033203125|unsuper_loss: 0.0
-average reward score: 6.42578125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.70%) |Training time=0.43s (19.65%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.34
-epoch: 0|step: 331|ppo_ep: 1|act_loss: -0.000396728515625|cri_loss: 0.0016536712646484375|unsuper_loss: 0.0
-average reward score: 6.6640625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.11%) |Training time=0.44s (20.35%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.34
-epoch: 0|step: 332|ppo_ep: 1|act_loss: 0.01067352294921875|cri_loss: 0.01187896728515625|unsuper_loss: 0.0
-average reward score: 6.078125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.03%) |Training time=0.44s (20.34%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.34
-epoch: 0|step: 333|ppo_ep: 1|act_loss: -0.0230712890625|cri_loss: -0.009796142578125|unsuper_loss: 0.0
-average reward score: 5.37109375
--------------------------------------------------------------------------------------
-|E2E latency=2.59s |Gather latency=0.00s (0.00%) |Generate time=1.80s (69.53%) |Training time=0.43s (16.75%) |Others=0.35 (13.72%)|CurSamplesPerSec=12.38 |AvgSamplesPerSec=14.34
-epoch: 0|step: 334|ppo_ep: 1|act_loss: 0.0168609619140625|cri_loss: 0.0110626220703125|unsuper_loss: 0.0
-average reward score: 6.1953125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.68%) |Training time=0.45s (20.69%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.34
-epoch: 0|step: 335|ppo_ep: 1|act_loss: 0.01114654541015625|cri_loss: 0.00699615478515625|unsuper_loss: 0.0
-average reward score: 5.6484375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.79%) |Training time=0.45s (20.67%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.34
-epoch: 0|step: 336|ppo_ep: 1|act_loss: 0.06365966796875|cri_loss: 0.03387451171875|unsuper_loss: 0.0
-average reward score: 5.921875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.95%) |Training time=0.44s (20.42%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.34
-epoch: 0|step: 337|ppo_ep: 1|act_loss: -0.011566162109375|cri_loss: -0.00455474853515625|unsuper_loss: 0.0
-average reward score: 6.4609375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.64%) |Training time=0.45s (20.70%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.34
-epoch: 0|step: 338|ppo_ep: 1|act_loss: 0.023193359375|cri_loss: 0.01505279541015625|unsuper_loss: 0.0
-average reward score: 6.578125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.61%) |Training time=0.45s (20.75%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.34
-[2023-04-14 09:00:00,381] [INFO] [logging.py:96:log_dist] [Rank 0] step=340, skipped=7, lr=[9.630599749234592e-06, 9.630599749234592e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:00:01,026] [INFO] [timer.py:199:stop] epoch=0/micro_step=340/global_step=340, RunningAvgSamplesPerSec=102.09652405965743, CurrSamplesPerSec=35.20821465465094, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:00:01,119] [INFO] [logging.py:96:log_dist] [Rank 0] step=340, skipped=5, lr=[4.989774868656078e-06, 4.989774868656078e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 339|ppo_ep: 1|act_loss: -0.022247314453125|cri_loss: -0.0097503662109375|unsuper_loss: 0.0
-average reward score: 8.203125
--------------------------------------------------------------------------------------
-|E2E latency=2.80s |Gather latency=0.00s (0.00%) |Generate time=1.63s (58.17%) |Training time=1.07s (38.32%) |Others=0.10 (3.51%)|CurSamplesPerSec=11.44 |AvgSamplesPerSec=14.33
-epoch: 0|step: 340|ppo_ep: 1|act_loss: -0.02862548828125|cri_loss: -0.01319122314453125|unsuper_loss: 0.0
-average reward score: 6.9453125
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.07%) |Training time=0.45s (20.63%) |Others=0.12 (5.30%)|CurSamplesPerSec=14.53 |AvgSamplesPerSec=14.33
-epoch: 0|step: 341|ppo_ep: 1|act_loss: -0.003849029541015625|cri_loss: -0.000995635986328125|unsuper_loss: 0.0
-average reward score: 6.2421875
--------------------------------------------------------------------------------------
-|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.73s (76.32%) |Training time=0.44s (19.25%) |Others=0.10 (4.43%)|CurSamplesPerSec=14.11 |AvgSamplesPerSec=14.33
-epoch: 0|step: 342|ppo_ep: 1|act_loss: 0.01129913330078125|cri_loss: 0.007045745849609375|unsuper_loss: 0.0
-average reward score: 6.6328125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.58%) |Training time=0.45s (20.79%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.33
-epoch: 0|step: 343|ppo_ep: 1|act_loss: -0.0092620849609375|cri_loss: -0.00258636474609375|unsuper_loss: 0.0
-average reward score: 5.98046875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.92%) |Training time=0.45s (20.53%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.33
-epoch: 0|step: 344|ppo_ep: 1|act_loss: -0.00565338134765625|cri_loss: -0.0018253326416015625|unsuper_loss: 0.0
-average reward score: 5.8125
--------------------------------------------------------------------------------------
-|E2E latency=2.82s |Gather latency=0.00s (0.00%) |Generate time=1.64s (58.16%) |Training time=0.43s (15.11%) |Others=0.75 (26.73%)|CurSamplesPerSec=11.36 |AvgSamplesPerSec=14.32
-epoch: 0|step: 345|ppo_ep: 1|act_loss: -0.01235198974609375|cri_loss: -0.005222320556640625|unsuper_loss: 0.0
-average reward score: 5.75
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.88%) |Training time=0.45s (20.58%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.32
-epoch: 0|step: 346|ppo_ep: 1|act_loss: 0.03094482421875|cri_loss: 0.0184478759765625|unsuper_loss: 0.0
-average reward score: 6.375
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.01%) |Training time=0.45s (20.39%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.32
-epoch: 0|step: 347|ppo_ep: 1|act_loss: 0.0733642578125|cri_loss: 0.044921875|unsuper_loss: 0.0
-average reward score: 5.9453125
--------------------------------------------------------------------------------------
-|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.79s (76.65%) |Training time=0.45s (19.14%) |Others=0.10 (4.22%)|CurSamplesPerSec=13.67 |AvgSamplesPerSec=14.32
-epoch: 0|step: 348|ppo_ep: 1|act_loss: 0.010406494140625|cri_loss: 0.00753021240234375|unsuper_loss: 0.0
-average reward score: 5.79296875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.73%) |Training time=0.45s (20.61%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.32
-[2023-04-14 09:00:23,695] [INFO] [logging.py:96:log_dist] [Rank 0] step=350, skipped=7, lr=[9.628899997315426e-06, 9.628899997315426e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:00:23,713] [INFO] [timer.py:199:stop] epoch=0/micro_step=350/global_step=350, RunningAvgSamplesPerSec=102.38370118482274, CurrSamplesPerSec=111.07989661499347, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:00:23,806] [INFO] [logging.py:96:log_dist] [Rank 0] step=350, skipped=5, lr=[4.988886788321443e-06, 4.988886788321443e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 349|ppo_ep: 1|act_loss: -0.00733184814453125|cri_loss: -0.0023441314697265625|unsuper_loss: 0.0
-average reward score: 6.18359375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.67%) |Training time=0.45s (20.80%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.32
-epoch: 0|step: 350|ppo_ep: 1|act_loss: 0.00539398193359375|cri_loss: 0.00363922119140625|unsuper_loss: 0.0
-average reward score: 6.51953125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.39%) |Training time=0.43s (19.99%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.33
-epoch: 0|step: 351|ppo_ep: 1|act_loss: 0.02569580078125|cri_loss: 0.01378631591796875|unsuper_loss: 0.0
-average reward score: 5.9296875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.58%) |Training time=0.43s (19.88%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.33
-epoch: 0|step: 352|ppo_ep: 1|act_loss: 0.0211639404296875|cri_loss: 0.0124969482421875|unsuper_loss: 0.0
-average reward score: 6.20703125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.73%) |Training time=0.43s (19.62%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.33
-epoch: 0|step: 353|ppo_ep: 1|act_loss: 0.0112762451171875|cri_loss: 0.006603240966796875|unsuper_loss: 0.0
-average reward score: 6.05078125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.74%) |Training time=0.43s (19.70%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.33
-epoch: 0|step: 354|ppo_ep: 1|act_loss: -0.017547607421875|cri_loss: -0.008026123046875|unsuper_loss: 0.0
-average reward score: 6.3203125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.14%) |Training time=0.44s (20.21%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.33
-epoch: 0|step: 355|ppo_ep: 1|act_loss: 0.001312255859375|cri_loss: 0.00237274169921875|unsuper_loss: 0.0
-average reward score: 5.6015625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.88%) |Training time=0.45s (20.58%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.33
-epoch: 0|step: 356|ppo_ep: 1|act_loss: -0.03271484375|cri_loss: -0.01531982421875|unsuper_loss: 0.0
-average reward score: 6.9765625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.00%) |Training time=0.44s (20.33%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.33
-epoch: 0|step: 357|ppo_ep: 1|act_loss: -0.007843017578125|cri_loss: -0.0031490325927734375|unsuper_loss: 0.0
-average reward score: 6.2890625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.61%) |Training time=0.45s (20.70%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.33
-epoch: 0|step: 358|ppo_ep: 1|act_loss: -0.00775909423828125|cri_loss: -0.003040313720703125|unsuper_loss: 0.0
-average reward score: 5.66015625
--------------------------------------------------------------------------------------
-|E2E latency=2.67s |Gather latency=0.00s (0.00%) |Generate time=1.64s (61.37%) |Training time=0.43s (16.08%) |Others=0.60 (22.54%)|CurSamplesPerSec=11.98 |AvgSamplesPerSec=14.33
-[2023-04-14 09:00:45,855] [INFO] [logging.py:96:log_dist] [Rank 0] step=360, skipped=7, lr=[9.627129040009524e-06, 9.627129040009524e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:00:45,873] [INFO] [timer.py:199:stop] epoch=0/micro_step=360/global_step=360, RunningAvgSamplesPerSec=102.79024752173669, CurrSamplesPerSec=142.6891849079872, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:00:45,965] [INFO] [logging.py:96:log_dist] [Rank 0] step=360, skipped=5, lr=[4.987961816680493e-06, 4.987961816680493e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 359|ppo_ep: 1|act_loss: -0.0162811279296875|cri_loss: -0.0064697265625|unsuper_loss: 0.0
-average reward score: 6.65625
--------------------------------------------------------------------------------------
-|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.64s (77.15%) |Training time=0.39s (18.20%) |Others=0.10 (4.64%)|CurSamplesPerSec=15.03 |AvgSamplesPerSec=14.33
-epoch: 0|step: 360|ppo_ep: 1|act_loss: -0.03131103515625|cri_loss: -0.01505279541015625|unsuper_loss: 0.0
-average reward score: 6.44921875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.68%) |Training time=0.45s (20.68%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.33
-epoch: 0|step: 361|ppo_ep: 1|act_loss: 0.02545166015625|cri_loss: 0.0144805908203125|unsuper_loss: 0.0
-average reward score: 5.66796875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.23%) |Training time=0.44s (20.24%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.33
-epoch: 0|step: 362|ppo_ep: 1|act_loss: 0.07672119140625|cri_loss: 0.042572021484375|unsuper_loss: 0.0
-average reward score: 6.07421875
--------------------------------------------------------------------------------------
-|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.80s (76.93%) |Training time=0.44s (18.79%) |Others=0.10 (4.29%)|CurSamplesPerSec=13.69 |AvgSamplesPerSec=14.33
-epoch: 0|step: 363|ppo_ep: 1|act_loss: -0.13330078125|cri_loss: -0.06036376953125|unsuper_loss: 0.0
-average reward score: 6.20703125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.54%) |Training time=0.43s (19.82%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.33
-epoch: 0|step: 364|ppo_ep: 1|act_loss: 0.0775146484375|cri_loss: 0.042633056640625|unsuper_loss: 0.0
-average reward score: 5.8046875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.44%) |Training time=0.43s (19.94%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.33
-epoch: 0|step: 365|ppo_ep: 1|act_loss: 0.0132904052734375|cri_loss: 0.00804901123046875|unsuper_loss: 0.0
-average reward score: 5.5703125
--------------------------------------------------------------------------------------
-|E2E latency=2.53s |Gather latency=0.00s (0.00%) |Generate time=1.63s (64.45%) |Training time=0.44s (17.53%) |Others=0.46 (18.02%)|CurSamplesPerSec=12.66 |AvgSamplesPerSec=14.32
-epoch: 0|step: 366|ppo_ep: 1|act_loss: -0.025360107421875|cri_loss: -0.01123809814453125|unsuper_loss: 0.0
-average reward score: 5.640625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.18%) |Training time=0.44s (20.19%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.33
-epoch: 0|step: 367|ppo_ep: 1|act_loss: -0.018341064453125|cri_loss: -0.00739288330078125|unsuper_loss: 0.0
-average reward score: 5.3359375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.64%) |Training time=0.43s (19.82%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.33
-epoch: 0|step: 368|ppo_ep: 1|act_loss: -0.03570556640625|cri_loss: -0.0162506103515625|unsuper_loss: 0.0
-average reward score: 4.9609375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.39%) |Training time=0.43s (19.88%) |Others=0.10 (4.73%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.33
-[2023-04-14 09:01:08,114] [INFO] [logging.py:96:log_dist] [Rank 0] step=370, skipped=7, lr=[9.625286903566743e-06, 9.625286903566743e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:01:08,133] [INFO] [timer.py:199:stop] epoch=0/micro_step=370/global_step=370, RunningAvgSamplesPerSec=103.09254097134992, CurrSamplesPerSec=103.87621324471226, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:01:08,227] [INFO] [logging.py:96:log_dist] [Rank 0] step=370, skipped=5, lr=[4.986999967443538e-06, 4.986999967443538e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 369|ppo_ep: 1|act_loss: 0.037139892578125|cri_loss: 0.02215576171875|unsuper_loss: 0.0
-average reward score: 5.98046875
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.63s (73.98%) |Training time=0.47s (21.40%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.54 |AvgSamplesPerSec=14.33
-epoch: 0|step: 370|ppo_ep: 1|act_loss: -0.025787353515625|cri_loss: -0.0121307373046875|unsuper_loss: 0.0
-average reward score: 5.40625
--------------------------------------------------------------------------------------
-|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.72s (75.80%) |Training time=0.45s (19.78%) |Others=0.10 (4.42%)|CurSamplesPerSec=14.10 |AvgSamplesPerSec=14.33
-epoch: 0|step: 371|ppo_ep: 1|act_loss: 0.15869140625|cri_loss: 0.1014404296875|unsuper_loss: 0.0
-average reward score: 6.26171875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.71%) |Training time=0.45s (20.76%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.33
-epoch: 0|step: 372|ppo_ep: 1|act_loss: 0.028961181640625|cri_loss: 0.0159454345703125|unsuper_loss: 0.0
-average reward score: 5.92578125
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.55%) |Training time=0.46s (20.89%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.33
-epoch: 0|step: 373|ppo_ep: 1|act_loss: -0.00873565673828125|cri_loss: -0.00383758544921875|unsuper_loss: 0.0
-average reward score: 5.73046875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.99%) |Training time=0.44s (20.43%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.33
-epoch: 0|step: 374|ppo_ep: 1|act_loss: 0.05352783203125|cri_loss: 0.029754638671875|unsuper_loss: 0.0
-average reward score: 6.60546875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.83%) |Training time=0.42s (19.45%) |Others=0.10 (4.73%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.33
-epoch: 0|step: 375|ppo_ep: 1|act_loss: -0.0106658935546875|cri_loss: -0.004302978515625|unsuper_loss: 0.0
-average reward score: 5.89453125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.77%) |Training time=0.45s (20.61%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.33
-epoch: 0|step: 376|ppo_ep: 1|act_loss: -0.08251953125|cri_loss: -0.03900146484375|unsuper_loss: 0.0
-average reward score: 5.85546875
--------------------------------------------------------------------------------------
-|E2E latency=2.46s |Gather latency=0.00s (0.00%) |Generate time=1.62s (65.79%) |Training time=0.46s (18.63%) |Others=0.38 (15.58%)|CurSamplesPerSec=13.01 |AvgSamplesPerSec=14.33
-epoch: 0|step: 377|ppo_ep: 1|act_loss: 0.0123748779296875|cri_loss: 0.0083465576171875|unsuper_loss: 0.0
-average reward score: 6.8984375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.55%) |Training time=0.45s (20.76%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.33
-epoch: 0|step: 378|ppo_ep: 1|act_loss: -0.016845703125|cri_loss: -0.006771087646484375|unsuper_loss: 0.0
-average reward score: 6.5
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.54%) |Training time=0.45s (20.82%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.33
-[2023-04-14 09:01:30,242] [INFO] [logging.py:96:log_dist] [Rank 0] step=380, skipped=7, lr=[9.623373615291988e-06, 9.623373615291988e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:01:30,260] [INFO] [timer.py:199:stop] epoch=0/micro_step=380/global_step=380, RunningAvgSamplesPerSec=103.31409354931576, CurrSamplesPerSec=110.43399930555898, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:01:30,353] [INFO] [logging.py:96:log_dist] [Rank 0] step=380, skipped=5, lr=[4.986001254867505e-06, 4.986001254867505e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 379|ppo_ep: 1|act_loss: 0.00899505615234375|cri_loss: 0.004871368408203125|unsuper_loss: 0.0
-average reward score: 6.72265625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.65%) |Training time=0.45s (20.85%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.33
-epoch: 0|step: 380|ppo_ep: 1|act_loss: -0.007282257080078125|cri_loss: -0.00290679931640625|unsuper_loss: 0.0
-average reward score: 6.97265625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.43%) |Training time=0.46s (20.97%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.33
-epoch: 0|step: 381|ppo_ep: 1|act_loss: -0.0330810546875|cri_loss: -0.0158538818359375|unsuper_loss: 0.0
-average reward score: 7.83203125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.60%) |Training time=0.45s (20.85%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.33
-epoch: 0|step: 382|ppo_ep: 1|act_loss: -0.0264892578125|cri_loss: -0.01214599609375|unsuper_loss: 0.0
-average reward score: 5.5390625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.75%) |Training time=0.45s (20.61%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.34
-epoch: 0|step: 383|ppo_ep: 1|act_loss: -0.029754638671875|cri_loss: -0.01397705078125|unsuper_loss: 0.0
-average reward score: 6.98046875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.42%) |Training time=0.43s (20.04%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.34
-epoch: 0|step: 384|ppo_ep: 1|act_loss: 0.044189453125|cri_loss: 0.0272216796875|unsuper_loss: 0.0
-average reward score: 5.6953125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.78%) |Training time=0.45s (20.59%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.34
-epoch: 0|step: 385|ppo_ep: 1|act_loss: 0.024200439453125|cri_loss: 0.0135498046875|unsuper_loss: 0.0
-average reward score: 5.93359375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.43%) |Training time=0.45s (20.91%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.34
-epoch: 0|step: 386|ppo_ep: 1|act_loss: 0.044403076171875|cri_loss: 0.0243988037109375|unsuper_loss: 0.0
-average reward score: 6.16796875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.73%) |Training time=0.45s (20.64%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.34
-[2023-04-14 09:01:47,714] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
-epoch: 0|step: 387|ppo_ep: 1|act_loss: 0.048492431640625|cri_loss: 0.026763916015625|unsuper_loss: 0.0
-average reward score: 5.87109375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.20%) |Training time=0.45s (20.62%) |Others=0.09 (4.19%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.34
-[2023-04-14 09:01:49,872] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
-epoch: 0|step: 388|ppo_ep: 1|act_loss: 0.0159149169921875|cri_loss: 0.00897979736328125|unsuper_loss: 0.0
-average reward score: 5.68359375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.66%) |Training time=0.43s (20.09%) |Others=0.09 (4.25%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.34
-[2023-04-14 09:01:51,930] [INFO] [logging.py:96:log_dist] [Rank 0] step=390, skipped=7, lr=[9.62138920354481e-06, 9.62138920354481e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:01:51,948] [INFO] [timer.py:199:stop] epoch=0/micro_step=390/global_step=390, RunningAvgSamplesPerSec=103.55384594517648, CurrSamplesPerSec=120.64981208262432, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:01:52,041] [INFO] [logging.py:96:log_dist] [Rank 0] step=390, skipped=7, lr=[4.985175753132026e-06, 4.985175753132026e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 389|ppo_ep: 1|act_loss: -0.0217132568359375|cri_loss: -0.01003265380859375|unsuper_loss: 0.0
-average reward score: 6.18359375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.62%) |Training time=0.43s (19.72%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.34
-epoch: 0|step: 390|ppo_ep: 1|act_loss: 0.026214599609375|cri_loss: 0.0140380859375|unsuper_loss: 0.0
-average reward score: 5.77734375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.59%) |Training time=0.45s (20.80%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.34
-epoch: 0|step: 391|ppo_ep: 1|act_loss: 0.0062713623046875|cri_loss: 0.00415802001953125|unsuper_loss: 0.0
-average reward score: 5.71484375
--------------------------------------------------------------------------------------
-|E2E latency=2.42s |Gather latency=0.00s (0.00%) |Generate time=1.63s (67.10%) |Training time=0.44s (18.30%) |Others=0.35 (14.61%)|CurSamplesPerSec=13.21 |AvgSamplesPerSec=14.34
-epoch: 0|step: 392|ppo_ep: 1|act_loss: -0.021392822265625|cri_loss: -0.0094757080078125|unsuper_loss: 0.0
-average reward score: 5.67578125
--------------------------------------------------------------------------------------
-|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.80s (76.61%) |Training time=0.45s (19.10%) |Others=0.10 (4.29%)|CurSamplesPerSec=13.64 |AvgSamplesPerSec=14.34
-epoch: 0|step: 393|ppo_ep: 1|act_loss: 0.0036029815673828125|cri_loss: 0.0069122314453125|unsuper_loss: 0.0
-average reward score: 5.3828125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.56%) |Training time=0.43s (19.89%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.34
-epoch: 0|step: 394|ppo_ep: 1|act_loss: 0.027252197265625|cri_loss: 0.0158843994140625|unsuper_loss: 0.0
-average reward score: 6.08984375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.04%) |Training time=0.44s (20.35%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.34
-epoch: 0|step: 395|ppo_ep: 1|act_loss: -0.0112457275390625|cri_loss: -0.00496673583984375|unsuper_loss: 0.0
-average reward score: 5.51953125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.13%) |Training time=0.44s (20.23%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.34
-epoch: 0|step: 396|ppo_ep: 1|act_loss: -0.03106689453125|cri_loss: -0.014862060546875|unsuper_loss: 0.0
-average reward score: 4.6015625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.42%) |Training time=0.43s (19.95%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.34
-epoch: 0|step: 397|ppo_ep: 1|act_loss: -0.04022216796875|cri_loss: -0.019287109375|unsuper_loss: 0.0
-average reward score: 5.375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.22%) |Training time=0.44s (20.05%) |Others=0.10 (4.73%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.34
-epoch: 0|step: 398|ppo_ep: 1|act_loss: -0.04376220703125|cri_loss: -0.0200653076171875|unsuper_loss: 0.0
-average reward score: 5.734375
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.65s (74.98%) |Training time=0.44s (20.24%) |Others=0.11 (4.78%)|CurSamplesPerSec=14.57 |AvgSamplesPerSec=14.34
-[2023-04-14 09:02:14,207] [INFO] [logging.py:96:log_dist] [Rank 0] step=400, skipped=7, lr=[9.619333697738975e-06, 9.619333697738975e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:02:14,225] [INFO] [timer.py:199:stop] epoch=0/micro_step=400/global_step=400, RunningAvgSamplesPerSec=103.79039135515188, CurrSamplesPerSec=104.92432531312882, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:02:14,317] [INFO] [logging.py:96:log_dist] [Rank 0] step=400, skipped=7, lr=[4.984110724217086e-06, 4.984110724217086e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 399|ppo_ep: 1|act_loss: -0.012847900390625|cri_loss: -0.004749298095703125|unsuper_loss: 0.0
-average reward score: 5.34765625
--------------------------------------------------------------------------------------
-|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.72s (75.21%) |Training time=0.47s (20.49%) |Others=0.10 (4.30%)|CurSamplesPerSec=14.01 |AvgSamplesPerSec=14.34
-epoch: 0|step: 400|ppo_ep: 1|act_loss: 0.0682373046875|cri_loss: 0.0369873046875|unsuper_loss: 0.0
-average reward score: 5.3828125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.58%) |Training time=0.45s (20.79%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.34
-epoch: 0|step: 401|ppo_ep: 1|act_loss: 0.024078369140625|cri_loss: 0.012664794921875|unsuper_loss: 0.0
-average reward score: 5.3203125
--------------------------------------------------------------------------------------
-|E2E latency=2.59s |Gather latency=0.00s (0.00%) |Generate time=1.62s (62.63%) |Training time=0.45s (17.50%) |Others=0.51 (19.86%)|CurSamplesPerSec=12.36 |AvgSamplesPerSec=14.34
-epoch: 0|step: 402|ppo_ep: 1|act_loss: 0.0159912109375|cri_loss: 0.0086822509765625|unsuper_loss: 0.0
-average reward score: 5.640625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.37%) |Training time=0.46s (21.03%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.34
-epoch: 0|step: 403|ppo_ep: 1|act_loss: 0.021636962890625|cri_loss: 0.0117950439453125|unsuper_loss: 0.0
-average reward score: 5.29296875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.52%) |Training time=0.43s (19.83%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.34
-epoch: 0|step: 404|ppo_ep: 1|act_loss: 0.058624267578125|cri_loss: 0.03216552734375|unsuper_loss: 0.0
-average reward score: 5.73828125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.61%) |Training time=0.43s (19.75%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.34
-epoch: 0|step: 405|ppo_ep: 1|act_loss: 0.02593994140625|cri_loss: 0.0146026611328125|unsuper_loss: 0.0
-average reward score: 5.6875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.85%) |Training time=0.45s (20.62%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.34
-epoch: 0|step: 406|ppo_ep: 1|act_loss: 0.006076812744140625|cri_loss: 0.006511688232421875|unsuper_loss: 0.0
-average reward score: 6.0859375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.57%) |Training time=0.45s (20.82%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.34
-epoch: 0|step: 407|ppo_ep: 1|act_loss: -0.01947021484375|cri_loss: -0.00719451904296875|unsuper_loss: 0.0
-average reward score: 5.48046875
--------------------------------------------------------------------------------------
-|E2E latency=3.09s |Gather latency=0.00s (0.00%) |Generate time=1.78s (57.80%) |Training time=0.45s (14.56%) |Others=0.85 (27.64%)|CurSamplesPerSec=10.36 |AvgSamplesPerSec=14.33
-epoch: 0|step: 408|ppo_ep: 1|act_loss: 0.00424957275390625|cri_loss: 0.0063934326171875|unsuper_loss: 0.0
-average reward score: 5.99609375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.50%) |Training time=0.45s (20.87%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.33
-[2023-04-14 09:02:37,266] [INFO] [logging.py:96:log_dist] [Rank 0] step=410, skipped=7, lr=[9.617207128342042e-06, 9.617207128342042e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:02:37,284] [INFO] [timer.py:199:stop] epoch=0/micro_step=410/global_step=410, RunningAvgSamplesPerSec=103.98547538695068, CurrSamplesPerSec=111.4699448786658, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:02:37,377] [INFO] [logging.py:96:log_dist] [Rank 0] step=410, skipped=7, lr=[4.983008874788623e-06, 4.983008874788623e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 409|ppo_ep: 1|act_loss: -0.04351806640625|cri_loss: -0.019500732421875|unsuper_loss: 0.0
-average reward score: 5.24609375
--------------------------------------------------------------------------------------
-|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.63s (73.51%) |Training time=0.45s (20.31%) |Others=0.14 (6.18%)|CurSamplesPerSec=14.43 |AvgSamplesPerSec=14.33
-epoch: 0|step: 410|ppo_ep: 1|act_loss: -0.032745361328125|cri_loss: -0.0156402587890625|unsuper_loss: 0.0
-average reward score: 6.37890625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.57%) |Training time=0.45s (20.81%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.33
-epoch: 0|step: 411|ppo_ep: 1|act_loss: -0.0243377685546875|cri_loss: -0.0116424560546875|unsuper_loss: 0.0
-average reward score: 5.6015625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.64%) |Training time=0.45s (20.77%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.33
-epoch: 0|step: 412|ppo_ep: 1|act_loss: 0.04522705078125|cri_loss: 0.0237884521484375|unsuper_loss: 0.0
-average reward score: 5.68359375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.64%) |Training time=0.45s (20.70%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.33
-epoch: 0|step: 413|ppo_ep: 1|act_loss: -0.031341552734375|cri_loss: -0.01459503173828125|unsuper_loss: 0.0
-average reward score: 5.93359375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.86%) |Training time=0.45s (20.51%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.33
-epoch: 0|step: 414|ppo_ep: 1|act_loss: 0.01171875|cri_loss: 0.0071258544921875|unsuper_loss: 0.0
-average reward score: 5.69140625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.72%) |Training time=0.45s (20.67%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.34
-epoch: 0|step: 415|ppo_ep: 1|act_loss: -0.045379638671875|cri_loss: -0.0215301513671875|unsuper_loss: 0.0
-average reward score: 7.15234375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.78%) |Training time=0.45s (20.70%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.34
-epoch: 0|step: 416|ppo_ep: 1|act_loss: -0.01280975341796875|cri_loss: -0.0058746337890625|unsuper_loss: 0.0
-average reward score: 6.015625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.71%) |Training time=0.45s (20.66%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.34
-epoch: 0|step: 417|ppo_ep: 1|act_loss: -0.0185699462890625|cri_loss: -0.00850677490234375|unsuper_loss: 0.0
-average reward score: 6.12890625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.84%) |Training time=0.45s (20.52%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.34
-epoch: 0|step: 418|ppo_ep: 1|act_loss: 0.0283050537109375|cri_loss: 0.0148162841796875|unsuper_loss: 0.0
-average reward score: 6.3359375
--------------------------------------------------------------------------------------
-|E2E latency=2.54s |Gather latency=0.00s (0.00%) |Generate time=1.63s (64.15%) |Training time=0.44s (17.27%) |Others=0.47 (18.57%)|CurSamplesPerSec=12.61 |AvgSamplesPerSec=14.33
-[2023-04-14 09:02:59,407] [INFO] [logging.py:96:log_dist] [Rank 0] step=420, skipped=7, lr=[9.615009526874895e-06, 9.615009526874895e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:02:59,425] [INFO] [timer.py:199:stop] epoch=0/micro_step=420/global_step=420, RunningAvgSamplesPerSec=104.16394471702999, CurrSamplesPerSec=110.71603520667838, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:02:59,518] [INFO] [logging.py:96:log_dist] [Rank 0] step=420, skipped=7, lr=[4.981870221178703e-06, 4.981870221178703e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 419|ppo_ep: 1|act_loss: 0.04669189453125|cri_loss: 0.025634765625|unsuper_loss: 0.0
-average reward score: 5.734375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.55%) |Training time=0.45s (20.82%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.33
-epoch: 0|step: 420|ppo_ep: 1|act_loss: 0.01861572265625|cri_loss: 0.0119781494140625|unsuper_loss: 0.0
-average reward score: 5.6953125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.79%) |Training time=0.45s (20.54%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.34
-epoch: 0|step: 421|ppo_ep: 1|act_loss: 0.04266357421875|cri_loss: 0.023651123046875|unsuper_loss: 0.0
-average reward score: 5.7734375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.76%) |Training time=0.45s (20.71%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.34
-epoch: 0|step: 422|ppo_ep: 1|act_loss: -0.026763916015625|cri_loss: -0.01251983642578125|unsuper_loss: 0.0
-average reward score: 6.8046875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.66%) |Training time=0.45s (20.65%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.34
-epoch: 0|step: 423|ppo_ep: 1|act_loss: -0.02587890625|cri_loss: -0.011993408203125|unsuper_loss: 0.0
-average reward score: 5.5390625
--------------------------------------------------------------------------------------
-|E2E latency=3.01s |Gather latency=0.00s (0.00%) |Generate time=1.79s (59.41%) |Training time=0.45s (14.82%) |Others=0.78 (25.77%)|CurSamplesPerSec=10.61 |AvgSamplesPerSec=14.33
-epoch: 0|step: 424|ppo_ep: 1|act_loss: 0.0005359649658203125|cri_loss: 0.0005984306335449219|unsuper_loss: 0.0
-average reward score: 6.15234375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.89%) |Training time=0.45s (20.58%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.33
-epoch: 0|step: 425|ppo_ep: 1|act_loss: 0.05877685546875|cri_loss: 0.031890869140625|unsuper_loss: 0.0
-average reward score: 6.1171875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.81%) |Training time=0.45s (20.67%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.33
-epoch: 0|step: 426|ppo_ep: 1|act_loss: 0.00269317626953125|cri_loss: 0.0024261474609375|unsuper_loss: 0.0
-average reward score: 7.10546875
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.65s (74.81%) |Training time=0.46s (20.73%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.51 |AvgSamplesPerSec=14.33
-epoch: 0|step: 427|ppo_ep: 1|act_loss: -0.010650634765625|cri_loss: -0.0042266845703125|unsuper_loss: 0.0
-average reward score: 6.15625
--------------------------------------------------------------------------------------
-|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.66s (73.85%) |Training time=0.46s (20.32%) |Others=0.13 (5.83%)|CurSamplesPerSec=14.25 |AvgSamplesPerSec=14.33
-epoch: 0|step: 428|ppo_ep: 1|act_loss: -0.0057525634765625|cri_loss: -0.0024261474609375|unsuper_loss: 0.0
-average reward score: 7.1171875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.80%) |Training time=0.45s (20.66%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.33
-[2023-04-14 09:03:22,100] [INFO] [logging.py:96:log_dist] [Rank 0] step=430, skipped=7, lr=[9.612740925911291e-06, 9.612740925911291e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:03:22,118] [INFO] [timer.py:199:stop] epoch=0/micro_step=430/global_step=430, RunningAvgSamplesPerSec=104.3303442174363, CurrSamplesPerSec=114.00963432544857, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:03:22,211] [INFO] [logging.py:96:log_dist] [Rank 0] step=430, skipped=7, lr=[4.980694780264918e-06, 4.980694780264918e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 429|ppo_ep: 1|act_loss: -0.003879547119140625|cri_loss: -0.0010061264038085938|unsuper_loss: 0.0
-average reward score: 6.046875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.17%) |Training time=0.44s (20.33%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.33
-epoch: 0|step: 430|ppo_ep: 1|act_loss: 0.00652313232421875|cri_loss: 0.00467681884765625|unsuper_loss: 0.0
-average reward score: 6.27734375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.60%) |Training time=0.45s (20.86%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.33
-epoch: 0|step: 431|ppo_ep: 1|act_loss: 0.0018978118896484375|cri_loss: 0.0019092559814453125|unsuper_loss: 0.0
-average reward score: 6.4453125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.75%) |Training time=0.45s (20.71%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.33
-epoch: 0|step: 432|ppo_ep: 1|act_loss: 0.007190704345703125|cri_loss: 0.00390625|unsuper_loss: 0.0
-average reward score: 6.2265625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.80%) |Training time=0.45s (20.65%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.33
-epoch: 0|step: 433|ppo_ep: 1|act_loss: 0.003032684326171875|cri_loss: 0.002223968505859375|unsuper_loss: 0.0
-average reward score: 6.2265625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.09%) |Training time=0.44s (20.35%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.33
-epoch: 0|step: 434|ppo_ep: 1|act_loss: -0.058868408203125|cri_loss: -0.0277099609375|unsuper_loss: 0.0
-average reward score: 5.90234375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.79%) |Training time=0.45s (20.68%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.33
-epoch: 0|step: 435|ppo_ep: 1|act_loss: -0.022735595703125|cri_loss: -0.01020050048828125|unsuper_loss: 0.0
-average reward score: 6.27734375
--------------------------------------------------------------------------------------
-|E2E latency=2.41s |Gather latency=0.00s (0.00%) |Generate time=1.62s (67.27%) |Training time=0.45s (18.76%) |Others=0.34 (13.97%)|CurSamplesPerSec=13.27 |AvgSamplesPerSec=14.33
-epoch: 0|step: 436|ppo_ep: 1|act_loss: 0.053619384765625|cri_loss: 0.02984619140625|unsuper_loss: 0.0
-average reward score: 6.0390625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.81%) |Training time=0.45s (20.67%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.33
-epoch: 0|step: 437|ppo_ep: 1|act_loss: 0.01629638671875|cri_loss: 0.01190948486328125|unsuper_loss: 0.0
-average reward score: 6.33203125
--------------------------------------------------------------------------------------
-|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.79s (76.74%) |Training time=0.44s (19.03%) |Others=0.10 (4.23%)|CurSamplesPerSec=13.71 |AvgSamplesPerSec=14.33
-epoch: 0|step: 438|ppo_ep: 1|act_loss: -0.07861328125|cri_loss: -0.0305023193359375|unsuper_loss: 0.0
-average reward score: 6.1796875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.73%) |Training time=0.45s (20.73%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.33
-[2023-04-14 09:03:44,233] [INFO] [logging.py:96:log_dist] [Rank 0] step=440, skipped=7, lr=[9.61040135907737e-06, 9.61040135907737e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:03:44,251] [INFO] [timer.py:199:stop] epoch=0/micro_step=440/global_step=440, RunningAvgSamplesPerSec=104.48292126996193, CurrSamplesPerSec=108.55886356632661, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:03:44,344] [INFO] [logging.py:96:log_dist] [Rank 0] step=440, skipped=7, lr=[4.97948256947014e-06, 4.97948256947014e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 439|ppo_ep: 1|act_loss: -0.0447998046875|cri_loss: -0.0199737548828125|unsuper_loss: 0.0
-average reward score: 6.0390625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.47%) |Training time=0.46s (21.03%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.33
-epoch: 0|step: 440|ppo_ep: 1|act_loss: 0.052581787109375|cri_loss: 0.0293731689453125|unsuper_loss: 0.0
-average reward score: 5.6640625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.96%) |Training time=0.44s (20.49%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.33
-epoch: 0|step: 441|ppo_ep: 1|act_loss: 0.05633544921875|cri_loss: 0.03546142578125|unsuper_loss: 0.0
-average reward score: 5.51953125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.06%) |Training time=0.44s (20.41%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.33
-epoch: 0|step: 442|ppo_ep: 1|act_loss: -0.0230560302734375|cri_loss: 0.0008544921875|unsuper_loss: 0.0
-average reward score: 5.3671875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.69%) |Training time=0.45s (20.77%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.33
-epoch: 0|step: 443|ppo_ep: 1|act_loss: 0.016937255859375|cri_loss: 0.011688232421875|unsuper_loss: 0.0
-average reward score: 6.484375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.60%) |Training time=0.45s (20.87%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.34
-epoch: 0|step: 444|ppo_ep: 1|act_loss: 0.0214691162109375|cri_loss: 0.01448822021484375|unsuper_loss: 0.0
-average reward score: 4.859375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.07%) |Training time=0.44s (20.37%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.34
-epoch: 0|step: 445|ppo_ep: 1|act_loss: -0.0421142578125|cri_loss: -0.016326904296875|unsuper_loss: 0.0
-average reward score: 6.421875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.77%) |Training time=0.45s (20.70%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.34
-epoch: 0|step: 446|ppo_ep: 1|act_loss: 0.120361328125|cri_loss: 0.064453125|unsuper_loss: 0.0
-average reward score: 5.7890625
--------------------------------------------------------------------------------------
-|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.28%) |Training time=0.45s (20.36%) |Others=0.14 (6.36%)|CurSamplesPerSec=14.44 |AvgSamplesPerSec=14.34
-epoch: 0|step: 447|ppo_ep: 1|act_loss: -0.0311126708984375|cri_loss: -0.0119476318359375|unsuper_loss: 0.0
-average reward score: 5.62109375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.79%) |Training time=0.45s (20.65%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.34
-epoch: 0|step: 448|ppo_ep: 1|act_loss: -0.024383544921875|cri_loss: -0.00986480712890625|unsuper_loss: 0.0
-average reward score: 5.66796875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.41%) |Training time=0.43s (20.04%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.34
-[2023-04-14 09:04:05,991] [INFO] [logging.py:96:log_dist] [Rank 0] step=450, skipped=7, lr=[9.607990861051154e-06, 9.607990861051154e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:04:06,009] [INFO] [timer.py:199:stop] epoch=0/micro_step=450/global_step=450, RunningAvgSamplesPerSec=104.64871045065954, CurrSamplesPerSec=110.18387111339149, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:04:06,104] [INFO] [logging.py:96:log_dist] [Rank 0] step=450, skipped=7, lr=[4.978233606762256e-06, 4.978233606762256e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 449|ppo_ep: 1|act_loss: -0.11602783203125|cri_loss: -0.0455322265625|unsuper_loss: 0.0
-average reward score: 5.38671875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.53%) |Training time=0.45s (20.86%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.34
-epoch: 0|step: 450|ppo_ep: 1|act_loss: 0.019927978515625|cri_loss: 0.0127716064453125|unsuper_loss: 0.0
-average reward score: 6.1171875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.35%) |Training time=0.46s (21.11%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.34
-epoch: 0|step: 451|ppo_ep: 1|act_loss: 0.113037109375|cri_loss: 0.065673828125|unsuper_loss: 0.0
-average reward score: 5.57421875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.80%) |Training time=0.45s (20.68%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.34
-epoch: 0|step: 452|ppo_ep: 1|act_loss: 0.041015625|cri_loss: 0.0259552001953125|unsuper_loss: 0.0
-average reward score: 6.6640625
--------------------------------------------------------------------------------------
-|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.80s (76.88%) |Training time=0.44s (18.90%) |Others=0.10 (4.21%)|CurSamplesPerSec=13.67 |AvgSamplesPerSec=14.34
-epoch: 0|step: 453|ppo_ep: 1|act_loss: -0.0408935546875|cri_loss: -0.0171661376953125|unsuper_loss: 0.0
-average reward score: 6.30859375
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.65s (74.98%) |Training time=0.45s (20.53%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.34
-epoch: 0|step: 454|ppo_ep: 1|act_loss: -0.0245361328125|cri_loss: -0.01012420654296875|unsuper_loss: 0.0
-average reward score: 5.98046875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.83%) |Training time=0.45s (20.65%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.34
-epoch: 0|step: 455|ppo_ep: 1|act_loss: 0.012359619140625|cri_loss: 0.01100921630859375|unsuper_loss: 0.0
-average reward score: 6.6953125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.75%) |Training time=0.45s (20.72%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.34
-epoch: 0|step: 456|ppo_ep: 1|act_loss: 0.0537109375|cri_loss: 0.0309906005859375|unsuper_loss: 0.0
-average reward score: 6.28515625
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.66s (75.13%) |Training time=0.45s (20.41%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.51 |AvgSamplesPerSec=14.34
-epoch: 0|step: 457|ppo_ep: 1|act_loss: 0.038177490234375|cri_loss: 0.026824951171875|unsuper_loss: 0.0
-average reward score: 6.890625
--------------------------------------------------------------------------------------
-|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.73s (75.83%) |Training time=0.45s (19.71%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.04 |AvgSamplesPerSec=14.34
-epoch: 0|step: 458|ppo_ep: 1|act_loss: 0.062255859375|cri_loss: 0.0347900390625|unsuper_loss: 0.0
-average reward score: 6.5703125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.63%) |Training time=0.45s (20.83%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.34
-[2023-04-14 09:04:28,063] [INFO] [logging.py:96:log_dist] [Rank 0] step=460, skipped=7, lr=[9.60550946756204e-06, 9.60550946756204e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:04:28,081] [INFO] [timer.py:199:stop] epoch=0/micro_step=460/global_step=460, RunningAvgSamplesPerSec=104.78282677177035, CurrSamplesPerSec=112.42672956817755, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:04:28,174] [INFO] [logging.py:96:log_dist] [Rank 0] step=460, skipped=7, lr=[4.976947910653907e-06, 4.976947910653907e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 459|ppo_ep: 1|act_loss: 0.0418701171875|cri_loss: 0.0247802734375|unsuper_loss: 0.0
-average reward score: 6.96875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.85%) |Training time=0.45s (20.62%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.34
-epoch: 0|step: 460|ppo_ep: 1|act_loss: -0.0098114013671875|cri_loss: 0.0029449462890625|unsuper_loss: 0.0
-average reward score: 7.3515625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.67%) |Training time=0.45s (20.78%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.34
-epoch: 0|step: 461|ppo_ep: 1|act_loss: 0.038818359375|cri_loss: 0.021240234375|unsuper_loss: 0.0
-average reward score: 6.46484375
--------------------------------------------------------------------------------------
-|E2E latency=3.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (50.91%) |Training time=0.45s (14.09%) |Others=1.12 (35.00%)|CurSamplesPerSec=10.02 |AvgSamplesPerSec=14.33
-epoch: 0|step: 462|ppo_ep: 1|act_loss: -0.0008087158203125|cri_loss: 0.004665374755859375|unsuper_loss: 0.0
-average reward score: 6.25
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.11%) |Training time=0.44s (20.37%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.33
-epoch: 0|step: 463|ppo_ep: 1|act_loss: -0.00408172607421875|cri_loss: 0.0038604736328125|unsuper_loss: 0.0
-average reward score: 6.27734375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.30%) |Training time=0.44s (20.13%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.33
-epoch: 0|step: 464|ppo_ep: 1|act_loss: -0.07135009765625|cri_loss: -0.0275421142578125|unsuper_loss: 0.0
-average reward score: 6.5078125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.95%) |Training time=0.45s (20.52%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.33
-epoch: 0|step: 465|ppo_ep: 1|act_loss: 0.051300048828125|cri_loss: 0.0270538330078125|unsuper_loss: 0.0
-average reward score: 5.9921875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.74%) |Training time=0.45s (20.72%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.33
-epoch: 0|step: 466|ppo_ep: 1|act_loss: 0.06756591796875|cri_loss: 0.042694091796875|unsuper_loss: 0.0
-average reward score: 6.3125
--------------------------------------------------------------------------------------
-|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.79s (76.65%) |Training time=0.45s (19.12%) |Others=0.10 (4.23%)|CurSamplesPerSec=13.72 |AvgSamplesPerSec=14.33
-epoch: 0|step: 467|ppo_ep: 1|act_loss: -0.0213623046875|cri_loss: -0.00701141357421875|unsuper_loss: 0.0
-average reward score: 7.28515625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.73%) |Training time=0.45s (20.73%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.33
-epoch: 0|step: 468|ppo_ep: 1|act_loss: 0.037811279296875|cri_loss: 0.0226287841796875|unsuper_loss: 0.0
-average reward score: 6.66015625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.99%) |Training time=0.44s (20.47%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.33
-[2023-04-14 09:04:50,963] [INFO] [logging.py:96:log_dist] [Rank 0] step=470, skipped=7, lr=[9.602957215390267e-06, 9.602957215390267e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:04:50,981] [INFO] [timer.py:199:stop] epoch=0/micro_step=470/global_step=470, RunningAvgSamplesPerSec=104.94226242948741, CurrSamplesPerSec=112.22189910894872, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:04:51,073] [INFO] [logging.py:96:log_dist] [Rank 0] step=470, skipped=7, lr=[4.9756255002022105e-06, 4.9756255002022105e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 469|ppo_ep: 1|act_loss: 0.03900146484375|cri_loss: 0.0225982666015625|unsuper_loss: 0.0
-average reward score: 6.65234375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.87%) |Training time=0.45s (20.62%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.34
-epoch: 0|step: 470|ppo_ep: 1|act_loss: -0.027069091796875|cri_loss: -0.00945281982421875|unsuper_loss: 0.0
-average reward score: 5.79296875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.29%) |Training time=0.44s (20.18%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.34
-epoch: 0|step: 471|ppo_ep: 1|act_loss: -0.000213623046875|cri_loss: 0.00934600830078125|unsuper_loss: 0.0
-average reward score: 5.99609375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.66%) |Training time=0.45s (20.81%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.34
-epoch: 0|step: 472|ppo_ep: 1|act_loss: 0.05279541015625|cri_loss: 0.028594970703125|unsuper_loss: 0.0
-average reward score: 6.9375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.19%) |Training time=0.44s (20.25%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.34
-epoch: 0|step: 473|ppo_ep: 1|act_loss: -0.17431640625|cri_loss: -0.073974609375|unsuper_loss: 0.0
-average reward score: 5.41015625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.70%) |Training time=0.45s (20.79%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.34
-[2023-04-14 09:05:01,823] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 8192, but hysteresis is 2. Reducing hysteresis to 1
-[2023-04-14 09:05:01,909] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, reducing to 16384
-epoch: 0|step: 474|ppo_ep: 1|act_loss: -0.1414794921875|cri_loss: -0.056488037109375|unsuper_loss: 0.0
-average reward score: 5.6796875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.93%) |Training time=0.43s (19.89%) |Others=0.09 (4.18%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.34
-epoch: 0|step: 475|ppo_ep: 1|act_loss: 0.017120361328125|cri_loss: 0.01279449462890625|unsuper_loss: 0.0
-average reward score: 6.078125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.76%) |Training time=0.45s (20.71%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.34
-[2023-04-14 09:05:06,139] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 8192, reducing to 4096
-epoch: 0|step: 476|ppo_ep: 1|act_loss: -0.20263671875|cri_loss: -0.08056640625|unsuper_loss: 0.0
-average reward score: 4.42578125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.71%) |Training time=0.42s (19.72%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.34
-epoch: 0|step: 477|ppo_ep: 1|act_loss: -0.1700439453125|cri_loss: -0.07275390625|unsuper_loss: 0.0
-average reward score: 5.1328125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.82%) |Training time=0.45s (20.64%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.34
-epoch: 0|step: 478|ppo_ep: 1|act_loss: 0.039215087890625|cri_loss: 0.0303955078125|unsuper_loss: 0.0
-average reward score: 3.51953125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.19%) |Training time=0.44s (20.26%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.34
-[2023-04-14 09:05:12,638] [INFO] [logging.py:96:log_dist] [Rank 0] step=480, skipped=9, lr=[9.600864420788175e-06, 9.600864420788175e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:05:12,656] [INFO] [timer.py:199:stop] epoch=0/micro_step=480/global_step=480, RunningAvgSamplesPerSec=105.12317312034696, CurrSamplesPerSec=110.69512469701006, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:05:12,749] [INFO] [logging.py:96:log_dist] [Rank 0] step=480, skipped=8, lr=[4.9744039562213675e-06, 4.9744039562213675e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 479|ppo_ep: 1|act_loss: 0.248291015625|cri_loss: 0.168701171875|unsuper_loss: 0.0
-average reward score: 2.625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.72%) |Training time=0.45s (20.78%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.34
-epoch: 0|step: 480|ppo_ep: 1|act_loss: 0.0872802734375|cri_loss: 0.067626953125|unsuper_loss: 0.0
-average reward score: 2.62109375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.40%) |Training time=0.46s (21.10%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.34
-epoch: 0|step: 481|ppo_ep: 1|act_loss: -0.034820556640625|cri_loss: -0.014739990234375|unsuper_loss: 0.0
-average reward score: 3.9765625
--------------------------------------------------------------------------------------
-|E2E latency=3.11s |Gather latency=0.00s (0.00%) |Generate time=1.80s (57.93%) |Training time=0.45s (14.32%) |Others=0.86 (27.75%)|CurSamplesPerSec=10.28 |AvgSamplesPerSec=14.33
-epoch: 0|step: 482|ppo_ep: 1|act_loss: -0.1807861328125|cri_loss: -0.0804443359375|unsuper_loss: 0.0
-average reward score: 3.55859375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.33%) |Training time=0.44s (20.12%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.33
-epoch: 0|step: 483|ppo_ep: 1|act_loss: 0.0008697509765625|cri_loss: 0.007511138916015625|unsuper_loss: 0.0
-average reward score: 4.078125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.25%) |Training time=0.44s (20.22%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.33
-epoch: 0|step: 484|ppo_ep: 1|act_loss: 0.034332275390625|cri_loss: 0.033843994140625|unsuper_loss: 0.0
-average reward score: 4.1328125
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.97%) |Training time=0.44s (20.25%) |Others=0.10 (4.78%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.34
-epoch: 0|step: 485|ppo_ep: 1|act_loss: 0.0885009765625|cri_loss: 0.05035400390625|unsuper_loss: 0.0
-average reward score: 4.46484375
--------------------------------------------------------------------------------------
-|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.75s (76.50%) |Training time=0.44s (19.20%) |Others=0.10 (4.30%)|CurSamplesPerSec=13.99 |AvgSamplesPerSec=14.33
-epoch: 0|step: 486|ppo_ep: 1|act_loss: 0.040283203125|cri_loss: 0.024200439453125|unsuper_loss: 0.0
-average reward score: 4.3359375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.34%) |Training time=0.44s (20.12%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.34
-epoch: 0|step: 487|ppo_ep: 1|act_loss: -0.05267333984375|cri_loss: -0.022125244140625|unsuper_loss: 0.0
-average reward score: 4.96484375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.34%) |Training time=0.44s (20.12%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.34
-epoch: 0|step: 488|ppo_ep: 1|act_loss: 0.0249786376953125|cri_loss: 0.03216552734375|unsuper_loss: 0.0
-average reward score: 4.9453125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.36%) |Training time=0.44s (20.09%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.34
-[2023-04-14 09:05:35,437] [INFO] [logging.py:96:log_dist] [Rank 0] step=490, skipped=9, lr=[9.598184719026e-06, 9.598184719026e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:05:35,455] [INFO] [timer.py:199:stop] epoch=0/micro_step=490/global_step=490, RunningAvgSamplesPerSec=105.31313206740428, CurrSamplesPerSec=116.99643739730386, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:05:35,547] [INFO] [logging.py:96:log_dist] [Rank 0] step=490, skipped=8, lr=[4.973011842968471e-06, 4.973011842968471e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 489|ppo_ep: 1|act_loss: -0.0692138671875|cri_loss: -0.027069091796875|unsuper_loss: 0.0
-average reward score: 4.6328125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.38%) |Training time=0.44s (20.10%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.34
-epoch: 0|step: 490|ppo_ep: 1|act_loss: 0.021575927734375|cri_loss: 0.01507568359375|unsuper_loss: 0.0
-average reward score: 5.07421875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.37%) |Training time=0.44s (20.09%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.34
-epoch: 0|step: 491|ppo_ep: 1|act_loss: 0.0997314453125|cri_loss: 0.055267333984375|unsuper_loss: 0.0
-average reward score: 6.04296875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.50%) |Training time=0.43s (19.98%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.34
-epoch: 0|step: 492|ppo_ep: 1|act_loss: 0.054840087890625|cri_loss: 0.03228759765625|unsuper_loss: 0.0
-average reward score: 5.1484375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.90%) |Training time=0.42s (19.56%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.34
-epoch: 0|step: 493|ppo_ep: 1|act_loss: 0.181884765625|cri_loss: 0.099365234375|unsuper_loss: 0.0
-average reward score: 4.8984375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.31%) |Training time=0.44s (20.14%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.34
-epoch: 0|step: 494|ppo_ep: 1|act_loss: 0.259765625|cri_loss: 0.1429443359375|unsuper_loss: 0.0
-average reward score: 4.8203125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.40%) |Training time=0.44s (20.06%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.34
-epoch: 0|step: 495|ppo_ep: 1|act_loss: -0.0028533935546875|cri_loss: 0.007110595703125|unsuper_loss: 0.0
-average reward score: 5.30859375
--------------------------------------------------------------------------------------
-|E2E latency=3.01s |Gather latency=0.00s (0.00%) |Generate time=1.65s (54.93%) |Training time=0.53s (17.57%) |Others=0.83 (27.49%)|CurSamplesPerSec=10.64 |AvgSamplesPerSec=14.33
-epoch: 0|step: 496|ppo_ep: 1|act_loss: 0.019744873046875|cri_loss: 0.023468017578125|unsuper_loss: 0.0
-average reward score: 4.9453125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.93%) |Training time=0.45s (20.52%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.33
-epoch: 0|step: 497|ppo_ep: 1|act_loss: -0.0919189453125|cri_loss: -0.03338623046875|unsuper_loss: 0.0
-average reward score: 6.00390625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.66%) |Training time=0.45s (20.80%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.33
-epoch: 0|step: 498|ppo_ep: 1|act_loss: -0.146728515625|cri_loss: -0.05950927734375|unsuper_loss: 0.0
-average reward score: 4.94921875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.63%) |Training time=0.45s (20.84%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.33
-[2023-04-14 09:05:57,994] [INFO] [logging.py:96:log_dist] [Rank 0] step=500, skipped=9, lr=[9.595434267151607e-06, 9.595434267151607e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:05:58,012] [INFO] [timer.py:199:stop] epoch=0/micro_step=500/global_step=500, RunningAvgSamplesPerSec=105.46009089853679, CurrSamplesPerSec=114.55113153375825, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:05:58,105] [INFO] [logging.py:96:log_dist] [Rank 0] step=500, skipped=8, lr=[4.971583073714247e-06, 4.971583073714247e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 499|ppo_ep: 1|act_loss: -0.0750732421875|cri_loss: -0.0311279296875|unsuper_loss: 0.0
-average reward score: 5.1328125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.10%) |Training time=0.44s (20.35%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.33
-epoch: 0|step: 500|ppo_ep: 1|act_loss: -0.033935546875|cri_loss: -0.014617919921875|unsuper_loss: 0.0
-average reward score: 4.71875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.77%) |Training time=0.45s (20.69%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.34
-epoch: 0|step: 501|ppo_ep: 1|act_loss: 0.173583984375|cri_loss: 0.0955810546875|unsuper_loss: 0.0
-average reward score: 3.822265625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.75%) |Training time=0.45s (20.70%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.34
-epoch: 0|step: 502|ppo_ep: 1|act_loss: 0.10595703125|cri_loss: 0.0550537109375|unsuper_loss: 0.0
-average reward score: 3.6640625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.89%) |Training time=0.45s (20.58%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.34
-epoch: 0|step: 503|ppo_ep: 1|act_loss: 0.12017822265625|cri_loss: 0.0670166015625|unsuper_loss: 0.0
-average reward score: 4.44140625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.75%) |Training time=0.45s (20.71%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.34
-epoch: 0|step: 504|ppo_ep: 1|act_loss: -0.024993896484375|cri_loss: -0.0099639892578125|unsuper_loss: 0.0
-average reward score: 6.3515625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.05%) |Training time=0.44s (20.41%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.34
-epoch: 0|step: 505|ppo_ep: 1|act_loss: -0.0570068359375|cri_loss: -0.0201263427734375|unsuper_loss: 0.0
-average reward score: 5.26171875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.78%) |Training time=0.45s (20.66%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.34
-epoch: 0|step: 506|ppo_ep: 1|act_loss: 0.01320648193359375|cri_loss: 0.00848388671875|unsuper_loss: 0.0
-average reward score: 5.80859375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.74%) |Training time=0.45s (20.70%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.34
-epoch: 0|step: 507|ppo_ep: 1|act_loss: 0.11767578125|cri_loss: 0.06378173828125|unsuper_loss: 0.0
-average reward score: 5.80078125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.66s (76.16%) |Training time=0.42s (19.30%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.34
-epoch: 0|step: 508|ppo_ep: 1|act_loss: 0.20751953125|cri_loss: 0.11810302734375|unsuper_loss: 0.0
-average reward score: 6.640625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.82%) |Training time=0.45s (20.63%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.34
-[2023-04-14 09:06:19,713] [INFO] [logging.py:96:log_dist] [Rank 0] step=510, skipped=9, lr=[9.592613105933331e-06, 9.592613105933331e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:06:19,731] [INFO] [timer.py:199:stop] epoch=0/micro_step=510/global_step=510, RunningAvgSamplesPerSec=105.59803688950436, CurrSamplesPerSec=110.03175743087647, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:06:19,824] [INFO] [logging.py:96:log_dist] [Rank 0] step=510, skipped=8, lr=[4.970117669636501e-06, 4.970117669636501e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 509|ppo_ep: 1|act_loss: 0.1334228515625|cri_loss: 0.0731201171875|unsuper_loss: 0.0
-average reward score: 5.1796875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.56%) |Training time=0.45s (20.92%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.34
-epoch: 0|step: 510|ppo_ep: 1|act_loss: 0.051361083984375|cri_loss: 0.03314208984375|unsuper_loss: 0.0
-average reward score: 6.3984375
--------------------------------------------------------------------------------------
-|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.79s (76.64%) |Training time=0.45s (19.15%) |Others=0.10 (4.22%)|CurSamplesPerSec=13.68 |AvgSamplesPerSec=14.34
-epoch: 0|step: 511|ppo_ep: 1|act_loss: 0.033843994140625|cri_loss: 0.02349853515625|unsuper_loss: 0.0
-average reward score: 6.17578125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.99%) |Training time=0.44s (20.46%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.34
-epoch: 0|step: 512|ppo_ep: 1|act_loss: -0.001708984375|cri_loss: 0.003505706787109375|unsuper_loss: 0.0
-average reward score: 5.96484375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.53%) |Training time=0.45s (20.92%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.34
-epoch: 0|step: 513|ppo_ep: 1|act_loss: 0.069091796875|cri_loss: 0.04083251953125|unsuper_loss: 0.0
-average reward score: 5.02734375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.86%) |Training time=0.45s (20.61%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.34
-epoch: 0|step: 514|ppo_ep: 1|act_loss: 0.059661865234375|cri_loss: 0.032440185546875|unsuper_loss: 0.0
-average reward score: 4.9375
--------------------------------------------------------------------------------------
-|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.66s (73.99%) |Training time=0.49s (21.61%) |Others=0.10 (4.40%)|CurSamplesPerSec=14.25 |AvgSamplesPerSec=14.34
-epoch: 0|step: 515|ppo_ep: 1|act_loss: 0.04168701171875|cri_loss: 0.021697998046875|unsuper_loss: 0.0
-average reward score: 4.65234375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.28%) |Training time=0.44s (20.17%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.34
-epoch: 0|step: 516|ppo_ep: 1|act_loss: -0.0787353515625|cri_loss: -0.03155517578125|unsuper_loss: 0.0
-average reward score: 4.28125
--------------------------------------------------------------------------------------
-|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.63s (70.07%) |Training time=0.44s (18.84%) |Others=0.26 (11.09%)|CurSamplesPerSec=13.73 |AvgSamplesPerSec=14.34
-epoch: 0|step: 517|ppo_ep: 1|act_loss: -0.0179443359375|cri_loss: -0.00739288330078125|unsuper_loss: 0.0
-average reward score: 4.2734375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.09%) |Training time=0.44s (20.33%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.34
-epoch: 0|step: 518|ppo_ep: 1|act_loss: -0.05718994140625|cri_loss: -0.0237884521484375|unsuper_loss: 0.0
-average reward score: 4.625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.80%) |Training time=0.45s (20.66%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.34
-[2023-04-14 09:06:41,836] [INFO] [logging.py:96:log_dist] [Rank 0] step=520, skipped=9, lr=[9.589721277187583e-06, 9.589721277187583e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:06:41,854] [INFO] [timer.py:199:stop] epoch=0/micro_step=520/global_step=520, RunningAvgSamplesPerSec=105.71696360506587, CurrSamplesPerSec=113.11680590894035, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:06:41,947] [INFO] [logging.py:96:log_dist] [Rank 0] step=520, skipped=8, lr=[4.968615652456056e-06, 4.968615652456056e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 519|ppo_ep: 1|act_loss: -0.0887451171875|cri_loss: -0.041229248046875|unsuper_loss: 0.0
-average reward score: 4.7265625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.98%) |Training time=0.45s (20.50%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.34
-epoch: 0|step: 520|ppo_ep: 1|act_loss: -0.027496337890625|cri_loss: -0.00603485107421875|unsuper_loss: 0.0
-average reward score: 4.46875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.76%) |Training time=0.45s (20.69%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.35
-epoch: 0|step: 521|ppo_ep: 1|act_loss: -0.048919677734375|cri_loss: -0.023101806640625|unsuper_loss: 0.0
-average reward score: 4.20703125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.02%) |Training time=0.44s (20.44%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.35
-epoch: 0|step: 522|ppo_ep: 1|act_loss: 0.08306884765625|cri_loss: 0.04388427734375|unsuper_loss: 0.0
-average reward score: 4.59375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.75%) |Training time=0.43s (19.69%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.35
-epoch: 0|step: 523|ppo_ep: 1|act_loss: -0.028228759765625|cri_loss: -0.0107574462890625|unsuper_loss: 0.0
-average reward score: 5.5625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.22%) |Training time=0.44s (20.22%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.35
-epoch: 0|step: 524|ppo_ep: 1|act_loss: -0.03082275390625|cri_loss: -0.01323699951171875|unsuper_loss: 0.0
-average reward score: 5.1328125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.44%) |Training time=0.46s (21.03%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.35
-epoch: 0|step: 525|ppo_ep: 1|act_loss: -0.05889892578125|cri_loss: -0.026611328125|unsuper_loss: 0.0
-average reward score: 5.53515625
--------------------------------------------------------------------------------------
-|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.79s (76.84%) |Training time=0.44s (18.93%) |Others=0.10 (4.24%)|CurSamplesPerSec=13.70 |AvgSamplesPerSec=14.35
-epoch: 0|step: 526|ppo_ep: 1|act_loss: 0.016693115234375|cri_loss: 0.01453399658203125|unsuper_loss: 0.0
-average reward score: 5.62890625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.88%) |Training time=0.45s (20.57%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.35
-epoch: 0|step: 527|ppo_ep: 1|act_loss: 0.04449462890625|cri_loss: 0.025054931640625|unsuper_loss: 0.0
-average reward score: 6.07421875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.64%) |Training time=0.45s (20.80%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.35
-epoch: 0|step: 528|ppo_ep: 1|act_loss: 0.0195465087890625|cri_loss: 0.01230621337890625|unsuper_loss: 0.0
-average reward score: 7.73046875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.85%) |Training time=0.45s (20.59%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.35
-[2023-04-14 09:07:03,705] [INFO] [logging.py:96:log_dist] [Rank 0] step=530, skipped=9, lr=[9.586758823778245e-06, 9.586758823778245e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:07:03,723] [INFO] [timer.py:199:stop] epoch=0/micro_step=530/global_step=530, RunningAvgSamplesPerSec=105.85254516677392, CurrSamplesPerSec=112.33056644956756, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:07:03,816] [INFO] [logging.py:96:log_dist] [Rank 0] step=530, skipped=8, lr=[4.96707704443643e-06, 4.96707704443643e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 529|ppo_ep: 1|act_loss: 0.120849609375|cri_loss: 0.06866455078125|unsuper_loss: 0.0
-average reward score: 6.0390625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.83%) |Training time=0.45s (20.64%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.35
-epoch: 0|step: 530|ppo_ep: 1|act_loss: 0.026214599609375|cri_loss: 0.016265869140625|unsuper_loss: 0.0
-average reward score: 5.6171875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.73%) |Training time=0.45s (20.65%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.35
-epoch: 0|step: 531|ppo_ep: 1|act_loss: -0.0165557861328125|cri_loss: -0.006198883056640625|unsuper_loss: 0.0
-average reward score: 5.94921875
--------------------------------------------------------------------------------------
-|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.62s (71.93%) |Training time=0.46s (20.20%) |Others=0.18 (7.87%)|CurSamplesPerSec=14.20 |AvgSamplesPerSec=14.35
-epoch: 0|step: 532|ppo_ep: 1|act_loss: -0.03509521484375|cri_loss: -0.012542724609375|unsuper_loss: 0.0
-average reward score: 5.18359375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.56%) |Training time=0.45s (20.89%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.35
-epoch: 0|step: 533|ppo_ep: 1|act_loss: 0.018951416015625|cri_loss: 0.0110321044921875|unsuper_loss: 0.0
-average reward score: 6.78125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.88%) |Training time=0.44s (20.55%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.35
-epoch: 0|step: 534|ppo_ep: 1|act_loss: 0.0035495758056640625|cri_loss: 0.0029582977294921875|unsuper_loss: 0.0
-average reward score: 5.83984375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.72%) |Training time=0.45s (20.71%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.35
-epoch: 0|step: 535|ppo_ep: 1|act_loss: 0.0374755859375|cri_loss: 0.02105712890625|unsuper_loss: 0.0
-average reward score: 6.734375
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.86%) |Training time=0.45s (20.63%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.35
-epoch: 0|step: 536|ppo_ep: 1|act_loss: -0.029327392578125|cri_loss: -0.01258087158203125|unsuper_loss: 0.0
-average reward score: 6.7578125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.68%) |Training time=0.45s (20.76%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.35
-epoch: 0|step: 537|ppo_ep: 1|act_loss: 0.035675048828125|cri_loss: 0.0200042724609375|unsuper_loss: 0.0
-average reward score: 6.4296875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.73%) |Training time=0.43s (19.70%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.35
-epoch: 0|step: 538|ppo_ep: 1|act_loss: 0.0178375244140625|cri_loss: 0.010833740234375|unsuper_loss: 0.0
-average reward score: 5.0
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.46%) |Training time=0.44s (20.02%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.36
-[2023-04-14 09:07:25,507] [INFO] [logging.py:96:log_dist] [Rank 0] step=540, skipped=9, lr=[9.583725789616017e-06, 9.583725789616017e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:07:25,525] [INFO] [timer.py:199:stop] epoch=0/micro_step=540/global_step=540, RunningAvgSamplesPerSec=105.97276576876709, CurrSamplesPerSec=111.52968940341991, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:07:25,618] [INFO] [logging.py:96:log_dist] [Rank 0] step=540, skipped=8, lr=[4.965501868383507e-06, 4.965501868383507e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 539|ppo_ep: 1|act_loss: 0.04998779296875|cri_loss: 0.0268096923828125|unsuper_loss: 0.0
-average reward score: 5.23046875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.81%) |Training time=0.45s (20.68%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.36
-epoch: 0|step: 540|ppo_ep: 1|act_loss: 0.0016565322875976562|cri_loss: 0.00145721435546875|unsuper_loss: 0.0
-average reward score: 5.8125
--------------------------------------------------------------------------------------
-|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.69s (73.78%) |Training time=0.50s (21.90%) |Others=0.10 (4.32%)|CurSamplesPerSec=13.98 |AvgSamplesPerSec=14.36
-epoch: 0|step: 541|ppo_ep: 1|act_loss: -0.017913818359375|cri_loss: -0.0053253173828125|unsuper_loss: 0.0
-average reward score: 5.453125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.86%) |Training time=0.45s (20.61%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.36
-epoch: 0|step: 542|ppo_ep: 1|act_loss: -0.090087890625|cri_loss: -0.04132080078125|unsuper_loss: 0.0
-average reward score: 6.328125
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.99%) |Training time=0.45s (20.44%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.36
-epoch: 0|step: 543|ppo_ep: 1|act_loss: -0.0291748046875|cri_loss: -0.0136871337890625|unsuper_loss: 0.0
-average reward score: 5.1484375
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.66s (75.48%) |Training time=0.44s (20.04%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.51 |AvgSamplesPerSec=14.36
-epoch: 0|step: 544|ppo_ep: 1|act_loss: -0.0049285888671875|cri_loss: -0.0016317367553710938|unsuper_loss: 0.0
-average reward score: 5.3046875
--------------------------------------------------------------------------------------
-|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.73s (75.89%) |Training time=0.45s (19.61%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.07 |AvgSamplesPerSec=14.36
-epoch: 0|step: 545|ppo_ep: 1|act_loss: 0.0345458984375|cri_loss: 0.01959228515625|unsuper_loss: 0.0
-average reward score: 5.33984375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.14%) |Training time=0.44s (20.33%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.36
-epoch: 0|step: 546|ppo_ep: 1|act_loss: 0.030914306640625|cri_loss: 0.016998291015625|unsuper_loss: 0.0
-average reward score: 4.8125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.85%) |Training time=0.45s (20.61%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.36
-epoch: 0|step: 547|ppo_ep: 1|act_loss: -0.01776123046875|cri_loss: -0.00774383544921875|unsuper_loss: 0.0
-average reward score: 5.34375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.54%) |Training time=0.45s (20.92%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.36
-epoch: 0|step: 548|ppo_ep: 1|act_loss: -0.08251953125|cri_loss: -0.03662109375|unsuper_loss: 0.0
-average reward score: 5.484375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.67%) |Training time=0.45s (20.79%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.36
-[2023-04-14 09:07:47,483] [INFO] [logging.py:96:log_dist] [Rank 0] step=550, skipped=9, lr=[9.58062221965779e-06, 9.58062221965779e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:07:47,501] [INFO] [timer.py:199:stop] epoch=0/micro_step=550/global_step=550, RunningAvgSamplesPerSec=106.10481150577755, CurrSamplesPerSec=113.16315685191665, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:07:47,594] [INFO] [logging.py:96:log_dist] [Rank 0] step=550, skipped=8, lr=[4.963890147645195e-06, 4.963890147645195e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 549|ppo_ep: 1|act_loss: 0.0102081298828125|cri_loss: 0.0094757080078125|unsuper_loss: 0.0
-average reward score: 5.16796875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.92%) |Training time=0.45s (20.56%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.36
-epoch: 0|step: 550|ppo_ep: 1|act_loss: 0.00433349609375|cri_loss: 0.0032825469970703125|unsuper_loss: 0.0
-average reward score: 5.84375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.92%) |Training time=0.45s (20.55%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.36
-epoch: 0|step: 551|ppo_ep: 1|act_loss: 0.11962890625|cri_loss: 0.0660400390625|unsuper_loss: 0.0
-average reward score: 5.328125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.50%) |Training time=0.46s (20.97%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.36
-epoch: 0|step: 552|ppo_ep: 1|act_loss: 0.033538818359375|cri_loss: 0.019012451171875|unsuper_loss: 0.0
-average reward score: 5.9921875
--------------------------------------------------------------------------------------
-|E2E latency=2.53s |Gather latency=0.00s (0.00%) |Generate time=1.64s (64.93%) |Training time=0.43s (16.84%) |Others=0.46 (18.23%)|CurSamplesPerSec=12.66 |AvgSamplesPerSec=14.36
-epoch: 0|step: 553|ppo_ep: 1|act_loss: -0.00921630859375|cri_loss: -0.0028324127197265625|unsuper_loss: 0.0
-average reward score: 5.51171875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.33%) |Training time=0.44s (20.09%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.36
-epoch: 0|step: 554|ppo_ep: 1|act_loss: -0.01349639892578125|cri_loss: -0.005279541015625|unsuper_loss: 0.0
-average reward score: 6.2578125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.38%) |Training time=0.46s (21.06%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.36
-epoch: 0|step: 555|ppo_ep: 1|act_loss: -0.00868988037109375|cri_loss: -0.0027103424072265625|unsuper_loss: 0.0
-average reward score: 5.6484375
--------------------------------------------------------------------------------------
-|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.78s (76.32%) |Training time=0.45s (19.43%) |Others=0.10 (4.25%)|CurSamplesPerSec=13.74 |AvgSamplesPerSec=14.36
-epoch: 0|step: 556|ppo_ep: 1|act_loss: -0.038116455078125|cri_loss: -0.01690673828125|unsuper_loss: 0.0
-average reward score: 5.9375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.64%) |Training time=0.45s (20.81%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.36
-epoch: 0|step: 557|ppo_ep: 1|act_loss: -0.014892578125|cri_loss: -0.0054931640625|unsuper_loss: 0.0
-average reward score: 5.88671875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.69%) |Training time=0.45s (20.75%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.36
-epoch: 0|step: 558|ppo_ep: 1|act_loss: 0.01316070556640625|cri_loss: 0.0116729736328125|unsuper_loss: 0.0
-average reward score: 5.16796875
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.86%) |Training time=0.46s (20.84%) |Others=0.12 (5.31%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.36
-[2023-04-14 09:08:09,697] [INFO] [logging.py:96:log_dist] [Rank 0] step=560, skipped=9, lr=[9.577448159905952e-06, 9.577448159905952e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:08:09,715] [INFO] [timer.py:199:stop] epoch=0/micro_step=560/global_step=560, RunningAvgSamplesPerSec=106.20737357610386, CurrSamplesPerSec=110.12176447228941, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:08:09,810] [INFO] [logging.py:96:log_dist] [Rank 0] step=560, skipped=8, lr=[4.962241906111083e-06, 4.962241906111083e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 559|ppo_ep: 1|act_loss: -0.028106689453125|cri_loss: -0.01242828369140625|unsuper_loss: 0.0
-average reward score: 6.11328125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.50%) |Training time=0.45s (20.90%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.36
-epoch: 0|step: 560|ppo_ep: 1|act_loss: -0.00113677978515625|cri_loss: -1.621246337890625e-05|unsuper_loss: 0.0
-average reward score: 6.0859375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.40%) |Training time=0.46s (21.07%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.36
-epoch: 0|step: 561|ppo_ep: 1|act_loss: 0.0175323486328125|cri_loss: 0.0101165771484375|unsuper_loss: 0.0
-average reward score: 5.63671875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.52%) |Training time=0.45s (20.94%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.36
-epoch: 0|step: 562|ppo_ep: 1|act_loss: 0.1058349609375|cri_loss: 0.055267333984375|unsuper_loss: 0.0
-average reward score: 5.10546875
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.54%) |Training time=0.46s (20.97%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.36
-epoch: 0|step: 563|ppo_ep: 1|act_loss: 0.050018310546875|cri_loss: 0.0263671875|unsuper_loss: 0.0
-average reward score: 5.50390625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.36%) |Training time=0.46s (21.10%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.36
-epoch: 0|step: 564|ppo_ep: 1|act_loss: -0.010040283203125|cri_loss: -0.003032684326171875|unsuper_loss: 0.0
-average reward score: 6.1328125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.51%) |Training time=0.45s (20.85%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.36
-epoch: 0|step: 565|ppo_ep: 1|act_loss: -0.00855255126953125|cri_loss: -0.003559112548828125|unsuper_loss: 0.0
-average reward score: 5.15625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.33%) |Training time=0.46s (21.13%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.36
-epoch: 0|step: 566|ppo_ep: 1|act_loss: -0.0287017822265625|cri_loss: -0.0123443603515625|unsuper_loss: 0.0
-average reward score: 5.140625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.45%) |Training time=0.45s (21.00%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.36
-epoch: 0|step: 567|ppo_ep: 1|act_loss: 0.047515869140625|cri_loss: 0.027313232421875|unsuper_loss: 0.0
-average reward score: 5.0
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.33%) |Training time=0.44s (20.13%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.37
-epoch: 0|step: 568|ppo_ep: 1|act_loss: 0.07122802734375|cri_loss: 0.04351806640625|unsuper_loss: 0.0
-average reward score: 5.375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.59%) |Training time=0.43s (19.79%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.37
-[2023-04-14 09:08:31,392] [INFO] [logging.py:96:log_dist] [Rank 0] step=570, skipped=9, lr=[9.574203657407728e-06, 9.574203657407728e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:08:31,410] [INFO] [timer.py:199:stop] epoch=0/micro_step=570/global_step=570, RunningAvgSamplesPerSec=106.29109456024048, CurrSamplesPerSec=111.84186216269592, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:08:31,502] [INFO] [logging.py:96:log_dist] [Rank 0] step=570, skipped=8, lr=[4.960557168212088e-06, 4.960557168212088e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 569|ppo_ep: 1|act_loss: -0.04010009765625|cri_loss: -0.0181732177734375|unsuper_loss: 0.0
-average reward score: 4.74609375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.77%) |Training time=0.45s (20.71%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.37
-epoch: 0|step: 570|ppo_ep: 1|act_loss: -0.067138671875|cri_loss: -0.0316162109375|unsuper_loss: 0.0
-average reward score: 5.09765625
--------------------------------------------------------------------------------------
-|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.81s (77.41%) |Training time=0.43s (18.38%) |Others=0.10 (4.21%)|CurSamplesPerSec=13.68 |AvgSamplesPerSec=14.37
-epoch: 0|step: 571|ppo_ep: 1|act_loss: -0.029998779296875|cri_loss: -0.01392364501953125|unsuper_loss: 0.0
-average reward score: 5.21875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.59%) |Training time=0.45s (20.86%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.37
-epoch: 0|step: 572|ppo_ep: 1|act_loss: -0.065673828125|cri_loss: -0.0294036865234375|unsuper_loss: 0.0
-average reward score: 4.5234375
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.65s (74.51%) |Training time=0.46s (21.02%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.49 |AvgSamplesPerSec=14.37
-epoch: 0|step: 573|ppo_ep: 1|act_loss: 0.06640625|cri_loss: 0.0472412109375|unsuper_loss: 0.0
-average reward score: 4.609375
--------------------------------------------------------------------------------------
-|E2E latency=2.78s |Gather latency=0.00s (0.00%) |Generate time=1.75s (62.78%) |Training time=0.43s (15.34%) |Others=0.61 (21.88%)|CurSamplesPerSec=11.50 |AvgSamplesPerSec=14.36
-[2023-04-14 09:08:43,056] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 4096, reducing to 2048
-epoch: 0|step: 574|ppo_ep: 1|act_loss: 0.0679931640625|cri_loss: 0.039154052734375|unsuper_loss: 0.0
-average reward score: 4.84375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.72%) |Training time=0.42s (19.69%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.36
-epoch: 0|step: 575|ppo_ep: 1|act_loss: 0.12646484375|cri_loss: 0.0697021484375|unsuper_loss: 0.0
-average reward score: 4.4765625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.58%) |Training time=0.45s (20.90%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.36
-[2023-04-14 09:08:47,490] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, but hysteresis is 2. Reducing hysteresis to 1
-epoch: 0|step: 576|ppo_ep: 1|act_loss: 0.1339111328125|cri_loss: 0.0732421875|unsuper_loss: 0.0
-average reward score: 4.7421875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.93%) |Training time=0.45s (20.92%) |Others=0.09 (4.15%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.36
-[2023-04-14 09:08:49,655] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, reducing to 16384
-epoch: 0|step: 577|ppo_ep: 1|act_loss: 0.02734375|cri_loss: 0.025177001953125|unsuper_loss: 0.0
-average reward score: 4.15234375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.05%) |Training time=0.45s (20.79%) |Others=0.09 (4.16%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.36
-epoch: 0|step: 578|ppo_ep: 1|act_loss: 0.216064453125|cri_loss: 0.143310546875|unsuper_loss: 0.0
-average reward score: 4.5078125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.40%) |Training time=0.46s (21.08%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.36
-[2023-04-14 09:08:53,886] [INFO] [logging.py:96:log_dist] [Rank 0] step=580, skipped=10, lr=[9.571223416337106e-06, 9.571223416337106e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:08:53,904] [INFO] [timer.py:199:stop] epoch=0/micro_step=580/global_step=580, RunningAvgSamplesPerSec=106.40314362946779, CurrSamplesPerSec=112.75446463825047, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:08:53,997] [INFO] [logging.py:96:log_dist] [Rank 0] step=580, skipped=10, lr=[4.959183117273112e-06, 4.959183117273112e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 579|ppo_ep: 1|act_loss: 0.2337646484375|cri_loss: 0.13671875|unsuper_loss: 0.0
-average reward score: 4.58984375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.84%) |Training time=0.45s (20.62%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.36
-epoch: 0|step: 580|ppo_ep: 1|act_loss: 0.0645751953125|cri_loss: 0.03631591796875|unsuper_loss: 0.0
-average reward score: 5.4609375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.62%) |Training time=0.45s (20.82%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.37
-epoch: 0|step: 581|ppo_ep: 1|act_loss: 0.08306884765625|cri_loss: 0.04730224609375|unsuper_loss: 0.0
-average reward score: 4.984375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.60%) |Training time=0.45s (20.86%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.37
-epoch: 0|step: 582|ppo_ep: 1|act_loss: 0.236083984375|cri_loss: 0.1676025390625|unsuper_loss: 0.0
-average reward score: 3.515625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.22%) |Training time=0.44s (20.20%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.37
-epoch: 0|step: 583|ppo_ep: 1|act_loss: 0.007049560546875|cri_loss: 0.005939483642578125|unsuper_loss: 0.0
-average reward score: 4.2734375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.27%) |Training time=0.44s (20.18%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.37
-epoch: 0|step: 584|ppo_ep: 1|act_loss: -0.004924774169921875|cri_loss: 0.00067138671875|unsuper_loss: 0.0
-average reward score: 4.3828125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.90%) |Training time=0.45s (20.56%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.37
-epoch: 0|step: 585|ppo_ep: 1|act_loss: -0.0716552734375|cri_loss: -0.0330810546875|unsuper_loss: 0.0
-average reward score: 4.59375
--------------------------------------------------------------------------------------
-|E2E latency=2.41s |Gather latency=0.00s (0.00%) |Generate time=1.80s (74.76%) |Training time=0.45s (18.58%) |Others=0.16 (6.65%)|CurSamplesPerSec=13.30 |AvgSamplesPerSec=14.37
-epoch: 0|step: 586|ppo_ep: 1|act_loss: -0.10107421875|cri_loss: -0.04376220703125|unsuper_loss: 0.0
-average reward score: 3.1640625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.72%) |Training time=0.45s (20.75%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.37
-epoch: 0|step: 587|ppo_ep: 1|act_loss: -0.05377197265625|cri_loss: -0.02410888671875|unsuper_loss: 0.0
-average reward score: 4.57421875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.75%) |Training time=0.45s (20.71%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.37
-epoch: 0|step: 588|ppo_ep: 1|act_loss: -0.0753173828125|cri_loss: -0.03302001953125|unsuper_loss: 0.0
-average reward score: 5.07421875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.77%) |Training time=0.45s (20.69%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.37
-[2023-04-14 09:09:15,849] [INFO] [logging.py:96:log_dist] [Rank 0] step=590, skipped=10, lr=[9.567845205974828e-06, 9.567845205974828e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:09:15,867] [INFO] [timer.py:199:stop] epoch=0/micro_step=590/global_step=590, RunningAvgSamplesPerSec=106.49800390590352, CurrSamplesPerSec=110.78347744838766, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:09:15,960] [INFO] [logging.py:96:log_dist] [Rank 0] step=590, skipped=10, lr=[4.957432749209755e-06, 4.957432749209755e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 589|ppo_ep: 1|act_loss: -0.0306396484375|cri_loss: -0.0132293701171875|unsuper_loss: 0.0
-average reward score: 5.65625
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.88%) |Training time=0.45s (20.64%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.37
-epoch: 0|step: 590|ppo_ep: 1|act_loss: -0.05267333984375|cri_loss: -0.025299072265625|unsuper_loss: 0.0
-average reward score: 5.5859375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.66%) |Training time=0.45s (20.81%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.37
-epoch: 0|step: 591|ppo_ep: 1|act_loss: -0.007110595703125|cri_loss: -0.0025691986083984375|unsuper_loss: 0.0
-average reward score: 5.14453125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.71%) |Training time=0.45s (20.74%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.37
-epoch: 0|step: 592|ppo_ep: 1|act_loss: -0.014495849609375|cri_loss: -0.005519866943359375|unsuper_loss: 0.0
-average reward score: 4.89453125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.50%) |Training time=0.46s (20.98%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.37
-epoch: 0|step: 593|ppo_ep: 1|act_loss: 0.04583740234375|cri_loss: 0.031585693359375|unsuper_loss: 0.0
-average reward score: 5.0546875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.75%) |Training time=0.45s (20.72%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.37
-epoch: 0|step: 594|ppo_ep: 1|act_loss: -0.0069427490234375|cri_loss: -0.00215911865234375|unsuper_loss: 0.0
-average reward score: 5.5859375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.60%) |Training time=0.45s (20.86%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.37
-epoch: 0|step: 595|ppo_ep: 1|act_loss: 0.0129241943359375|cri_loss: 0.0084228515625|unsuper_loss: 0.0
-average reward score: 5.734375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.90%) |Training time=0.45s (20.55%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.37
-epoch: 0|step: 596|ppo_ep: 1|act_loss: 0.01959228515625|cri_loss: 0.01129913330078125|unsuper_loss: 0.0
-average reward score: 5.81640625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.08%) |Training time=0.44s (20.35%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.37
-epoch: 0|step: 597|ppo_ep: 1|act_loss: 0.0300140380859375|cri_loss: 0.0162200927734375|unsuper_loss: 0.0
-average reward score: 6.23046875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.65s (76.08%) |Training time=0.42s (19.33%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.37
-epoch: 0|step: 598|ppo_ep: 1|act_loss: 0.0751953125|cri_loss: 0.041015625|unsuper_loss: 0.0
-average reward score: 5.95703125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.68%) |Training time=0.43s (19.76%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.37
-[2023-04-14 09:09:37,614] [INFO] [logging.py:96:log_dist] [Rank 0] step=600, skipped=10, lr=[9.564396695205104e-06, 9.564396695205104e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:09:37,618] [INFO] [timer.py:199:stop] epoch=0/micro_step=600/global_step=600, RunningAvgSamplesPerSec=106.58806837441567, CurrSamplesPerSec=100.7356997842199, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:09:37,737] [INFO] [logging.py:96:log_dist] [Rank 0] step=600, skipped=10, lr=[4.955645956064821e-06, 4.955645956064821e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 599|ppo_ep: 1|act_loss: 0.011077880859375|cri_loss: 0.00719451904296875|unsuper_loss: 0.0
-average reward score: 5.87890625
--------------------------------------------------------------------------------------
-|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.62s (72.81%) |Training time=0.48s (21.61%) |Others=0.12 (5.58%)|CurSamplesPerSec=14.39 |AvgSamplesPerSec=14.37
-epoch: 0|step: 600|ppo_ep: 1|act_loss: 0.03582763671875|cri_loss: 0.0189361572265625|unsuper_loss: 0.0
-average reward score: 6.08984375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.91%) |Training time=0.45s (20.53%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.37
-epoch: 0|step: 601|ppo_ep: 1|act_loss: -0.0229949951171875|cri_loss: -0.0109100341796875|unsuper_loss: 0.0
-average reward score: 5.7734375
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.70%) |Training time=0.46s (20.80%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.57 |AvgSamplesPerSec=14.37
-epoch: 0|step: 602|ppo_ep: 1|act_loss: -0.032379150390625|cri_loss: -0.0155487060546875|unsuper_loss: 0.0
-average reward score: 5.8984375
--------------------------------------------------------------------------------------
-|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.73s (76.26%) |Training time=0.44s (19.28%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.07 |AvgSamplesPerSec=14.37
-epoch: 0|step: 603|ppo_ep: 1|act_loss: -0.0089874267578125|cri_loss: -0.00399017333984375|unsuper_loss: 0.0
-average reward score: 5.703125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.18%) |Training time=0.44s (20.28%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.37
-epoch: 0|step: 604|ppo_ep: 1|act_loss: -0.056182861328125|cri_loss: -0.0275115966796875|unsuper_loss: 0.0
-average reward score: 5.8984375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.25%) |Training time=0.44s (20.19%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.37
-epoch: 0|step: 605|ppo_ep: 1|act_loss: -0.0023899078369140625|cri_loss: -0.0002040863037109375|unsuper_loss: 0.0
-average reward score: 5.5
--------------------------------------------------------------------------------------
-|E2E latency=2.52s |Gather latency=0.00s (0.00%) |Generate time=1.63s (64.46%) |Training time=0.44s (17.39%) |Others=0.46 (18.15%)|CurSamplesPerSec=12.69 |AvgSamplesPerSec=14.37
-epoch: 0|step: 606|ppo_ep: 1|act_loss: 0.0224609375|cri_loss: 0.012969970703125|unsuper_loss: 0.0
-average reward score: 5.8828125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.77%) |Training time=0.44s (20.46%) |Others=0.10 (4.77%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.37
-epoch: 0|step: 607|ppo_ep: 1|act_loss: 0.005245208740234375|cri_loss: 0.00289154052734375|unsuper_loss: 0.0
-average reward score: 5.80859375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.24%) |Training time=0.44s (20.18%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.37
-epoch: 0|step: 608|ppo_ep: 1|act_loss: 0.064208984375|cri_loss: 0.033111572265625|unsuper_loss: 0.0
-average reward score: 6.0
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.11%) |Training time=0.44s (20.30%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.37
-[2023-04-14 09:09:59,804] [INFO] [logging.py:96:log_dist] [Rank 0] step=610, skipped=10, lr=[9.560877935143189e-06, 9.560877935143189e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:09:59,822] [INFO] [timer.py:199:stop] epoch=0/micro_step=610/global_step=610, RunningAvgSamplesPerSec=106.7184158486569, CurrSamplesPerSec=117.44905641885447, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:09:59,917] [INFO] [logging.py:96:log_dist] [Rank 0] step=610, skipped=10, lr=[4.953822764322896e-06, 4.953822764322896e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 609|ppo_ep: 1|act_loss: -0.0032367706298828125|cri_loss: -0.001232147216796875|unsuper_loss: 0.0
-average reward score: 5.40625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.27%) |Training time=0.44s (20.09%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.37
-epoch: 0|step: 610|ppo_ep: 1|act_loss: -0.024261474609375|cri_loss: -0.011322021484375|unsuper_loss: 0.0
-average reward score: 6.328125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.11%) |Training time=0.44s (20.32%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.37
-epoch: 0|step: 611|ppo_ep: 1|act_loss: -0.024688720703125|cri_loss: -0.0111083984375|unsuper_loss: 0.0
-average reward score: 5.48828125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.31%) |Training time=0.44s (20.12%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.38
-epoch: 0|step: 612|ppo_ep: 1|act_loss: -0.08856201171875|cri_loss: -0.041107177734375|unsuper_loss: 0.0
-average reward score: 5.3046875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.66s (76.50%) |Training time=0.41s (18.88%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.38
-epoch: 0|step: 613|ppo_ep: 1|act_loss: -0.018341064453125|cri_loss: -0.0086517333984375|unsuper_loss: 0.0
-average reward score: 5.0078125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.66s (76.25%) |Training time=0.42s (19.09%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.38
-epoch: 0|step: 614|ppo_ep: 1|act_loss: -0.032562255859375|cri_loss: -0.014404296875|unsuper_loss: 0.0
-average reward score: 6.59375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.67s (76.73%) |Training time=0.41s (18.59%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.38
-epoch: 0|step: 615|ppo_ep: 1|act_loss: 0.0167388916015625|cri_loss: 0.0095367431640625|unsuper_loss: 0.0
-average reward score: 6.2578125
--------------------------------------------------------------------------------------
-|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.84s (78.72%) |Training time=0.40s (17.01%) |Others=0.10 (4.27%)|CurSamplesPerSec=13.72 |AvgSamplesPerSec=14.38
-epoch: 0|step: 616|ppo_ep: 1|act_loss: 0.0194091796875|cri_loss: 0.01035308837890625|unsuper_loss: 0.0
-average reward score: 5.375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.16%) |Training time=0.44s (20.27%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.38
-epoch: 0|step: 617|ppo_ep: 1|act_loss: -0.0017223358154296875|cri_loss: -0.0002288818359375|unsuper_loss: 0.0
-average reward score: 5.72265625
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.67s (75.95%) |Training time=0.43s (19.48%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.57 |AvgSamplesPerSec=14.38
-epoch: 0|step: 618|ppo_ep: 1|act_loss: 0.0160675048828125|cri_loss: 0.00909423828125|unsuper_loss: 0.0
-average reward score: 5.78125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.10%) |Training time=0.44s (20.35%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.38
-[2023-04-14 09:10:21,685] [INFO] [logging.py:96:log_dist] [Rank 0] step=620, skipped=10, lr=[9.557288977945587e-06, 9.557288977945587e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:10:21,703] [INFO] [timer.py:199:stop] epoch=0/micro_step=620/global_step=620, RunningAvgSamplesPerSec=106.9398379077226, CurrSamplesPerSec=119.05743097000362, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:10:21,797] [INFO] [logging.py:96:log_dist] [Rank 0] step=620, skipped=10, lr=[4.9519632010080765e-06, 4.9519632010080765e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 619|ppo_ep: 1|act_loss: 0.015380859375|cri_loss: 0.0081634521484375|unsuper_loss: 0.0
-average reward score: 6.078125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.44%) |Training time=0.43s (20.00%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.38
-epoch: 0|step: 620|ppo_ep: 1|act_loss: -0.004367828369140625|cri_loss: -0.001682281494140625|unsuper_loss: 0.0
-average reward score: 5.515625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.97%) |Training time=0.42s (19.48%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.38
-epoch: 0|step: 621|ppo_ep: 1|act_loss: -0.037322998046875|cri_loss: -0.0178985595703125|unsuper_loss: 0.0
-average reward score: 6.125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.80%) |Training time=0.45s (20.65%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.38
-epoch: 0|step: 622|ppo_ep: 1|act_loss: -0.0214996337890625|cri_loss: -0.0097198486328125|unsuper_loss: 0.0
-average reward score: 6.15625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.67%) |Training time=0.45s (20.79%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.38
-epoch: 0|step: 623|ppo_ep: 1|act_loss: -0.051300048828125|cri_loss: -0.024322509765625|unsuper_loss: 0.0
-average reward score: 6.875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.46%) |Training time=0.45s (20.79%) |Others=0.10 (4.75%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.38
-epoch: 0|step: 624|ppo_ep: 1|act_loss: -0.03497314453125|cri_loss: -0.016021728515625|unsuper_loss: 0.0
-average reward score: 5.1328125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.20%) |Training time=0.44s (20.26%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.38
-epoch: 0|step: 625|ppo_ep: 1|act_loss: 0.016265869140625|cri_loss: 0.00904083251953125|unsuper_loss: 0.0
-average reward score: 6.7734375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.57%) |Training time=0.45s (20.88%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.38
-epoch: 0|step: 626|ppo_ep: 1|act_loss: 0.03240966796875|cri_loss: 0.0170135498046875|unsuper_loss: 0.0
-average reward score: 5.78125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.40%) |Training time=0.46s (21.08%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.38
-epoch: 0|step: 627|ppo_ep: 1|act_loss: 0.0118408203125|cri_loss: 0.006252288818359375|unsuper_loss: 0.0
-average reward score: 5.59375
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.45%) |Training time=0.43s (19.66%) |Others=0.11 (4.89%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.38
-epoch: 0|step: 628|ppo_ep: 1|act_loss: 0.0095062255859375|cri_loss: 0.005832672119140625|unsuper_loss: 0.0
-average reward score: 5.171875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.66s (76.35%) |Training time=0.42s (19.12%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.38
-[2023-04-14 09:10:43,620] [INFO] [logging.py:96:log_dist] [Rank 0] step=630, skipped=10, lr=[9.55362987680931e-06, 9.55362987680931e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:10:44,139] [INFO] [timer.py:199:stop] epoch=0/micro_step=630/global_step=630, RunningAvgSamplesPerSec=106.77900790800318, CurrSamplesPerSec=42.10884357156303, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:10:44,235] [INFO] [logging.py:96:log_dist] [Rank 0] step=630, skipped=10, lr=[4.95006729368358e-06, 4.95006729368358e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 629|ppo_ep: 1|act_loss: 0.048309326171875|cri_loss: 0.027130126953125|unsuper_loss: 0.0
-average reward score: 5.5546875
--------------------------------------------------------------------------------------
-|E2E latency=2.84s |Gather latency=0.00s (0.00%) |Generate time=1.82s (63.97%) |Training time=0.92s (32.46%) |Others=0.10 (3.57%)|CurSamplesPerSec=11.25 |AvgSamplesPerSec=14.38
-epoch: 0|step: 630|ppo_ep: 1|act_loss: 0.08837890625|cri_loss: 0.0487060546875|unsuper_loss: 0.0
-average reward score: 4.65234375
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.67%) |Training time=0.46s (20.86%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.53 |AvgSamplesPerSec=14.38
-epoch: 0|step: 631|ppo_ep: 1|act_loss: 0.0255584716796875|cri_loss: 0.0142059326171875|unsuper_loss: 0.0
-average reward score: 4.984375
--------------------------------------------------------------------------------------
-|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.72s (75.66%) |Training time=0.46s (20.00%) |Others=0.10 (4.35%)|CurSamplesPerSec=14.05 |AvgSamplesPerSec=14.38
-epoch: 0|step: 632|ppo_ep: 1|act_loss: 0.001983642578125|cri_loss: 0.00154876708984375|unsuper_loss: 0.0
-average reward score: 5.5625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.44%) |Training time=0.46s (21.01%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.38
-epoch: 0|step: 633|ppo_ep: 1|act_loss: -0.04437255859375|cri_loss: -0.02142333984375|unsuper_loss: 0.0
-average reward score: 5.98828125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.39%) |Training time=0.46s (21.04%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.38
-epoch: 0|step: 634|ppo_ep: 1|act_loss: -0.04608154296875|cri_loss: -0.0216827392578125|unsuper_loss: 0.0
-average reward score: 5.75
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.35%) |Training time=0.46s (21.03%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.38
-epoch: 0|step: 635|ppo_ep: 1|act_loss: -0.0011425018310546875|cri_loss: 0.0001544952392578125|unsuper_loss: 0.0
-average reward score: 5.62890625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.55%) |Training time=0.45s (20.89%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.38
-epoch: 0|step: 636|ppo_ep: 1|act_loss: 0.02423095703125|cri_loss: 0.01284027099609375|unsuper_loss: 0.0
-average reward score: 5.94921875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.34%) |Training time=0.46s (21.04%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.38
-epoch: 0|step: 637|ppo_ep: 1|act_loss: 0.058624267578125|cri_loss: 0.031890869140625|unsuper_loss: 0.0
-average reward score: 5.92578125
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.46%) |Training time=0.48s (21.83%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.48 |AvgSamplesPerSec=14.38
-epoch: 0|step: 638|ppo_ep: 1|act_loss: 0.01776123046875|cri_loss: 0.00981903076171875|unsuper_loss: 0.0
-average reward score: 5.73046875
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.52%) |Training time=0.49s (22.02%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.48 |AvgSamplesPerSec=14.38
-[2023-04-14 09:11:06,101] [INFO] [logging.py:96:log_dist] [Rank 0] step=640, skipped=10, lr=[9.549900685971059e-06, 9.549900685971059e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:11:06,119] [INFO] [timer.py:199:stop] epoch=0/micro_step=640/global_step=640, RunningAvgSamplesPerSec=106.75939140991701, CurrSamplesPerSec=95.74492697381844, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:11:06,212] [INFO] [logging.py:96:log_dist] [Rank 0] step=640, skipped=10, lr=[4.948135070451325e-06, 4.948135070451325e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 639|ppo_ep: 1|act_loss: 0.023193359375|cri_loss: 0.0130615234375|unsuper_loss: 0.0
-average reward score: 5.796875
--------------------------------------------------------------------------------------
-|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.14%) |Training time=0.50s (22.43%) |Others=0.10 (4.43%)|CurSamplesPerSec=14.44 |AvgSamplesPerSec=14.38
-epoch: 0|step: 640|ppo_ep: 1|act_loss: -0.017852783203125|cri_loss: -0.00658416748046875|unsuper_loss: 0.0
-average reward score: 5.8828125
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.58%) |Training time=0.48s (21.95%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.53 |AvgSamplesPerSec=14.38
-epoch: 0|step: 641|ppo_ep: 1|act_loss: -0.00551605224609375|cri_loss: -0.0018453598022460938|unsuper_loss: 0.0
-average reward score: 6.1875
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.28%) |Training time=0.48s (21.89%) |Others=0.11 (4.83%)|CurSamplesPerSec=14.47 |AvgSamplesPerSec=14.38
-epoch: 0|step: 642|ppo_ep: 1|act_loss: -0.0426025390625|cri_loss: -0.01947021484375|unsuper_loss: 0.0
-average reward score: 6.01171875
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.63%) |Training time=0.46s (20.89%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.54 |AvgSamplesPerSec=14.38
-epoch: 0|step: 643|ppo_ep: 1|act_loss: -0.005496978759765625|cri_loss: -0.0017728805541992188|unsuper_loss: 0.0
-average reward score: 5.9375
--------------------------------------------------------------------------------------
-|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.72s (73.10%) |Training time=0.53s (22.70%) |Others=0.10 (4.20%)|CurSamplesPerSec=13.62 |AvgSamplesPerSec=14.38
-epoch: 0|step: 644|ppo_ep: 1|act_loss: -0.01617431640625|cri_loss: -0.007472991943359375|unsuper_loss: 0.0
-average reward score: 6.1484375
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.85%) |Training time=0.45s (20.63%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.38
-epoch: 0|step: 645|ppo_ep: 1|act_loss: -0.02459716796875|cri_loss: -0.00748443603515625|unsuper_loss: 0.0
-average reward score: 5.640625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.72%) |Training time=0.45s (20.76%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.38
-epoch: 0|step: 646|ppo_ep: 1|act_loss: 0.0555419921875|cri_loss: 0.028778076171875|unsuper_loss: 0.0
-average reward score: 5.328125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.55%) |Training time=0.45s (20.84%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.38
-epoch: 0|step: 647|ppo_ep: 1|act_loss: 0.06146240234375|cri_loss: 0.03546142578125|unsuper_loss: 0.0
-average reward score: 4.33203125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.79%) |Training time=0.45s (20.67%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.38
-epoch: 0|step: 648|ppo_ep: 1|act_loss: 0.021026611328125|cri_loss: 0.01177215576171875|unsuper_loss: 0.0
-average reward score: 4.6796875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.76%) |Training time=0.45s (20.71%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.38
-[2023-04-14 09:11:28,136] [INFO] [logging.py:96:log_dist] [Rank 0] step=650, skipped=10, lr=[9.546101460706439e-06, 9.546101460706439e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:11:28,154] [INFO] [timer.py:199:stop] epoch=0/micro_step=650/global_step=650, RunningAvgSamplesPerSec=106.73659438423795, CurrSamplesPerSec=108.43134463767952, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:11:28,247] [INFO] [logging.py:96:log_dist] [Rank 0] step=650, skipped=10, lr=[4.946166559951523e-06, 4.946166559951523e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 649|ppo_ep: 1|act_loss: 0.06475830078125|cri_loss: 0.035064697265625|unsuper_loss: 0.0
-average reward score: 4.3125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.49%) |Training time=0.46s (21.01%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.38
-epoch: 0|step: 650|ppo_ep: 1|act_loss: 0.03387451171875|cri_loss: 0.018646240234375|unsuper_loss: 0.0
-average reward score: 5.7890625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.62%) |Training time=0.45s (20.83%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.38
-epoch: 0|step: 651|ppo_ep: 1|act_loss: -0.01080322265625|cri_loss: -0.00469207763671875|unsuper_loss: 0.0
-average reward score: 6.0390625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.84%) |Training time=0.45s (20.62%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.38
-epoch: 0|step: 652|ppo_ep: 1|act_loss: 0.023223876953125|cri_loss: 0.0124664306640625|unsuper_loss: 0.0
-average reward score: 6.0
--------------------------------------------------------------------------------------
-|E2E latency=2.83s |Gather latency=0.00s (0.00%) |Generate time=1.63s (57.58%) |Training time=0.45s (15.96%) |Others=0.75 (26.45%)|CurSamplesPerSec=11.32 |AvgSamplesPerSec=14.38
-epoch: 0|step: 653|ppo_ep: 1|act_loss: -0.010345458984375|cri_loss: -0.00450897216796875|unsuper_loss: 0.0
-average reward score: 5.46875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.61%) |Training time=0.45s (20.86%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.38
-epoch: 0|step: 654|ppo_ep: 1|act_loss: 0.023651123046875|cri_loss: 0.0128631591796875|unsuper_loss: 0.0
-average reward score: 5.9453125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.40%) |Training time=0.46s (21.06%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.38
-epoch: 0|step: 655|ppo_ep: 1|act_loss: -0.0174407958984375|cri_loss: -0.0077667236328125|unsuper_loss: 0.0
-average reward score: 6.359375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.62%) |Training time=0.45s (20.84%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.38
-epoch: 0|step: 656|ppo_ep: 1|act_loss: -0.03515625|cri_loss: -0.015533447265625|unsuper_loss: 0.0
-average reward score: 5.6796875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.62%) |Training time=0.46s (20.85%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.38
-epoch: 0|step: 657|ppo_ep: 1|act_loss: 0.01385498046875|cri_loss: 0.007476806640625|unsuper_loss: 0.0
-average reward score: 5.5859375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.67%) |Training time=0.43s (19.79%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.38
-epoch: 0|step: 658|ppo_ep: 1|act_loss: -0.00970458984375|cri_loss: -0.00411224365234375|unsuper_loss: 0.0
-average reward score: 6.0078125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.41%) |Training time=0.46s (21.06%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.38
-[2023-04-14 09:11:50,718] [INFO] [logging.py:96:log_dist] [Rank 0] step=660, skipped=10, lr=[9.542232257329135e-06, 9.542232257329135e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:11:50,736] [INFO] [timer.py:199:stop] epoch=0/micro_step=660/global_step=660, RunningAvgSamplesPerSec=106.79015890097004, CurrSamplesPerSec=105.78220256428848, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:11:50,845] [INFO] [logging.py:96:log_dist] [Rank 0] step=660, skipped=10, lr=[4.944161791362246e-06, 4.944161791362246e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 659|ppo_ep: 1|act_loss: 0.037841796875|cri_loss: 0.023101806640625|unsuper_loss: 0.0
-average reward score: 6.63671875
--------------------------------------------------------------------------------------
-|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.79s (75.59%) |Training time=0.47s (19.60%) |Others=0.11 (4.81%)|CurSamplesPerSec=13.48 |AvgSamplesPerSec=14.38
-epoch: 0|step: 660|ppo_ep: 1|act_loss: -0.0282745361328125|cri_loss: -0.013214111328125|unsuper_loss: 0.0
-average reward score: 6.0625
--------------------------------------------------------------------------------------
-|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.72s (75.78%) |Training time=0.45s (19.86%) |Others=0.10 (4.35%)|CurSamplesPerSec=14.09 |AvgSamplesPerSec=14.38
-epoch: 0|step: 661|ppo_ep: 1|act_loss: -0.025054931640625|cri_loss: -0.012237548828125|unsuper_loss: 0.0
-average reward score: 5.82421875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.50%) |Training time=0.46s (20.97%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.38
-epoch: 0|step: 662|ppo_ep: 1|act_loss: -0.02947998046875|cri_loss: -0.012115478515625|unsuper_loss: 0.0
-average reward score: 6.34375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.23%) |Training time=0.46s (21.24%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.38
-epoch: 0|step: 663|ppo_ep: 1|act_loss: 0.00266265869140625|cri_loss: 0.00197601318359375|unsuper_loss: 0.0
-average reward score: 6.06640625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.53%) |Training time=0.45s (20.94%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.38
-epoch: 0|step: 664|ppo_ep: 1|act_loss: 0.026275634765625|cri_loss: 0.01369476318359375|unsuper_loss: 0.0
-average reward score: 5.7421875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.68%) |Training time=0.45s (20.79%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.38
-epoch: 0|step: 665|ppo_ep: 1|act_loss: -0.0006084442138671875|cri_loss: 0.00041294097900390625|unsuper_loss: 0.0
-average reward score: 6.203125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.46%) |Training time=0.46s (20.99%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.38
-epoch: 0|step: 666|ppo_ep: 1|act_loss: -0.00737762451171875|cri_loss: -0.00324249267578125|unsuper_loss: 0.0
-average reward score: 6.11328125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.27%) |Training time=0.46s (21.21%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.38
-epoch: 0|step: 667|ppo_ep: 1|act_loss: 0.00258636474609375|cri_loss: 0.0026836395263671875|unsuper_loss: 0.0
-average reward score: 6.2265625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.51%) |Training time=0.46s (20.95%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.38
-epoch: 0|step: 668|ppo_ep: 1|act_loss: 0.0150604248046875|cri_loss: 0.0088348388671875|unsuper_loss: 0.0
-average reward score: 6.2734375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.63%) |Training time=0.45s (20.83%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.38
-[2023-04-14 09:12:12,578] [INFO] [logging.py:96:log_dist] [Rank 0] step=670, skipped=10, lr=[9.538293133190075e-06, 9.538293133190075e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:12:12,596] [INFO] [timer.py:199:stop] epoch=0/micro_step=670/global_step=670, RunningAvgSamplesPerSec=106.82658739397799, CurrSamplesPerSec=110.64967844108436, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:12:12,689] [INFO] [logging.py:96:log_dist] [Rank 0] step=670, skipped=10, lr=[4.942120794399002e-06, 4.942120794399002e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 669|ppo_ep: 1|act_loss: 0.037322998046875|cri_loss: 0.022125244140625|unsuper_loss: 0.0
-average reward score: 5.73828125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.71%) |Training time=0.45s (20.78%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.38
-epoch: 0|step: 670|ppo_ep: 1|act_loss: -0.06207275390625|cri_loss: -0.0292816162109375|unsuper_loss: 0.0
-average reward score: 6.35546875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.67%) |Training time=0.45s (20.80%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.38
-epoch: 0|step: 671|ppo_ep: 1|act_loss: 0.051605224609375|cri_loss: 0.0302734375|unsuper_loss: 0.0
-average reward score: 6.1015625
--------------------------------------------------------------------------------------
-|E2E latency=2.61s |Gather latency=0.00s (0.00%) |Generate time=1.64s (63.04%) |Training time=0.45s (17.15%) |Others=0.52 (19.81%)|CurSamplesPerSec=12.27 |AvgSamplesPerSec=14.38
-epoch: 0|step: 672|ppo_ep: 1|act_loss: -0.0310516357421875|cri_loss: -0.0143280029296875|unsuper_loss: 0.0
-average reward score: 6.0390625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.67%) |Training time=0.43s (19.75%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.38
-epoch: 0|step: 673|ppo_ep: 1|act_loss: 0.03326416015625|cri_loss: 0.017730712890625|unsuper_loss: 0.0
-average reward score: 5.83203125
--------------------------------------------------------------------------------------
-|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.62s (71.00%) |Training time=0.56s (24.67%) |Others=0.10 (4.32%)|CurSamplesPerSec=14.01 |AvgSamplesPerSec=14.38
-epoch: 0|step: 674|ppo_ep: 1|act_loss: 0.06341552734375|cri_loss: 0.033233642578125|unsuper_loss: 0.0
-average reward score: 5.9140625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.38%) |Training time=0.44s (20.08%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.38
-epoch: 0|step: 675|ppo_ep: 1|act_loss: 0.003040313720703125|cri_loss: 0.0020275115966796875|unsuper_loss: 0.0
-average reward score: 5.77734375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.85%) |Training time=0.45s (20.59%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.38
-epoch: 0|step: 676|ppo_ep: 1|act_loss: -0.002918243408203125|cri_loss: -0.0004673004150390625|unsuper_loss: 0.0
-average reward score: 6.171875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.62%) |Training time=0.45s (20.83%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.38
-epoch: 0|step: 677|ppo_ep: 1|act_loss: -0.061248779296875|cri_loss: -0.025970458984375|unsuper_loss: 0.0
-average reward score: 5.7109375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.63%) |Training time=0.45s (20.81%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.38
-epoch: 0|step: 678|ppo_ep: 1|act_loss: -0.04840087890625|cri_loss: -0.019805908203125|unsuper_loss: 0.0
-average reward score: 6.37109375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.63%) |Training time=0.45s (20.82%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.38
-[2023-04-14 09:12:34,852] [INFO] [logging.py:96:log_dist] [Rank 0] step=680, skipped=10, lr=[9.534284146676578e-06, 9.534284146676578e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:12:34,870] [INFO] [timer.py:199:stop] epoch=0/micro_step=680/global_step=680, RunningAvgSamplesPerSec=106.85888050857336, CurrSamplesPerSec=109.72457047932349, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:12:34,963] [INFO] [logging.py:96:log_dist] [Rank 0] step=680, skipped=10, lr=[4.9400435993142895e-06, 4.9400435993142895e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 679|ppo_ep: 1|act_loss: -0.08935546875|cri_loss: -0.039794921875|unsuper_loss: 0.0
-average reward score: 6.80078125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.63%) |Training time=0.45s (20.86%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.38
-epoch: 0|step: 680|ppo_ep: 1|act_loss: -0.05218505859375|cri_loss: -0.02313232421875|unsuper_loss: 0.0
-average reward score: 6.2578125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.67%) |Training time=0.45s (20.79%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.38
-epoch: 0|step: 681|ppo_ep: 1|act_loss: -0.0291748046875|cri_loss: -0.01338958740234375|unsuper_loss: 0.0
-average reward score: 6.03125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.67%) |Training time=0.45s (20.78%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.38
-epoch: 0|step: 682|ppo_ep: 1|act_loss: -0.0007305145263671875|cri_loss: 0.0006284713745117188|unsuper_loss: 0.0
-average reward score: 5.96875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.50%) |Training time=0.46s (20.96%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.38
-epoch: 0|step: 683|ppo_ep: 1|act_loss: 0.072021484375|cri_loss: 0.04058837890625|unsuper_loss: 0.0
-average reward score: 6.0703125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.44%) |Training time=0.46s (21.04%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.38
-epoch: 0|step: 684|ppo_ep: 1|act_loss: 0.017791748046875|cri_loss: 0.0110321044921875|unsuper_loss: 0.0
-average reward score: 5.87890625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.54%) |Training time=0.45s (20.91%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.38
-epoch: 0|step: 685|ppo_ep: 1|act_loss: 0.0330810546875|cri_loss: 0.018218994140625|unsuper_loss: 0.0
-average reward score: 5.90625
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.66%) |Training time=0.45s (20.36%) |Others=0.13 (5.98%)|CurSamplesPerSec=14.53 |AvgSamplesPerSec=14.38
-epoch: 0|step: 686|ppo_ep: 1|act_loss: -0.005847930908203125|cri_loss: 8.392333984375e-05|unsuper_loss: 0.0
-average reward score: 7.765625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.57%) |Training time=0.46s (20.89%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.38
-epoch: 0|step: 687|ppo_ep: 1|act_loss: -0.0004673004150390625|cri_loss: 0.0002837181091308594|unsuper_loss: 0.0
-average reward score: 7.359375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.28%) |Training time=0.44s (20.15%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.39
-epoch: 0|step: 688|ppo_ep: 1|act_loss: -0.022308349609375|cri_loss: -0.01081085205078125|unsuper_loss: 0.0
-average reward score: 7.1875
--------------------------------------------------------------------------------------
-|E2E latency=2.30s |Gather latency=0.00s (0.00%) |Generate time=1.63s (70.81%) |Training time=0.57s (24.90%) |Others=0.10 (4.29%)|CurSamplesPerSec=13.91 |AvgSamplesPerSec=14.38
-[2023-04-14 09:12:56,792] [INFO] [logging.py:96:log_dist] [Rank 0] step=690, skipped=10, lr=[9.5302053572115e-06, 9.5302053572115e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:12:56,809] [INFO] [timer.py:199:stop] epoch=0/micro_step=690/global_step=690, RunningAvgSamplesPerSec=106.85876245451874, CurrSamplesPerSec=107.36396135087939, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:12:56,902] [INFO] [logging.py:96:log_dist] [Rank 0] step=690, skipped=10, lr=[4.937930236897151e-06, 4.937930236897151e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 689|ppo_ep: 1|act_loss: -0.044921875|cri_loss: -0.0218658447265625|unsuper_loss: 0.0
-average reward score: 6.12109375
--------------------------------------------------------------------------------------
-|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.65s (74.47%) |Training time=0.47s (21.09%) |Others=0.10 (4.43%)|CurSamplesPerSec=14.41 |AvgSamplesPerSec=14.38
-epoch: 0|step: 690|ppo_ep: 1|act_loss: 0.017822265625|cri_loss: 0.00949859619140625|unsuper_loss: 0.0
-average reward score: 6.8515625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.48%) |Training time=0.46s (20.98%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.39
-epoch: 0|step: 691|ppo_ep: 1|act_loss: -0.010772705078125|cri_loss: -0.0048675537109375|unsuper_loss: 0.0
-average reward score: 6.66796875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.49%) |Training time=0.46s (20.97%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.39
-epoch: 0|step: 692|ppo_ep: 1|act_loss: -0.02557373046875|cri_loss: -0.011932373046875|unsuper_loss: 0.0
-average reward score: 6.3984375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.49%) |Training time=0.46s (20.99%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.39
-epoch: 0|step: 693|ppo_ep: 1|act_loss: -0.012969970703125|cri_loss: -0.0058441162109375|unsuper_loss: 0.0
-average reward score: 6.5859375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.10%) |Training time=0.44s (20.36%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.39
-epoch: 0|step: 694|ppo_ep: 1|act_loss: -0.0142974853515625|cri_loss: -0.006664276123046875|unsuper_loss: 0.0
-average reward score: 6.52734375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.05%) |Training time=0.44s (20.41%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.39
-epoch: 0|step: 695|ppo_ep: 1|act_loss: -0.031768798828125|cri_loss: -0.01519012451171875|unsuper_loss: 0.0
-average reward score: 6.88671875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.43%) |Training time=0.46s (21.02%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.39
-epoch: 0|step: 696|ppo_ep: 1|act_loss: 0.026397705078125|cri_loss: 0.01434326171875|unsuper_loss: 0.0
-average reward score: 7.0703125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.52%) |Training time=0.46s (20.94%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.39
-epoch: 0|step: 697|ppo_ep: 1|act_loss: 0.00650787353515625|cri_loss: 0.003849029541015625|unsuper_loss: 0.0
-average reward score: 5.8046875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.76%) |Training time=0.45s (20.70%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.39
-epoch: 0|step: 698|ppo_ep: 1|act_loss: 0.00577545166015625|cri_loss: 0.0033206939697265625|unsuper_loss: 0.0
-average reward score: 6.82421875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.57%) |Training time=0.45s (20.89%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.39
-[2023-04-14 09:13:18,531] [INFO] [logging.py:96:log_dist] [Rank 0] step=700, skipped=10, lr=[9.526056825252338e-06, 9.526056825252338e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:13:18,548] [INFO] [timer.py:199:stop] epoch=0/micro_step=700/global_step=700, RunningAvgSamplesPerSec=106.90781997916815, CurrSamplesPerSec=108.76533453483435, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:13:18,641] [INFO] [logging.py:96:log_dist] [Rank 0] step=700, skipped=10, lr=[4.935780738472714e-06, 4.935780738472714e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 699|ppo_ep: 1|act_loss: 0.04449462890625|cri_loss: 0.0231781005859375|unsuper_loss: 0.0
-average reward score: 6.2578125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.49%) |Training time=0.46s (21.00%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.39
-epoch: 0|step: 700|ppo_ep: 1|act_loss: 0.03704833984375|cri_loss: 0.01910400390625|unsuper_loss: 0.0
-average reward score: 6.64453125
--------------------------------------------------------------------------------------
-|E2E latency=2.41s |Gather latency=0.00s (0.00%) |Generate time=1.62s (67.26%) |Training time=0.46s (19.00%) |Others=0.33 (13.75%)|CurSamplesPerSec=13.30 |AvgSamplesPerSec=14.39
-epoch: 0|step: 701|ppo_ep: 1|act_loss: -0.01427459716796875|cri_loss: -0.00638580322265625|unsuper_loss: 0.0
-average reward score: 6.8671875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.99%) |Training time=0.44s (20.43%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.39
-epoch: 0|step: 702|ppo_ep: 1|act_loss: 0.043365478515625|cri_loss: 0.02227783203125|unsuper_loss: 0.0
-average reward score: 6.65625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.76%) |Training time=0.43s (19.66%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.39
-epoch: 0|step: 703|ppo_ep: 1|act_loss: -0.0113067626953125|cri_loss: -0.0049591064453125|unsuper_loss: 0.0
-average reward score: 7.98828125
--------------------------------------------------------------------------------------
-|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.64s (72.31%) |Training time=0.53s (23.32%) |Others=0.10 (4.37%)|CurSamplesPerSec=14.12 |AvgSamplesPerSec=14.39
-epoch: 0|step: 704|ppo_ep: 1|act_loss: -0.017974853515625|cri_loss: -0.00820159912109375|unsuper_loss: 0.0
-average reward score: 5.9609375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.18%) |Training time=0.44s (20.26%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.39
-epoch: 0|step: 705|ppo_ep: 1|act_loss: 0.0028667449951171875|cri_loss: 0.0019893646240234375|unsuper_loss: 0.0
-average reward score: 6.1796875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.01%) |Training time=0.44s (20.35%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.39
-epoch: 0|step: 706|ppo_ep: 1|act_loss: 0.0090789794921875|cri_loss: 0.0047760009765625|unsuper_loss: 0.0
-average reward score: 6.68359375
--------------------------------------------------------------------------------------
-|E2E latency=2.45s |Gather latency=0.00s (0.00%) |Generate time=1.62s (66.09%) |Training time=0.45s (18.32%) |Others=0.38 (15.59%)|CurSamplesPerSec=13.08 |AvgSamplesPerSec=14.39
-epoch: 0|step: 707|ppo_ep: 1|act_loss: -0.019134521484375|cri_loss: -0.00870513916015625|unsuper_loss: 0.0
-average reward score: 5.890625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.67%) |Training time=0.45s (20.77%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.39
-epoch: 0|step: 708|ppo_ep: 1|act_loss: -0.0189361572265625|cri_loss: -0.00836181640625|unsuper_loss: 0.0
-average reward score: 6.6953125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.09%) |Training time=0.44s (20.36%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.39
-[2023-04-14 09:13:40,822] [INFO] [logging.py:96:log_dist] [Rank 0] step=710, skipped=10, lr=[9.521838612290344e-06, 9.521838612290344e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:13:40,840] [INFO] [timer.py:199:stop] epoch=0/micro_step=710/global_step=710, RunningAvgSamplesPerSec=106.9759180678844, CurrSamplesPerSec=109.9996541451361, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:13:40,933] [INFO] [logging.py:96:log_dist] [Rank 0] step=710, skipped=10, lr=[4.933595135901733e-06, 4.933595135901733e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 709|ppo_ep: 1|act_loss: -0.016876220703125|cri_loss: -0.008026123046875|unsuper_loss: 0.0
-average reward score: 6.3203125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.54%) |Training time=0.45s (20.92%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.39
-epoch: 0|step: 710|ppo_ep: 1|act_loss: -0.021514892578125|cri_loss: -0.01042938232421875|unsuper_loss: 0.0
-average reward score: 6.97265625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.74%) |Training time=0.45s (20.70%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.39
-epoch: 0|step: 711|ppo_ep: 1|act_loss: -0.0162353515625|cri_loss: -0.00786590576171875|unsuper_loss: 0.0
-average reward score: 6.328125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.71%) |Training time=0.45s (20.74%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.39
-epoch: 0|step: 712|ppo_ep: 1|act_loss: -0.00830078125|cri_loss: -0.0038661956787109375|unsuper_loss: 0.0
-average reward score: 6.0
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.55%) |Training time=0.45s (20.91%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.39
-epoch: 0|step: 713|ppo_ep: 1|act_loss: 0.0106964111328125|cri_loss: 0.005687713623046875|unsuper_loss: 0.0
-average reward score: 7.04296875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.67%) |Training time=0.45s (20.78%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.39
-epoch: 0|step: 714|ppo_ep: 1|act_loss: -0.017486572265625|cri_loss: -0.00839996337890625|unsuper_loss: 0.0
-average reward score: 6.53515625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.67%) |Training time=0.45s (20.78%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.39
-epoch: 0|step: 715|ppo_ep: 1|act_loss: -0.013763427734375|cri_loss: -0.006649017333984375|unsuper_loss: 0.0
-average reward score: 5.91796875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.78%) |Training time=0.45s (20.66%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.39
-epoch: 0|step: 716|ppo_ep: 1|act_loss: -0.0003304481506347656|cri_loss: 0.0001533031463623047|unsuper_loss: 0.0
-average reward score: 5.84375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.76%) |Training time=0.45s (20.69%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.39
-epoch: 0|step: 717|ppo_ep: 1|act_loss: 0.0340576171875|cri_loss: 0.02008056640625|unsuper_loss: 0.0
-average reward score: 5.8125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.94%) |Training time=0.42s (19.52%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.39
-epoch: 0|step: 718|ppo_ep: 1|act_loss: -0.03125|cri_loss: -0.0149383544921875|unsuper_loss: 0.0
-average reward score: 6.3828125
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.66s (75.47%) |Training time=0.44s (20.04%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.56 |AvgSamplesPerSec=14.39
-[2023-04-14 09:14:02,796] [INFO] [logging.py:96:log_dist] [Rank 0] step=720, skipped=10, lr=[9.517550780849608e-06, 9.517550780849608e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:14:02,814] [INFO] [timer.py:199:stop] epoch=0/micro_step=720/global_step=720, RunningAvgSamplesPerSec=107.05598671959284, CurrSamplesPerSec=114.59396915763213, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:14:02,907] [INFO] [logging.py:96:log_dist] [Rank 0] step=720, skipped=10, lr=[4.9313734615801076e-06, 4.9313734615801076e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 719|ppo_ep: 1|act_loss: -0.0001888275146484375|cri_loss: 0.0005574226379394531|unsuper_loss: 0.0
-average reward score: 6.26953125
--------------------------------------------------------------------------------------
-|E2E latency=2.43s |Gather latency=0.00s (0.00%) |Generate time=1.89s (77.74%) |Training time=0.44s (18.20%) |Others=0.10 (4.06%)|CurSamplesPerSec=13.19 |AvgSamplesPerSec=14.39
-epoch: 0|step: 720|ppo_ep: 1|act_loss: 0.01340484619140625|cri_loss: 0.00720977783203125|unsuper_loss: 0.0
-average reward score: 6.65625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.14%) |Training time=0.44s (20.29%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.39
-epoch: 0|step: 721|ppo_ep: 1|act_loss: -0.0146636962890625|cri_loss: -0.006771087646484375|unsuper_loss: 0.0
-average reward score: 5.671875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.15%) |Training time=0.44s (20.27%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.39
-epoch: 0|step: 722|ppo_ep: 1|act_loss: 0.025054931640625|cri_loss: 0.013336181640625|unsuper_loss: 0.0
-average reward score: 5.57421875
--------------------------------------------------------------------------------------
-|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.62s (69.13%) |Training time=0.44s (18.96%) |Others=0.28 (11.91%)|CurSamplesPerSec=13.68 |AvgSamplesPerSec=14.39
-epoch: 0|step: 723|ppo_ep: 1|act_loss: 0.007061004638671875|cri_loss: 0.00450897216796875|unsuper_loss: 0.0
-average reward score: 5.5703125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.96%) |Training time=0.44s (20.45%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.39
-epoch: 0|step: 724|ppo_ep: 1|act_loss: 0.06036376953125|cri_loss: 0.037261962890625|unsuper_loss: 0.0
-average reward score: 5.515625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.16%) |Training time=0.44s (20.27%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.39
-epoch: 0|step: 725|ppo_ep: 1|act_loss: 0.0367431640625|cri_loss: 0.0194091796875|unsuper_loss: 0.0
-average reward score: 5.88671875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.08%) |Training time=0.44s (20.34%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.39
-epoch: 0|step: 726|ppo_ep: 1|act_loss: 0.0280914306640625|cri_loss: 0.0167236328125|unsuper_loss: 0.0
-average reward score: 5.984375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.25%) |Training time=0.44s (20.20%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.39
-epoch: 0|step: 727|ppo_ep: 1|act_loss: 0.0149993896484375|cri_loss: 0.009033203125|unsuper_loss: 0.0
-average reward score: 5.49609375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.96%) |Training time=0.42s (19.47%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.39
-epoch: 0|step: 728|ppo_ep: 1|act_loss: 0.01483154296875|cri_loss: 0.0087432861328125|unsuper_loss: 0.0
-average reward score: 5.83203125
--------------------------------------------------------------------------------------
-|E2E latency=2.57s |Gather latency=0.00s (0.00%) |Generate time=1.62s (63.00%) |Training time=0.44s (16.91%) |Others=0.52 (20.09%)|CurSamplesPerSec=12.43 |AvgSamplesPerSec=14.39
-[2023-04-14 09:14:24,985] [INFO] [logging.py:96:log_dist] [Rank 0] step=730, skipped=10, lr=[9.51319339448614e-06, 9.51319339448614e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:14:25,003] [INFO] [timer.py:199:stop] epoch=0/micro_step=730/global_step=730, RunningAvgSamplesPerSec=107.17587828789893, CurrSamplesPerSec=115.30782176899385, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:14:25,096] [INFO] [logging.py:96:log_dist] [Rank 0] step=730, skipped=10, lr=[4.929115748438415e-06, 4.929115748438415e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 729|ppo_ep: 1|act_loss: 0.02777099609375|cri_loss: 0.01476287841796875|unsuper_loss: 0.0
-average reward score: 5.5546875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.04%) |Training time=0.44s (20.40%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.39
-epoch: 0|step: 730|ppo_ep: 1|act_loss: -0.037384033203125|cri_loss: -0.0158538818359375|unsuper_loss: 0.0
-average reward score: 5.4453125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.07%) |Training time=0.44s (20.34%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.39
-epoch: 0|step: 731|ppo_ep: 1|act_loss: 0.03265380859375|cri_loss: 0.019927978515625|unsuper_loss: 0.0
-average reward score: 6.0625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.51%) |Training time=0.43s (19.91%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.39
-epoch: 0|step: 732|ppo_ep: 1|act_loss: 0.0188751220703125|cri_loss: 0.0112762451171875|unsuper_loss: 0.0
-average reward score: 5.9921875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.61%) |Training time=0.43s (19.85%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.39
-epoch: 0|step: 733|ppo_ep: 1|act_loss: 0.0577392578125|cri_loss: 0.034088134765625|unsuper_loss: 0.0
-average reward score: 6.3125
--------------------------------------------------------------------------------------
-|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.79s (76.67%) |Training time=0.45s (19.09%) |Others=0.10 (4.24%)|CurSamplesPerSec=13.72 |AvgSamplesPerSec=14.39
-epoch: 0|step: 734|ppo_ep: 1|act_loss: 0.01357269287109375|cri_loss: 0.009857177734375|unsuper_loss: 0.0
-average reward score: 5.46875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.71%) |Training time=0.45s (20.73%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.39
-epoch: 0|step: 735|ppo_ep: 1|act_loss: 0.0099334716796875|cri_loss: 0.007404327392578125|unsuper_loss: 0.0
-average reward score: 6.140625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.62%) |Training time=0.45s (20.85%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.39
-epoch: 0|step: 736|ppo_ep: 1|act_loss: 0.0087432861328125|cri_loss: 0.00799560546875|unsuper_loss: 0.0
-average reward score: 5.79296875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.92%) |Training time=0.44s (20.51%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.39
-epoch: 0|step: 737|ppo_ep: 1|act_loss: -0.020660400390625|cri_loss: -0.00601959228515625|unsuper_loss: 0.0
-average reward score: 5.6484375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.87%) |Training time=0.45s (20.57%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.39
-epoch: 0|step: 738|ppo_ep: 1|act_loss: 0.0296478271484375|cri_loss: 0.017120361328125|unsuper_loss: 0.0
-average reward score: 5.80078125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.86%) |Training time=0.45s (20.58%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.39
-[2023-04-14 09:14:46,793] [INFO] [logging.py:96:log_dist] [Rank 0] step=740, skipped=10, lr=[9.508766517786914e-06, 9.508766517786914e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:14:46,811] [INFO] [timer.py:199:stop] epoch=0/micro_step=740/global_step=740, RunningAvgSamplesPerSec=107.26615834374832, CurrSamplesPerSec=110.89899609178117, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:14:46,904] [INFO] [logging.py:96:log_dist] [Rank 0] step=740, skipped=10, lr=[4.926822029941406e-06, 4.926822029941406e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 739|ppo_ep: 1|act_loss: 0.06011962890625|cri_loss: 0.03375244140625|unsuper_loss: 0.0
-average reward score: 6.04296875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.64%) |Training time=0.45s (20.83%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.40
-epoch: 0|step: 740|ppo_ep: 1|act_loss: 0.05902099609375|cri_loss: 0.0396728515625|unsuper_loss: 0.0
-average reward score: 5.3515625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.65%) |Training time=0.45s (20.78%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.40
-epoch: 0|step: 741|ppo_ep: 1|act_loss: 0.0015230178833007812|cri_loss: 0.00215911865234375|unsuper_loss: 0.0
-average reward score: 5.5078125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.81%) |Training time=0.45s (20.63%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.40
-epoch: 0|step: 742|ppo_ep: 1|act_loss: -0.025299072265625|cri_loss: -0.00882720947265625|unsuper_loss: 0.0
-average reward score: 5.58984375
--------------------------------------------------------------------------------------
-|E2E latency=2.47s |Gather latency=0.00s (0.00%) |Generate time=1.62s (65.72%) |Training time=0.45s (18.24%) |Others=0.40 (16.04%)|CurSamplesPerSec=12.98 |AvgSamplesPerSec=14.39
-epoch: 0|step: 743|ppo_ep: 1|act_loss: 0.13818359375|cri_loss: 0.08477783203125|unsuper_loss: 0.0
-average reward score: 5.765625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.93%) |Training time=0.44s (20.50%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.39
-epoch: 0|step: 744|ppo_ep: 1|act_loss: 0.013946533203125|cri_loss: 0.009307861328125|unsuper_loss: 0.0
-average reward score: 5.73828125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.65%) |Training time=0.45s (20.78%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.40
-epoch: 0|step: 745|ppo_ep: 1|act_loss: -0.13232421875|cri_loss: -0.056793212890625|unsuper_loss: 0.0
-average reward score: 5.62890625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.69%) |Training time=0.45s (20.77%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.40
-epoch: 0|step: 746|ppo_ep: 1|act_loss: 0.020477294921875|cri_loss: 0.013763427734375|unsuper_loss: 0.0
-average reward score: 6.296875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.51%) |Training time=0.43s (19.94%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.40
-epoch: 0|step: 747|ppo_ep: 1|act_loss: 0.060943603515625|cri_loss: 0.0345458984375|unsuper_loss: 0.0
-average reward score: 5.7578125
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.68s (76.20%) |Training time=0.42s (19.31%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.54 |AvgSamplesPerSec=14.40
-epoch: 0|step: 748|ppo_ep: 1|act_loss: 0.0953369140625|cri_loss: 0.051788330078125|unsuper_loss: 0.0
-average reward score: 5.58984375
--------------------------------------------------------------------------------------
-|E2E latency=3.11s |Gather latency=0.00s (0.00%) |Generate time=1.89s (60.67%) |Training time=0.44s (14.24%) |Others=0.78 (25.09%)|CurSamplesPerSec=10.29 |AvgSamplesPerSec=14.39
-[2023-04-14 09:15:09,708] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 4096, but hysteresis is 2. Reducing hysteresis to 1
-[2023-04-14 09:15:09,708] [INFO] [logging.py:96:log_dist] [Rank 0] step=750, skipped=11, lr=[9.504722968731713e-06, 9.504722968731713e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:15:09,709] [INFO] [timer.py:199:stop] epoch=0/micro_step=750/global_step=750, RunningAvgSamplesPerSec=107.37028457158374, CurrSamplesPerSec=130.24941895276606, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:15:09,801] [INFO] [logging.py:96:log_dist] [Rank 0] step=750, skipped=10, lr=[4.9244923400875245e-06, 4.9244923400875245e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 749|ppo_ep: 1|act_loss: -0.016571044921875|cri_loss: -0.0033111572265625|unsuper_loss: 0.0
-average reward score: 5.74609375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.35%) |Training time=0.41s (19.04%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.39
-epoch: 0|step: 750|ppo_ep: 1|act_loss: 0.0894775390625|cri_loss: 0.0479736328125|unsuper_loss: 0.0
-average reward score: 5.4609375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.43%) |Training time=0.43s (20.02%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.39
-epoch: 0|step: 751|ppo_ep: 1|act_loss: -0.052947998046875|cri_loss: -0.0222015380859375|unsuper_loss: 0.0
-average reward score: 5.953125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.69%) |Training time=0.43s (19.68%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.39
-epoch: 0|step: 752|ppo_ep: 1|act_loss: -0.16259765625|cri_loss: -0.07012939453125|unsuper_loss: 0.0
-average reward score: 5.8515625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.06%) |Training time=0.44s (20.42%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.39
-epoch: 0|step: 753|ppo_ep: 1|act_loss: -0.019073486328125|cri_loss: -0.007190704345703125|unsuper_loss: 0.0
-average reward score: 5.9609375
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.31%) |Training time=0.44s (20.21%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.56 |AvgSamplesPerSec=14.39
-epoch: 0|step: 754|ppo_ep: 1|act_loss: 0.01169586181640625|cri_loss: 0.00966644287109375|unsuper_loss: 0.0
-average reward score: 5.5078125
--------------------------------------------------------------------------------------
-|E2E latency=2.64s |Gather latency=0.00s (0.00%) |Generate time=1.63s (61.60%) |Training time=0.45s (16.89%) |Others=0.57 (21.51%)|CurSamplesPerSec=12.13 |AvgSamplesPerSec=14.39
-epoch: 0|step: 755|ppo_ep: 1|act_loss: 0.0743408203125|cri_loss: 0.040374755859375|unsuper_loss: 0.0
-average reward score: 5.5078125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.86%) |Training time=0.45s (20.61%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.39
-epoch: 0|step: 756|ppo_ep: 1|act_loss: 0.00849151611328125|cri_loss: 0.005237579345703125|unsuper_loss: 0.0
-average reward score: 5.76171875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.84%) |Training time=0.45s (20.61%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.39
-epoch: 0|step: 757|ppo_ep: 1|act_loss: -0.019805908203125|cri_loss: -0.006443023681640625|unsuper_loss: 0.0
-average reward score: 5.22265625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.91%) |Training time=0.45s (20.53%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.39
-epoch: 0|step: 758|ppo_ep: 1|act_loss: -0.0068206787109375|cri_loss: -0.0015316009521484375|unsuper_loss: 0.0
-average reward score: 5.3828125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.80%) |Training time=0.45s (20.66%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.39
-[2023-04-14 09:15:31,886] [INFO] [logging.py:96:log_dist] [Rank 0] step=760, skipped=11, lr=[9.500164242019886e-06, 9.500164242019886e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:15:31,904] [INFO] [timer.py:199:stop] epoch=0/micro_step=760/global_step=760, RunningAvgSamplesPerSec=107.4533984061924, CurrSamplesPerSec=112.81274968711625, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:15:31,997] [INFO] [logging.py:96:log_dist] [Rank 0] step=760, skipped=10, lr=[4.922126713408392e-06, 4.922126713408392e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 759|ppo_ep: 1|act_loss: -0.0772705078125|cri_loss: -0.036865234375|unsuper_loss: 0.0
-average reward score: 5.9296875
--------------------------------------------------------------------------------------
-|E2E latency=2.73s |Gather latency=0.00s (0.00%) |Generate time=1.63s (59.60%) |Training time=0.45s (16.36%) |Others=0.66 (24.04%)|CurSamplesPerSec=11.73 |AvgSamplesPerSec=14.39
-epoch: 0|step: 760|ppo_ep: 1|act_loss: -0.0902099609375|cri_loss: -0.041015625|unsuper_loss: 0.0
-average reward score: 5.8359375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.71%) |Training time=0.45s (20.74%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.39
-epoch: 0|step: 761|ppo_ep: 1|act_loss: -0.000972747802734375|cri_loss: 0.000545501708984375|unsuper_loss: 0.0
-average reward score: 5.15625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.30%) |Training time=0.44s (20.10%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.39
-epoch: 0|step: 762|ppo_ep: 1|act_loss: 0.005298614501953125|cri_loss: 0.0035610198974609375|unsuper_loss: 0.0
-average reward score: 5.82421875
--------------------------------------------------------------------------------------
-|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.82s (78.13%) |Training time=0.41s (17.61%) |Others=0.10 (4.26%)|CurSamplesPerSec=13.71 |AvgSamplesPerSec=14.39
-epoch: 0|step: 763|ppo_ep: 1|act_loss: 0.0191497802734375|cri_loss: 0.0120086669921875|unsuper_loss: 0.0
-average reward score: 5.7734375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.77%) |Training time=0.45s (20.67%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.39
-epoch: 0|step: 764|ppo_ep: 1|act_loss: 0.07623291015625|cri_loss: 0.041717529296875|unsuper_loss: 0.0
-average reward score: 5.55859375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.91%) |Training time=0.45s (20.53%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.39
-epoch: 0|step: 765|ppo_ep: 1|act_loss: 0.0992431640625|cri_loss: 0.052703857421875|unsuper_loss: 0.0
-average reward score: 5.57421875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.45%) |Training time=0.46s (20.99%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.39
-epoch: 0|step: 766|ppo_ep: 1|act_loss: -0.0380859375|cri_loss: -0.0171661376953125|unsuper_loss: 0.0
-average reward score: 5.703125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.95%) |Training time=0.44s (20.48%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.39
-epoch: 0|step: 767|ppo_ep: 1|act_loss: 0.01441192626953125|cri_loss: 0.010528564453125|unsuper_loss: 0.0
-average reward score: 5.453125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.59%) |Training time=0.45s (20.85%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.39
-epoch: 0|step: 768|ppo_ep: 1|act_loss: -0.042572021484375|cri_loss: -0.017120361328125|unsuper_loss: 0.0
-average reward score: 7.19140625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.26%) |Training time=0.46s (21.21%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.39
-[2023-04-14 09:15:54,304] [INFO] [logging.py:96:log_dist] [Rank 0] step=770, skipped=11, lr=[9.49553621809577e-06, 9.49553621809577e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:15:55,082] [INFO] [timer.py:199:stop] epoch=0/micro_step=770/global_step=770, RunningAvgSamplesPerSec=107.16824767200629, CurrSamplesPerSec=30.633630233362283, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:15:55,175] [INFO] [logging.py:96:log_dist] [Rank 0] step=770, skipped=10, lr=[4.919725184968307e-06, 4.919725184968307e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 769|ppo_ep: 1|act_loss: -0.01451873779296875|cri_loss: -0.0052490234375|unsuper_loss: 0.0
-average reward score: 6.4140625
--------------------------------------------------------------------------------------
-|E2E latency=2.93s |Gather latency=0.00s (0.00%) |Generate time=1.62s (55.38%) |Training time=1.21s (41.26%) |Others=0.10 (3.36%)|CurSamplesPerSec=10.93 |AvgSamplesPerSec=14.38
-epoch: 0|step: 770|ppo_ep: 1|act_loss: 0.0462646484375|cri_loss: 0.0270233154296875|unsuper_loss: 0.0
-average reward score: 5.74609375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.77%) |Training time=0.45s (20.67%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.38
-epoch: 0|step: 771|ppo_ep: 1|act_loss: 0.0252532958984375|cri_loss: 0.0135498046875|unsuper_loss: 0.0
-average reward score: 6.2890625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.69%) |Training time=0.45s (20.76%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.38
-epoch: 0|step: 772|ppo_ep: 1|act_loss: -0.01418304443359375|cri_loss: -0.0051727294921875|unsuper_loss: 0.0
-average reward score: 5.8671875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.65%) |Training time=0.45s (20.81%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.38
-epoch: 0|step: 773|ppo_ep: 1|act_loss: -0.01824951171875|cri_loss: -0.00795745849609375|unsuper_loss: 0.0
-average reward score: 6.390625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.76%) |Training time=0.45s (20.69%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.38
-epoch: 0|step: 774|ppo_ep: 1|act_loss: 0.12939453125|cri_loss: 0.07391357421875|unsuper_loss: 0.0
-average reward score: 5.640625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.51%) |Training time=0.45s (20.94%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.38
-epoch: 0|step: 775|ppo_ep: 1|act_loss: -0.010955810546875|cri_loss: -0.00473785400390625|unsuper_loss: 0.0
-average reward score: 6.5234375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.23%) |Training time=0.44s (20.20%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.38
-epoch: 0|step: 776|ppo_ep: 1|act_loss: -0.0279388427734375|cri_loss: -0.01334381103515625|unsuper_loss: 0.0
-average reward score: 5.8671875
--------------------------------------------------------------------------------------
-|E2E latency=2.51s |Gather latency=0.00s (0.00%) |Generate time=1.85s (73.64%) |Training time=0.45s (17.73%) |Others=0.22 (8.63%)|CurSamplesPerSec=12.74 |AvgSamplesPerSec=14.38
-epoch: 0|step: 777|ppo_ep: 1|act_loss: 0.01385498046875|cri_loss: 0.0074615478515625|unsuper_loss: 0.0
-average reward score: 7.5390625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.60%) |Training time=0.45s (20.84%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.38
-[2023-04-14 09:16:15,024] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
-epoch: 0|step: 778|ppo_ep: 1|act_loss: -0.020172119140625|cri_loss: -0.00988006591796875|unsuper_loss: 0.0
-average reward score: 6.88671875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.22%) |Training time=0.45s (20.70%) |Others=0.09 (4.08%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.38
-[2023-04-14 09:16:17,078] [INFO] [logging.py:96:log_dist] [Rank 0] step=780, skipped=11, lr=[9.490838965557847e-06, 9.490838965557847e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:16:17,096] [INFO] [timer.py:199:stop] epoch=0/micro_step=780/global_step=780, RunningAvgSamplesPerSec=107.25929385459395, CurrSamplesPerSec=115.33516940086052, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:16:17,181] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
-[2023-04-14 09:16:17,182] [INFO] [logging.py:96:log_dist] [Rank 0] step=780, skipped=12, lr=[4.917778136851294e-06, 4.917778136851294e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 779|ppo_ep: 1|act_loss: -0.0114898681640625|cri_loss: -0.00531005859375|unsuper_loss: 0.0
-average reward score: 6.54296875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.73%) |Training time=0.46s (21.14%) |Others=0.09 (4.13%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.38
-epoch: 0|step: 780|ppo_ep: 1|act_loss: -0.04376220703125|cri_loss: -0.021148681640625|unsuper_loss: 0.0
-average reward score: 6.203125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.35%) |Training time=0.46s (21.02%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.38
-epoch: 0|step: 781|ppo_ep: 1|act_loss: -0.00690460205078125|cri_loss: -0.003047943115234375|unsuper_loss: 0.0
-average reward score: 6.9140625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.59%) |Training time=0.45s (20.85%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.38
-epoch: 0|step: 782|ppo_ep: 1|act_loss: 0.05810546875|cri_loss: 0.031005859375|unsuper_loss: 0.0
-average reward score: 6.40625
--------------------------------------------------------------------------------------
-|E2E latency=2.90s |Gather latency=0.00s (0.00%) |Generate time=1.62s (55.99%) |Training time=0.45s (15.46%) |Others=0.83 (28.55%)|CurSamplesPerSec=11.05 |AvgSamplesPerSec=14.38
-epoch: 0|step: 783|ppo_ep: 1|act_loss: 0.006511688232421875|cri_loss: 0.00395965576171875|unsuper_loss: 0.0
-average reward score: 6.203125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.71%) |Training time=0.45s (20.72%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.38
-epoch: 0|step: 784|ppo_ep: 1|act_loss: -0.01220703125|cri_loss: -0.005767822265625|unsuper_loss: 0.0
-average reward score: 6.125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.60%) |Training time=0.45s (20.84%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.38
-epoch: 0|step: 785|ppo_ep: 1|act_loss: -0.0089111328125|cri_loss: -0.0036163330078125|unsuper_loss: 0.0
-average reward score: 6.1015625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.94%) |Training time=0.44s (20.49%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.38
-epoch: 0|step: 786|ppo_ep: 1|act_loss: -0.0023040771484375|cri_loss: -0.0006747245788574219|unsuper_loss: 0.0
-average reward score: 6.33203125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.71%) |Training time=0.45s (20.73%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.38
-epoch: 0|step: 787|ppo_ep: 1|act_loss: 0.01407623291015625|cri_loss: 0.00878143310546875|unsuper_loss: 0.0
-average reward score: 5.70703125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.60%) |Training time=0.45s (20.85%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.38
-epoch: 0|step: 788|ppo_ep: 1|act_loss: 0.0009098052978515625|cri_loss: 0.0007901191711425781|unsuper_loss: 0.0
-average reward score: 6.359375
--------------------------------------------------------------------------------------
-|E2E latency=2.46s |Gather latency=0.00s (0.00%) |Generate time=1.62s (65.68%) |Training time=0.45s (18.26%) |Others=0.40 (16.06%)|CurSamplesPerSec=13.00 |AvgSamplesPerSec=14.38
-[2023-04-14 09:16:39,783] [INFO] [logging.py:96:log_dist] [Rank 0] step=790, skipped=11, lr=[9.486072554030733e-06, 9.486072554030733e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:16:39,801] [INFO] [timer.py:199:stop] epoch=0/micro_step=790/global_step=790, RunningAvgSamplesPerSec=107.30907836345817, CurrSamplesPerSec=113.32836398086337, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:16:39,895] [INFO] [logging.py:96:log_dist] [Rank 0] step=790, skipped=12, lr=[4.915312075301798e-06, 4.915312075301798e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 789|ppo_ep: 1|act_loss: 0.01332855224609375|cri_loss: 0.007904052734375|unsuper_loss: 0.0
-average reward score: 6.0234375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.94%) |Training time=0.45s (20.51%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.38
-epoch: 0|step: 790|ppo_ep: 1|act_loss: 0.06280517578125|cri_loss: 0.036529541015625|unsuper_loss: 0.0
-average reward score: 5.82421875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.14%) |Training time=0.44s (20.28%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.38
-epoch: 0|step: 791|ppo_ep: 1|act_loss: -0.03814697265625|cri_loss: -0.018585205078125|unsuper_loss: 0.0
-average reward score: 5.67578125
--------------------------------------------------------------------------------------
-|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.84s (78.81%) |Training time=0.40s (16.97%) |Others=0.10 (4.22%)|CurSamplesPerSec=13.68 |AvgSamplesPerSec=14.38
-epoch: 0|step: 792|ppo_ep: 1|act_loss: 0.05023193359375|cri_loss: 0.0278167724609375|unsuper_loss: 0.0
-average reward score: 4.94921875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.94%) |Training time=0.44s (20.50%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.38
-epoch: 0|step: 793|ppo_ep: 1|act_loss: 0.00034809112548828125|cri_loss: 0.002155303955078125|unsuper_loss: 0.0
-average reward score: 5.09375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.10%) |Training time=0.44s (20.33%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.38
-epoch: 0|step: 794|ppo_ep: 1|act_loss: -0.011444091796875|cri_loss: -0.00421142578125|unsuper_loss: 0.0
-average reward score: 4.73046875
--------------------------------------------------------------------------------------
-|E2E latency=2.58s |Gather latency=0.00s (0.00%) |Generate time=1.62s (62.67%) |Training time=0.45s (17.42%) |Others=0.51 (19.91%)|CurSamplesPerSec=12.39 |AvgSamplesPerSec=14.38
-epoch: 0|step: 795|ppo_ep: 1|act_loss: 0.06146240234375|cri_loss: 0.0360107421875|unsuper_loss: 0.0
-average reward score: 4.29296875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.81%) |Training time=0.45s (20.62%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.38
-epoch: 0|step: 796|ppo_ep: 1|act_loss: 0.07598876953125|cri_loss: 0.0418701171875|unsuper_loss: 0.0
-average reward score: 4.9453125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.89%) |Training time=0.45s (20.56%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.38
-epoch: 0|step: 797|ppo_ep: 1|act_loss: -0.06427001953125|cri_loss: -0.0310516357421875|unsuper_loss: 0.0
-average reward score: 5.421875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.82%) |Training time=0.45s (20.62%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.38
-epoch: 0|step: 798|ppo_ep: 1|act_loss: 0.00328826904296875|cri_loss: 0.0026397705078125|unsuper_loss: 0.0
-average reward score: 5.4375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.72%) |Training time=0.45s (20.73%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.38
-[2023-04-14 09:17:02,019] [INFO] [logging.py:96:log_dist] [Rank 0] step=800, skipped=11, lr=[9.481237054164141e-06, 9.481237054164141e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:17:02,037] [INFO] [timer.py:199:stop] epoch=0/micro_step=800/global_step=800, RunningAvgSamplesPerSec=107.40037872515786, CurrSamplesPerSec=111.78615850382913, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:17:02,130] [INFO] [logging.py:96:log_dist] [Rank 0] step=800, skipped=12, lr=[4.912810213000723e-06, 4.912810213000723e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 799|ppo_ep: 1|act_loss: -0.02484130859375|cri_loss: -0.0116119384765625|unsuper_loss: 0.0
-average reward score: 5.140625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.69%) |Training time=0.45s (20.76%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.38
-epoch: 0|step: 800|ppo_ep: 1|act_loss: 0.037261962890625|cri_loss: 0.021484375|unsuper_loss: 0.0
-average reward score: 4.92578125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.85%) |Training time=0.45s (20.59%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.38
-epoch: 0|step: 801|ppo_ep: 1|act_loss: -0.002933502197265625|cri_loss: -0.000164031982421875|unsuper_loss: 0.0
-average reward score: 5.44921875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.67%) |Training time=0.45s (20.76%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.38
-epoch: 0|step: 802|ppo_ep: 1|act_loss: 0.0721435546875|cri_loss: 0.041412353515625|unsuper_loss: 0.0
-average reward score: 5.71484375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.59%) |Training time=0.45s (20.86%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.38
-epoch: 0|step: 803|ppo_ep: 1|act_loss: 0.01995849609375|cri_loss: 0.01092529296875|unsuper_loss: 0.0
-average reward score: 5.3515625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.73%) |Training time=0.45s (20.72%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.38
-epoch: 0|step: 804|ppo_ep: 1|act_loss: -0.00691986083984375|cri_loss: -0.0017642974853515625|unsuper_loss: 0.0
-average reward score: 5.37890625
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.62%) |Training time=0.46s (20.87%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.38
-epoch: 0|step: 805|ppo_ep: 1|act_loss: -0.005359649658203125|cri_loss: -0.001361846923828125|unsuper_loss: 0.0
-average reward score: 5.765625
--------------------------------------------------------------------------------------
-|E2E latency=2.38s |Gather latency=0.00s (0.00%) |Generate time=1.78s (74.75%) |Training time=0.49s (20.79%) |Others=0.11 (4.46%)|CurSamplesPerSec=13.45 |AvgSamplesPerSec=14.38
-epoch: 0|step: 806|ppo_ep: 1|act_loss: 0.0159759521484375|cri_loss: 0.0096588134765625|unsuper_loss: 0.0
-average reward score: 5.68359375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.67s (76.42%) |Training time=0.42s (19.06%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.38
-epoch: 0|step: 807|ppo_ep: 1|act_loss: -0.0023345947265625|cri_loss: 0.000698089599609375|unsuper_loss: 0.0
-average reward score: 5.48046875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.79%) |Training time=0.45s (20.67%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.38
-epoch: 0|step: 808|ppo_ep: 1|act_loss: 0.035400390625|cri_loss: 0.019683837890625|unsuper_loss: 0.0
-average reward score: 6.15625
--------------------------------------------------------------------------------------
-|E2E latency=2.76s |Gather latency=0.00s (0.00%) |Generate time=1.62s (58.90%) |Training time=0.44s (16.06%) |Others=0.69 (25.05%)|CurSamplesPerSec=11.60 |AvgSamplesPerSec=14.38
-[2023-04-14 09:17:24,516] [INFO] [logging.py:96:log_dist] [Rank 0] step=810, skipped=11, lr=[9.476332537631846e-06, 9.476332537631846e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:17:24,534] [INFO] [timer.py:199:stop] epoch=0/micro_step=810/global_step=810, RunningAvgSamplesPerSec=107.47037359827968, CurrSamplesPerSec=114.3687949064038, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:17:24,627] [INFO] [logging.py:96:log_dist] [Rank 0] step=810, skipped=12, lr=[4.910272587031704e-06, 4.910272587031704e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 809|ppo_ep: 1|act_loss: 0.0015316009521484375|cri_loss: 0.0021305084228515625|unsuper_loss: 0.0
-average reward score: 5.58984375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.01%) |Training time=0.44s (20.45%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.38
-epoch: 0|step: 810|ppo_ep: 1|act_loss: 0.000553131103515625|cri_loss: 0.0010042190551757812|unsuper_loss: 0.0
-average reward score: 5.8359375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.02%) |Training time=0.44s (20.42%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.38
-epoch: 0|step: 811|ppo_ep: 1|act_loss: -0.03125|cri_loss: -0.01335906982421875|unsuper_loss: 0.0
-average reward score: 5.8359375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.07%) |Training time=0.44s (20.36%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.38
-epoch: 0|step: 812|ppo_ep: 1|act_loss: -0.0394287109375|cri_loss: -0.018096923828125|unsuper_loss: 0.0
-average reward score: 5.6953125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.23%) |Training time=0.44s (20.22%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.38
-epoch: 0|step: 813|ppo_ep: 1|act_loss: 0.00807952880859375|cri_loss: 0.004505157470703125|unsuper_loss: 0.0
-average reward score: 5.90234375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.37%) |Training time=0.43s (20.07%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.38
-epoch: 0|step: 814|ppo_ep: 1|act_loss: 0.003143310546875|cri_loss: 0.0019350051879882812|unsuper_loss: 0.0
-average reward score: 5.6171875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.41%) |Training time=0.44s (20.07%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.38
-epoch: 0|step: 815|ppo_ep: 1|act_loss: 0.00887298583984375|cri_loss: 0.00640106201171875|unsuper_loss: 0.0
-average reward score: 5.6328125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.88%) |Training time=0.45s (20.60%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.38
-epoch: 0|step: 816|ppo_ep: 1|act_loss: -0.01953125|cri_loss: -0.009552001953125|unsuper_loss: 0.0
-average reward score: 5.63671875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.18%) |Training time=0.44s (20.27%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.38
-epoch: 0|step: 817|ppo_ep: 1|act_loss: 0.0401611328125|cri_loss: 0.02154541015625|unsuper_loss: 0.0
-average reward score: 5.50390625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.94%) |Training time=0.44s (20.51%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.38
-epoch: 0|step: 818|ppo_ep: 1|act_loss: 0.004489898681640625|cri_loss: 0.0026702880859375|unsuper_loss: 0.0
-average reward score: 5.6015625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.90%) |Training time=0.45s (20.55%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.38
-[2023-04-14 09:17:46,179] [INFO] [logging.py:96:log_dist] [Rank 0] step=820, skipped=11, lr=[9.471359077130615e-06, 9.471359077130615e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:17:46,197] [INFO] [timer.py:199:stop] epoch=0/micro_step=820/global_step=820, RunningAvgSamplesPerSec=107.5542575745028, CurrSamplesPerSec=112.59810387019539, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:17:46,290] [INFO] [logging.py:96:log_dist] [Rank 0] step=820, skipped=12, lr=[4.907699235008478e-06, 4.907699235008478e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 819|ppo_ep: 1|act_loss: -0.003543853759765625|cri_loss: -0.0015096664428710938|unsuper_loss: 0.0
-average reward score: 5.57421875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.89%) |Training time=0.45s (20.60%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.38
-epoch: 0|step: 820|ppo_ep: 1|act_loss: -0.02679443359375|cri_loss: -0.01294708251953125|unsuper_loss: 0.0
-average reward score: 5.2109375
--------------------------------------------------------------------------------------
-|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.63s (73.05%) |Training time=0.44s (19.68%) |Others=0.16 (7.28%)|CurSamplesPerSec=14.30 |AvgSamplesPerSec=14.38
-epoch: 0|step: 821|ppo_ep: 1|act_loss: 0.0236663818359375|cri_loss: 0.012542724609375|unsuper_loss: 0.0
-average reward score: 5.87109375
--------------------------------------------------------------------------------------
-|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.72s (76.86%) |Training time=0.42s (18.71%) |Others=0.10 (4.42%)|CurSamplesPerSec=14.28 |AvgSamplesPerSec=14.38
-epoch: 0|step: 822|ppo_ep: 1|act_loss: 0.0013227462768554688|cri_loss: 0.0012483596801757812|unsuper_loss: 0.0
-average reward score: 5.328125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.81%) |Training time=0.45s (20.66%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.38
-epoch: 0|step: 823|ppo_ep: 1|act_loss: 0.02459716796875|cri_loss: 0.01287078857421875|unsuper_loss: 0.0
-average reward score: 5.5390625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.78%) |Training time=0.45s (20.69%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.38
-epoch: 0|step: 824|ppo_ep: 1|act_loss: -0.045379638671875|cri_loss: -0.0219268798828125|unsuper_loss: 0.0
-average reward score: 6.2109375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.80%) |Training time=0.45s (20.67%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.38
-epoch: 0|step: 825|ppo_ep: 1|act_loss: -0.005863189697265625|cri_loss: -0.00244140625|unsuper_loss: 0.0
-average reward score: 5.37109375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.60%) |Training time=0.45s (20.87%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.38
-epoch: 0|step: 826|ppo_ep: 1|act_loss: -0.01385498046875|cri_loss: -0.006084442138671875|unsuper_loss: 0.0
-average reward score: 5.3203125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.67%) |Training time=0.45s (20.70%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.39
-epoch: 0|step: 827|ppo_ep: 1|act_loss: -0.01480865478515625|cri_loss: -0.00696563720703125|unsuper_loss: 0.0
-average reward score: 5.6328125
--------------------------------------------------------------------------------------
-|E2E latency=2.30s |Gather latency=0.00s (0.00%) |Generate time=1.63s (70.82%) |Training time=0.45s (19.77%) |Others=0.22 (9.42%)|CurSamplesPerSec=13.94 |AvgSamplesPerSec=14.38
-epoch: 0|step: 828|ppo_ep: 1|act_loss: 0.0229034423828125|cri_loss: 0.01232147216796875|unsuper_loss: 0.0
-average reward score: 6.640625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.72%) |Training time=0.45s (20.75%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.38
-[2023-04-14 09:18:08,145] [INFO] [logging.py:96:log_dist] [Rank 0] step=830, skipped=11, lr=[9.466316746379131e-06, 9.466316746379131e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:18:08,163] [INFO] [timer.py:199:stop] epoch=0/micro_step=830/global_step=830, RunningAvgSamplesPerSec=107.61573678676814, CurrSamplesPerSec=112.13863919103744, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:18:08,256] [INFO] [logging.py:96:log_dist] [Rank 0] step=830, skipped=12, lr=[4.905090195074332e-06, 4.905090195074332e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 829|ppo_ep: 1|act_loss: -0.01229095458984375|cri_loss: -0.0051116943359375|unsuper_loss: 0.0
-average reward score: 5.21875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.81%) |Training time=0.45s (20.67%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.39
-epoch: 0|step: 830|ppo_ep: 1|act_loss: 0.00736236572265625|cri_loss: 0.0042724609375|unsuper_loss: 0.0
-average reward score: 6.08203125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.79%) |Training time=0.45s (20.68%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.39
-epoch: 0|step: 831|ppo_ep: 1|act_loss: 0.0172576904296875|cri_loss: 0.01012420654296875|unsuper_loss: 0.0
-average reward score: 5.984375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.88%) |Training time=0.45s (20.58%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.39
-epoch: 0|step: 832|ppo_ep: 1|act_loss: -0.016693115234375|cri_loss: -0.00738525390625|unsuper_loss: 0.0
-average reward score: 5.65625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.80%) |Training time=0.45s (20.67%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.39
-epoch: 0|step: 833|ppo_ep: 1|act_loss: 0.01226806640625|cri_loss: 0.006694793701171875|unsuper_loss: 0.0
-average reward score: 5.84375
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.89%) |Training time=0.45s (20.47%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.39
-epoch: 0|step: 834|ppo_ep: 1|act_loss: 0.035400390625|cri_loss: 0.018402099609375|unsuper_loss: 0.0
-average reward score: 6.38671875
--------------------------------------------------------------------------------------
-|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.74s (76.01%) |Training time=0.45s (19.69%) |Others=0.10 (4.30%)|CurSamplesPerSec=13.96 |AvgSamplesPerSec=14.39
-epoch: 0|step: 835|ppo_ep: 1|act_loss: -0.01434326171875|cri_loss: -0.006683349609375|unsuper_loss: 0.0
-average reward score: 6.0
--------------------------------------------------------------------------------------
-|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.63s (72.83%) |Training time=0.49s (21.85%) |Others=0.12 (5.32%)|CurSamplesPerSec=14.30 |AvgSamplesPerSec=14.39
-epoch: 0|step: 836|ppo_ep: 1|act_loss: -0.04925537109375|cri_loss: -0.023651123046875|unsuper_loss: 0.0
-average reward score: 5.76953125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.86%) |Training time=0.43s (19.59%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.39
-epoch: 0|step: 837|ppo_ep: 1|act_loss: -0.054840087890625|cri_loss: -0.0267333984375|unsuper_loss: 0.0
-average reward score: 6.25390625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.11%) |Training time=0.44s (20.33%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.39
-epoch: 0|step: 838|ppo_ep: 1|act_loss: -0.016571044921875|cri_loss: -0.0059051513671875|unsuper_loss: 0.0
-average reward score: 5.93359375
--------------------------------------------------------------------------------------
-|E2E latency=2.45s |Gather latency=0.00s (0.00%) |Generate time=1.62s (66.08%) |Training time=0.45s (18.42%) |Others=0.38 (15.50%)|CurSamplesPerSec=13.05 |AvgSamplesPerSec=14.39
-[2023-04-14 09:18:30,351] [INFO] [logging.py:96:log_dist] [Rank 0] step=840, skipped=11, lr=[9.461205620116899e-06, 9.461205620116899e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:18:30,370] [INFO] [timer.py:199:stop] epoch=0/micro_step=840/global_step=840, RunningAvgSamplesPerSec=107.66078239168438, CurrSamplesPerSec=111.81325820119396, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:18:30,463] [INFO] [logging.py:96:log_dist] [Rank 0] step=840, skipped=12, lr=[4.902445505901531e-06, 4.902445505901531e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 839|ppo_ep: 1|act_loss: -0.023773193359375|cri_loss: -0.0101318359375|unsuper_loss: 0.0
-average reward score: 6.68359375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.75%) |Training time=0.45s (20.71%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.39
-epoch: 0|step: 840|ppo_ep: 1|act_loss: -0.01178741455078125|cri_loss: -0.005176544189453125|unsuper_loss: 0.0
-average reward score: 6.53515625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.40%) |Training time=0.46s (21.07%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.39
-epoch: 0|step: 841|ppo_ep: 1|act_loss: 0.014923095703125|cri_loss: 0.00815582275390625|unsuper_loss: 0.0
-average reward score: 5.390625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.68%) |Training time=0.45s (20.78%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.39
-epoch: 0|step: 842|ppo_ep: 1|act_loss: -0.0009603500366210938|cri_loss: 0.0004177093505859375|unsuper_loss: 0.0
-average reward score: 5.6171875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.67%) |Training time=0.45s (20.80%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.39
-epoch: 0|step: 843|ppo_ep: 1|act_loss: 0.0574951171875|cri_loss: 0.0333251953125|unsuper_loss: 0.0
-average reward score: 5.05078125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.71%) |Training time=0.45s (20.76%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.39
-epoch: 0|step: 844|ppo_ep: 1|act_loss: -0.001766204833984375|cri_loss: 0.002040863037109375|unsuper_loss: 0.0
-average reward score: 6.13671875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.44%) |Training time=0.46s (21.03%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.39
-epoch: 0|step: 845|ppo_ep: 1|act_loss: -0.02178955078125|cri_loss: -0.00971221923828125|unsuper_loss: 0.0
-average reward score: 5.23046875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.60%) |Training time=0.45s (20.87%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.39
-epoch: 0|step: 846|ppo_ep: 1|act_loss: -0.05322265625|cri_loss: -0.024993896484375|unsuper_loss: 0.0
-average reward score: 5.63671875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.01%) |Training time=0.44s (20.46%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.39
-epoch: 0|step: 847|ppo_ep: 1|act_loss: -0.031646728515625|cri_loss: -0.01514434814453125|unsuper_loss: 0.0
-average reward score: 5.7109375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.40%) |Training time=0.46s (21.09%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.39
-epoch: 0|step: 848|ppo_ep: 1|act_loss: -0.044219970703125|cri_loss: -0.0208587646484375|unsuper_loss: 0.0
-average reward score: 5.25390625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.47%) |Training time=0.46s (21.00%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.39
-[2023-04-14 09:18:52,073] [INFO] [logging.py:96:log_dist] [Rank 0] step=850, skipped=11, lr=[9.456025774103137e-06, 9.456025774103137e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:18:52,091] [INFO] [timer.py:199:stop] epoch=0/micro_step=850/global_step=850, RunningAvgSamplesPerSec=107.68979385696973, CurrSamplesPerSec=109.89481785825113, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:18:52,184] [INFO] [logging.py:96:log_dist] [Rank 0] step=850, skipped=12, lr=[4.899765206690747e-06, 4.899765206690747e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 849|ppo_ep: 1|act_loss: 0.01556396484375|cri_loss: 0.0083770751953125|unsuper_loss: 0.0
-average reward score: 5.890625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.55%) |Training time=0.45s (20.91%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.39
-epoch: 0|step: 850|ppo_ep: 1|act_loss: 0.027984619140625|cri_loss: 0.01593017578125|unsuper_loss: 0.0
-average reward score: 5.2890625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.79%) |Training time=0.45s (20.67%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.39
-epoch: 0|step: 851|ppo_ep: 1|act_loss: -0.01447296142578125|cri_loss: -0.00623321533203125|unsuper_loss: 0.0
-average reward score: 5.75390625
--------------------------------------------------------------------------------------
-|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.65s (73.45%) |Training time=0.43s (18.96%) |Others=0.17 (7.60%)|CurSamplesPerSec=14.24 |AvgSamplesPerSec=14.39
-epoch: 0|step: 852|ppo_ep: 1|act_loss: -0.02020263671875|cri_loss: -0.00873565673828125|unsuper_loss: 0.0
-average reward score: 5.43359375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.95%) |Training time=0.44s (20.19%) |Others=0.11 (4.85%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.39
-epoch: 0|step: 853|ppo_ep: 1|act_loss: -0.04766845703125|cri_loss: -0.0230560302734375|unsuper_loss: 0.0
-average reward score: 5.13671875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.05%) |Training time=0.44s (20.41%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.39
-epoch: 0|step: 854|ppo_ep: 1|act_loss: -0.03314208984375|cri_loss: -0.0153350830078125|unsuper_loss: 0.0
-average reward score: 5.53125
--------------------------------------------------------------------------------------
-|E2E latency=2.40s |Gather latency=0.00s (0.00%) |Generate time=1.62s (67.54%) |Training time=0.45s (18.83%) |Others=0.33 (13.63%)|CurSamplesPerSec=13.32 |AvgSamplesPerSec=14.39
-epoch: 0|step: 855|ppo_ep: 1|act_loss: 0.0093231201171875|cri_loss: 0.00567626953125|unsuper_loss: 0.0
-average reward score: 5.6171875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.58%) |Training time=0.45s (20.90%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.39
-epoch: 0|step: 856|ppo_ep: 1|act_loss: -0.0193023681640625|cri_loss: -0.00839996337890625|unsuper_loss: 0.0
-average reward score: 5.46875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.54%) |Training time=0.45s (20.93%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.39
-epoch: 0|step: 857|ppo_ep: 1|act_loss: 0.003940582275390625|cri_loss: 0.003002166748046875|unsuper_loss: 0.0
-average reward score: 5.39453125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.66%) |Training time=0.45s (20.80%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.39
-epoch: 0|step: 858|ppo_ep: 1|act_loss: 0.01480865478515625|cri_loss: 0.00885009765625|unsuper_loss: 0.0
-average reward score: 5.4609375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.67%) |Training time=0.45s (20.79%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.39
-[2023-04-14 09:19:14,107] [INFO] [logging.py:96:log_dist] [Rank 0] step=860, skipped=11, lr=[9.450777285115664e-06, 9.450777285115664e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:19:14,125] [INFO] [timer.py:199:stop] epoch=0/micro_step=860/global_step=860, RunningAvgSamplesPerSec=107.74103031128162, CurrSamplesPerSec=110.24423676217167, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:19:14,218] [INFO] [logging.py:96:log_dist] [Rank 0] step=860, skipped=12, lr=[4.897049337170483e-06, 4.897049337170483e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 859|ppo_ep: 1|act_loss: 0.00782012939453125|cri_loss: 0.005535125732421875|unsuper_loss: 0.0
-average reward score: 5.484375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.63%) |Training time=0.45s (20.86%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.39
-epoch: 0|step: 860|ppo_ep: 1|act_loss: 0.06195068359375|cri_loss: 0.034088134765625|unsuper_loss: 0.0
-average reward score: 5.1875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.87%) |Training time=0.45s (20.58%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.39
-epoch: 0|step: 861|ppo_ep: 1|act_loss: -0.027069091796875|cri_loss: -0.012939453125|unsuper_loss: 0.0
-average reward score: 5.078125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.97%) |Training time=0.45s (20.53%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.39
-epoch: 0|step: 862|ppo_ep: 1|act_loss: -0.0242156982421875|cri_loss: -0.010772705078125|unsuper_loss: 0.0
-average reward score: 5.69921875
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.53%) |Training time=0.46s (20.97%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.39
-epoch: 0|step: 863|ppo_ep: 1|act_loss: -0.0100555419921875|cri_loss: -0.00096893310546875|unsuper_loss: 0.0
-average reward score: 5.2421875
--------------------------------------------------------------------------------------
-|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.65s (73.54%) |Training time=0.49s (22.04%) |Others=0.10 (4.42%)|CurSamplesPerSec=14.30 |AvgSamplesPerSec=14.39
-epoch: 0|step: 864|ppo_ep: 1|act_loss: 0.04193115234375|cri_loss: 0.028228759765625|unsuper_loss: 0.0
-average reward score: 5.171875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.51%) |Training time=0.45s (20.95%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.39
-epoch: 0|step: 865|ppo_ep: 1|act_loss: 0.020294189453125|cri_loss: 0.01458740234375|unsuper_loss: 0.0
-average reward score: 5.43359375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.66%) |Training time=0.45s (20.81%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.39
-epoch: 0|step: 866|ppo_ep: 1|act_loss: -0.03167724609375|cri_loss: -0.014373779296875|unsuper_loss: 0.0
-average reward score: 5.01953125
--------------------------------------------------------------------------------------
-|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.82s (77.79%) |Training time=0.42s (17.89%) |Others=0.10 (4.32%)|CurSamplesPerSec=13.67 |AvgSamplesPerSec=14.39
-epoch: 0|step: 867|ppo_ep: 1|act_loss: 0.0034465789794921875|cri_loss: 0.00566864013671875|unsuper_loss: 0.0
-average reward score: 5.29296875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.94%) |Training time=0.45s (20.51%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.39
-epoch: 0|step: 868|ppo_ep: 1|act_loss: 0.0083160400390625|cri_loss: 0.008819580078125|unsuper_loss: 0.0
-average reward score: 5.12890625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.58%) |Training time=0.45s (20.89%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.39
-[2023-04-14 09:19:36,087] [INFO] [logging.py:96:log_dist] [Rank 0] step=870, skipped=11, lr=[9.445460230949745e-06, 9.445460230949745e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:19:36,491] [INFO] [timer.py:199:stop] epoch=0/micro_step=870/global_step=870, RunningAvgSamplesPerSec=107.61950179826276, CurrSamplesPerSec=47.67854553463432, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:19:36,584] [INFO] [logging.py:96:log_dist] [Rank 0] step=870, skipped=12, lr=[4.894297937596475e-06, 4.894297937596475e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 869|ppo_ep: 1|act_loss: -0.000762939453125|cri_loss: 0.0043182373046875|unsuper_loss: 0.0
-average reward score: 5.33984375
--------------------------------------------------------------------------------------
-|E2E latency=2.55s |Gather latency=0.00s (0.00%) |Generate time=1.62s (63.49%) |Training time=0.83s (32.66%) |Others=0.10 (3.85%)|CurSamplesPerSec=12.53 |AvgSamplesPerSec=14.39
-epoch: 0|step: 870|ppo_ep: 1|act_loss: 0.06591796875|cri_loss: 0.047149658203125|unsuper_loss: 0.0
-average reward score: 5.08984375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.79%) |Training time=0.45s (20.68%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.39
-epoch: 0|step: 871|ppo_ep: 1|act_loss: -0.0975341796875|cri_loss: -0.04486083984375|unsuper_loss: 0.0
-average reward score: 5.6953125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.50%) |Training time=0.46s (20.98%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.39
-epoch: 0|step: 872|ppo_ep: 1|act_loss: 0.01934814453125|cri_loss: 0.0106658935546875|unsuper_loss: 0.0
-average reward score: 5.3984375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.51%) |Training time=0.46s (20.95%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.39
-epoch: 0|step: 873|ppo_ep: 1|act_loss: 0.0455322265625|cri_loss: 0.02703857421875|unsuper_loss: 0.0
-average reward score: 5.56640625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.73%) |Training time=0.45s (20.76%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.39
-epoch: 0|step: 874|ppo_ep: 1|act_loss: 0.01427459716796875|cri_loss: 0.00791168212890625|unsuper_loss: 0.0
-average reward score: 5.6015625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.98%) |Training time=0.45s (20.49%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.39
-epoch: 0|step: 875|ppo_ep: 1|act_loss: -0.04949951171875|cri_loss: -0.0236358642578125|unsuper_loss: 0.0
-average reward score: 5.3671875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.65%) |Training time=0.45s (20.81%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.39
-epoch: 0|step: 876|ppo_ep: 1|act_loss: -0.004947662353515625|cri_loss: -0.00028228759765625|unsuper_loss: 0.0
-average reward score: 5.3359375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.73%) |Training time=0.45s (20.76%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.39
-epoch: 0|step: 877|ppo_ep: 1|act_loss: 0.0782470703125|cri_loss: 0.044403076171875|unsuper_loss: 0.0
-average reward score: 5.5234375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.86%) |Training time=0.45s (20.61%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.39
-epoch: 0|step: 878|ppo_ep: 1|act_loss: 0.06304931640625|cri_loss: 0.037445068359375|unsuper_loss: 0.0
-average reward score: 5.4609375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.64%) |Training time=0.45s (20.83%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.39
-[2023-04-14 09:19:58,214] [INFO] [logging.py:96:log_dist] [Rank 0] step=880, skipped=11, lr=[9.440074690416949e-06, 9.440074690416949e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:19:58,232] [INFO] [timer.py:199:stop] epoch=0/micro_step=880/global_step=880, RunningAvgSamplesPerSec=107.65370035299243, CurrSamplesPerSec=109.40803398863513, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:19:58,325] [INFO] [logging.py:96:log_dist] [Rank 0] step=880, skipped=12, lr=[4.891511048751102e-06, 4.891511048751102e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 879|ppo_ep: 1|act_loss: -0.0906982421875|cri_loss: -0.03826904296875|unsuper_loss: 0.0
-average reward score: 5.4375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.55%) |Training time=0.46s (20.94%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.39
-[2023-04-14 09:20:00,493] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
-epoch: 0|step: 880|ppo_ep: 1|act_loss: 0.0443115234375|cri_loss: 0.02581787109375|unsuper_loss: 0.0
-average reward score: 5.79296875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.84%) |Training time=0.46s (21.08%) |Others=0.09 (4.09%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.39
-[2023-04-14 09:20:02,656] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
-epoch: 0|step: 881|ppo_ep: 1|act_loss: -0.0081787109375|cri_loss: -5.340576171875e-05|unsuper_loss: 0.0
-average reward score: 5.12890625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.65s (76.09%) |Training time=0.43s (19.84%) |Others=0.09 (4.07%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.39
-epoch: 0|step: 882|ppo_ep: 1|act_loss: -0.0518798828125|cri_loss: -0.01336669921875|unsuper_loss: 0.0
-average reward score: 5.46875
--------------------------------------------------------------------------------------
-|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.80s (77.22%) |Training time=0.43s (18.55%) |Others=0.10 (4.23%)|CurSamplesPerSec=13.71 |AvgSamplesPerSec=14.39
-epoch: 0|step: 883|ppo_ep: 1|act_loss: -0.050872802734375|cri_loss: -0.01409912109375|unsuper_loss: 0.0
-average reward score: 5.2734375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.36%) |Training time=0.46s (21.03%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.39
-epoch: 0|step: 884|ppo_ep: 1|act_loss: 0.0120086669921875|cri_loss: 0.01505279541015625|unsuper_loss: 0.0
-average reward score: 5.78125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.54%) |Training time=0.45s (20.91%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.39
-epoch: 0|step: 885|ppo_ep: 1|act_loss: 0.0875244140625|cri_loss: 0.05120849609375|unsuper_loss: 0.0
-average reward score: 4.94140625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.13%) |Training time=0.44s (20.34%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.40
-[2023-04-14 09:20:13,670] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, reducing to 16384
-epoch: 0|step: 886|ppo_ep: 1|act_loss: -0.1021728515625|cri_loss: -0.035919189453125|unsuper_loss: 0.0
-average reward score: 5.3671875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.50%) |Training time=0.44s (20.34%) |Others=0.09 (4.16%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.40
-epoch: 0|step: 887|ppo_ep: 1|act_loss: -0.0577392578125|cri_loss: -0.0184173583984375|unsuper_loss: 0.0
-average reward score: 5.171875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.99%) |Training time=0.45s (20.47%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.40
-epoch: 0|step: 888|ppo_ep: 1|act_loss: -0.0115966796875|cri_loss: 0.014251708984375|unsuper_loss: 0.0
-average reward score: 5.46484375
--------------------------------------------------------------------------------------
-|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.65s (71.35%) |Training time=0.44s (18.94%) |Others=0.22 (9.70%)|CurSamplesPerSec=13.82 |AvgSamplesPerSec=14.40
-[2023-04-14 09:20:20,226] [INFO] [logging.py:96:log_dist] [Rank 0] step=890, skipped=11, lr=[9.43462074334398e-06, 9.43462074334398e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:20:20,244] [INFO] [timer.py:199:stop] epoch=0/micro_step=890/global_step=890, RunningAvgSamplesPerSec=107.7239576261761, CurrSamplesPerSec=116.03634172425211, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:20:20,337] [INFO] [logging.py:96:log_dist] [Rank 0] step=890, skipped=15, lr=[4.889539132542428e-06, 4.889539132542428e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 889|ppo_ep: 1|act_loss: 0.06048583984375|cri_loss: 0.04217529296875|unsuper_loss: 0.0
-average reward score: 5.3984375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.33%) |Training time=0.44s (20.16%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.40
-[2023-04-14 09:20:22,384] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 8192, but hysteresis is 2. Reducing hysteresis to 1
-epoch: 0|step: 890|ppo_ep: 1|act_loss: -0.027069091796875|cri_loss: 0.01715087890625|unsuper_loss: 0.0
-average reward score: 6.2734375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.25%) |Training time=0.41s (19.15%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.40
-epoch: 0|step: 891|ppo_ep: 1|act_loss: -0.1314697265625|cri_loss: -0.045501708984375|unsuper_loss: 0.0
-average reward score: 5.5390625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.14%) |Training time=0.44s (20.33%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.40
-epoch: 0|step: 892|ppo_ep: 1|act_loss: -0.0782470703125|cri_loss: -0.028076171875|unsuper_loss: 0.0
-average reward score: 5.4453125
--------------------------------------------------------------------------------------
-|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.67s (74.39%) |Training time=0.44s (19.66%) |Others=0.13 (5.96%)|CurSamplesPerSec=14.28 |AvgSamplesPerSec=14.40
-epoch: 0|step: 893|ppo_ep: 1|act_loss: -0.04217529296875|cri_loss: 0.0130615234375|unsuper_loss: 0.0
-average reward score: 5.78125
--------------------------------------------------------------------------------------
-|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.70s (76.07%) |Training time=0.44s (19.51%) |Others=0.10 (4.42%)|CurSamplesPerSec=14.32 |AvgSamplesPerSec=14.40
-epoch: 0|step: 894|ppo_ep: 1|act_loss: 0.11956787109375|cri_loss: 0.0692138671875|unsuper_loss: 0.0
-average reward score: 5.44140625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.30%) |Training time=0.44s (20.15%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.40
-epoch: 0|step: 895|ppo_ep: 1|act_loss: 0.0298919677734375|cri_loss: 0.025909423828125|unsuper_loss: 0.0
-average reward score: 5.58984375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.14%) |Training time=0.44s (20.33%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.40
-epoch: 0|step: 896|ppo_ep: 1|act_loss: 0.169189453125|cri_loss: 0.094482421875|unsuper_loss: 0.0
-average reward score: 5.49609375
--------------------------------------------------------------------------------------
-|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.81s (77.34%) |Training time=0.43s (18.45%) |Others=0.10 (4.21%)|CurSamplesPerSec=13.71 |AvgSamplesPerSec=14.40
-[2023-04-14 09:20:37,862] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 8192, reducing to 4096
-epoch: 0|step: 897|ppo_ep: 1|act_loss: 0.0097503662109375|cri_loss: 0.008026123046875|unsuper_loss: 0.0
-average reward score: 5.234375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.64s (76.43%) |Training time=0.41s (19.00%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.40
-epoch: 0|step: 898|ppo_ep: 1|act_loss: -0.071533203125|cri_loss: -0.0288238525390625|unsuper_loss: 0.0
-average reward score: 5.23828125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.79%) |Training time=0.43s (19.67%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.40
-[2023-04-14 09:20:42,190] [INFO] [logging.py:96:log_dist] [Rank 0] step=900, skipped=13, lr=[9.43020838726756e-06, 9.43020838726756e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:20:42,208] [INFO] [timer.py:199:stop] epoch=0/micro_step=900/global_step=900, RunningAvgSamplesPerSec=107.83954100227736, CurrSamplesPerSec=118.38544986262222, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:20:42,302] [INFO] [logging.py:96:log_dist] [Rank 0] step=900, skipped=15, lr=[4.886692007019939e-06, 4.886692007019939e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 899|ppo_ep: 1|act_loss: -0.032318115234375|cri_loss: -0.01042938232421875|unsuper_loss: 0.0
-average reward score: 5.43359375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.51%) |Training time=0.43s (19.90%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.40
-epoch: 0|step: 900|ppo_ep: 1|act_loss: -0.1773681640625|cri_loss: -0.0633544921875|unsuper_loss: 0.0
-average reward score: 5.58984375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.36%) |Training time=0.44s (20.11%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.40
-epoch: 0|step: 901|ppo_ep: 1|act_loss: -0.02392578125|cri_loss: 0.00042724609375|unsuper_loss: 0.0
-average reward score: 5.8125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.78%) |Training time=0.43s (19.68%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.40
-epoch: 0|step: 902|ppo_ep: 1|act_loss: 0.1036376953125|cri_loss: 0.0618896484375|unsuper_loss: 0.0
-average reward score: 5.5234375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.45%) |Training time=0.43s (20.00%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.40
-epoch: 0|step: 903|ppo_ep: 1|act_loss: 0.25048828125|cri_loss: 0.17138671875|unsuper_loss: 0.0
-average reward score: 5.203125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.30%) |Training time=0.44s (20.15%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.40
-epoch: 0|step: 904|ppo_ep: 1|act_loss: 0.1358642578125|cri_loss: 0.0823974609375|unsuper_loss: 0.0
-average reward score: 5.6796875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.49%) |Training time=0.43s (19.97%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.40
-epoch: 0|step: 905|ppo_ep: 1|act_loss: -0.014801025390625|cri_loss: 0.017059326171875|unsuper_loss: 0.0
-average reward score: 6.02734375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.28%) |Training time=0.44s (20.18%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.40
-epoch: 0|step: 906|ppo_ep: 1|act_loss: -0.05535888671875|cri_loss: -0.0138092041015625|unsuper_loss: 0.0
-average reward score: 5.35546875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.49%) |Training time=0.43s (19.97%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.40
-epoch: 0|step: 907|ppo_ep: 1|act_loss: -0.06268310546875|cri_loss: -0.0184478759765625|unsuper_loss: 0.0
-average reward score: 6.30859375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.26%) |Training time=0.44s (20.21%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.40
-epoch: 0|step: 908|ppo_ep: 1|act_loss: 0.07330322265625|cri_loss: 0.0513916015625|unsuper_loss: 0.0
-average reward score: 5.71875
--------------------------------------------------------------------------------------
-|E2E latency=2.40s |Gather latency=0.00s (0.00%) |Generate time=1.64s (68.21%) |Training time=0.44s (18.19%) |Others=0.33 (13.60%)|CurSamplesPerSec=13.34 |AvgSamplesPerSec=14.40
-[2023-04-14 09:21:04,148] [INFO] [logging.py:96:log_dist] [Rank 0] step=910, skipped=13, lr=[9.424631512821333e-06, 9.424631512821333e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:21:04,166] [INFO] [timer.py:199:stop] epoch=0/micro_step=910/global_step=910, RunningAvgSamplesPerSec=107.93576925943141, CurrSamplesPerSec=117.37038319604036, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:21:04,259] [INFO] [logging.py:96:log_dist] [Rank 0] step=910, skipped=15, lr=[4.883809504964325e-06, 4.883809504964325e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 909|ppo_ep: 1|act_loss: -0.1029052734375|cri_loss: -0.020233154296875|unsuper_loss: 0.0
-average reward score: 5.65234375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.45%) |Training time=0.44s (20.03%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.40
-epoch: 0|step: 910|ppo_ep: 1|act_loss: 0.165283203125|cri_loss: 0.11480712890625|unsuper_loss: 0.0
-average reward score: 5.48046875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.15%) |Training time=0.44s (20.32%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.40
-epoch: 0|step: 911|ppo_ep: 1|act_loss: 0.1444091796875|cri_loss: 0.0936279296875|unsuper_loss: 0.0
-average reward score: 5.84375
--------------------------------------------------------------------------------------
-|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.82s (77.78%) |Training time=0.42s (17.99%) |Others=0.10 (4.22%)|CurSamplesPerSec=13.65 |AvgSamplesPerSec=14.40
-epoch: 0|step: 912|ppo_ep: 1|act_loss: 0.0100555419921875|cri_loss: 0.03411865234375|unsuper_loss: 0.0
-average reward score: 5.578125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.38%) |Training time=0.44s (20.09%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.40
-epoch: 0|step: 913|ppo_ep: 1|act_loss: 0.0283355712890625|cri_loss: 0.022918701171875|unsuper_loss: 0.0
-average reward score: 5.59375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.51%) |Training time=0.46s (20.97%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.40
-[2023-04-14 09:21:15,175] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 4096, reducing to 2048
-epoch: 0|step: 914|ppo_ep: 1|act_loss: -0.191162109375|cri_loss: -0.0084228515625|unsuper_loss: 0.0
-average reward score: 5.13671875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.52%) |Training time=0.43s (19.88%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.40
-[2023-04-14 09:21:17,321] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 2048, reducing to 1024
-epoch: 0|step: 915|ppo_ep: 1|act_loss: 0.1402587890625|cri_loss: 0.149658203125|unsuper_loss: 0.0
-average reward score: 5.3359375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.54%) |Training time=0.43s (19.88%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.40
-epoch: 0|step: 916|ppo_ep: 1|act_loss: -0.060089111328125|cri_loss: -0.01513671875|unsuper_loss: 0.0
-average reward score: 4.9765625
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.59%) |Training time=0.46s (20.92%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.40
-epoch: 0|step: 917|ppo_ep: 1|act_loss: 0.202880859375|cri_loss: 0.1185302734375|unsuper_loss: 0.0
-average reward score: 5.33203125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.68%) |Training time=0.45s (20.78%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.40
-epoch: 0|step: 918|ppo_ep: 1|act_loss: 0.038116455078125|cri_loss: 0.027862548828125|unsuper_loss: 0.0
-average reward score: 5.015625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.59%) |Training time=0.45s (20.88%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.40
-[2023-04-14 09:21:26,014] [INFO] [logging.py:96:log_dist] [Rank 0] step=920, skipped=15, lr=[9.420120921365356e-06, 9.420120921365356e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:21:26,032] [INFO] [timer.py:199:stop] epoch=0/micro_step=920/global_step=920, RunningAvgSamplesPerSec=108.00241603015057, CurrSamplesPerSec=111.10003311039004, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:21:26,125] [INFO] [logging.py:96:log_dist] [Rank 0] step=920, skipped=15, lr=[4.880891669101221e-06, 4.880891669101221e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 919|ppo_ep: 1|act_loss: -0.0063018798828125|cri_loss: 0.0113983154296875|unsuper_loss: 0.0
-average reward score: 5.5078125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.71%) |Training time=0.45s (20.75%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.40
-epoch: 0|step: 920|ppo_ep: 1|act_loss: 0.07421875|cri_loss: 0.04022216796875|unsuper_loss: 0.0
-average reward score: 5.41015625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.53%) |Training time=0.46s (20.94%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.40
-epoch: 0|step: 921|ppo_ep: 1|act_loss: 0.251953125|cri_loss: 0.1390380859375|unsuper_loss: 0.0
-average reward score: 4.82421875
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.66s (74.98%) |Training time=0.45s (20.53%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.48 |AvgSamplesPerSec=14.40
-epoch: 0|step: 922|ppo_ep: 1|act_loss: 0.0997314453125|cri_loss: 0.060546875|unsuper_loss: 0.0
-average reward score: 5.390625
--------------------------------------------------------------------------------------
-|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.73s (75.79%) |Training time=0.45s (19.83%) |Others=0.10 (4.38%)|CurSamplesPerSec=14.05 |AvgSamplesPerSec=14.40
-epoch: 0|step: 923|ppo_ep: 1|act_loss: 0.0804443359375|cri_loss: 0.051025390625|unsuper_loss: 0.0
-average reward score: 5.59765625
--------------------------------------------------------------------------------------
-|E2E latency=2.73s |Gather latency=0.00s (0.00%) |Generate time=1.63s (59.51%) |Training time=0.45s (16.48%) |Others=0.66 (24.01%)|CurSamplesPerSec=11.71 |AvgSamplesPerSec=14.40
-epoch: 0|step: 924|ppo_ep: 1|act_loss: -0.0679931640625|cri_loss: -0.022064208984375|unsuper_loss: 0.0
-average reward score: 5.72265625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.62%) |Training time=0.45s (20.85%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.40
-epoch: 0|step: 925|ppo_ep: 1|act_loss: -0.064208984375|cri_loss: -0.0222625732421875|unsuper_loss: 0.0
-average reward score: 5.40625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.48%) |Training time=0.46s (20.98%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.40
-epoch: 0|step: 926|ppo_ep: 1|act_loss: -0.00994873046875|cri_loss: 0.00093841552734375|unsuper_loss: 0.0
-average reward score: 6.06640625
--------------------------------------------------------------------------------------
-|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.81s (76.77%) |Training time=0.45s (19.00%) |Others=0.10 (4.23%)|CurSamplesPerSec=13.61 |AvgSamplesPerSec=14.40
-epoch: 0|step: 927|ppo_ep: 1|act_loss: -0.01502227783203125|cri_loss: -0.00019073486328125|unsuper_loss: 0.0
-average reward score: 5.6796875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.46%) |Training time=0.43s (19.95%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.40
-epoch: 0|step: 928|ppo_ep: 1|act_loss: -0.0238800048828125|cri_loss: -0.0068359375|unsuper_loss: 0.0
-average reward score: 5.67578125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.31%) |Training time=0.43s (20.13%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.40
-[2023-04-14 09:21:48,591] [INFO] [logging.py:96:log_dist] [Rank 0] step=930, skipped=15, lr=[9.414421387372385e-06, 9.414421387372385e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:21:48,609] [INFO] [timer.py:199:stop] epoch=0/micro_step=930/global_step=930, RunningAvgSamplesPerSec=108.04897948065681, CurrSamplesPerSec=117.29724807363407, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:21:48,702] [INFO] [logging.py:96:log_dist] [Rank 0] step=930, skipped=15, lr=[4.877938542679992e-06, 4.877938542679992e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 929|ppo_ep: 1|act_loss: -0.038818359375|cri_loss: -0.0157012939453125|unsuper_loss: 0.0
-average reward score: 5.97265625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.26%) |Training time=0.44s (20.20%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.40
-epoch: 0|step: 930|ppo_ep: 1|act_loss: 0.05596923828125|cri_loss: 0.030120849609375|unsuper_loss: 0.0
-average reward score: 5.47265625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.93%) |Training time=0.44s (20.53%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.40
-epoch: 0|step: 931|ppo_ep: 1|act_loss: 0.02618408203125|cri_loss: 0.01433563232421875|unsuper_loss: 0.0
-average reward score: 5.89453125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.10%) |Training time=0.44s (20.36%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.40
-epoch: 0|step: 932|ppo_ep: 1|act_loss: 0.002109527587890625|cri_loss: 0.001583099365234375|unsuper_loss: 0.0
-average reward score: 5.8515625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.03%) |Training time=0.44s (20.42%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.40
-epoch: 0|step: 933|ppo_ep: 1|act_loss: 0.03338623046875|cri_loss: 0.0186920166015625|unsuper_loss: 0.0
-average reward score: 5.828125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.77%) |Training time=0.45s (20.69%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.40
-epoch: 0|step: 934|ppo_ep: 1|act_loss: 0.0928955078125|cri_loss: 0.05352783203125|unsuper_loss: 0.0
-average reward score: 5.23828125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.92%) |Training time=0.45s (20.54%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.40
-epoch: 0|step: 935|ppo_ep: 1|act_loss: 0.0657958984375|cri_loss: 0.034393310546875|unsuper_loss: 0.0
-average reward score: 4.85546875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.11%) |Training time=0.44s (20.34%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.40
-epoch: 0|step: 936|ppo_ep: 1|act_loss: 0.0772705078125|cri_loss: 0.042449951171875|unsuper_loss: 0.0
-average reward score: 5.3515625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.01%) |Training time=0.44s (20.45%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.40
-epoch: 0|step: 937|ppo_ep: 1|act_loss: -0.01788330078125|cri_loss: -0.008026123046875|unsuper_loss: 0.0
-average reward score: 5.3515625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.80%) |Training time=0.45s (20.65%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.40
-epoch: 0|step: 938|ppo_ep: 1|act_loss: -0.053955078125|cri_loss: -0.024993896484375|unsuper_loss: 0.0
-average reward score: 5.55859375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.99%) |Training time=0.44s (20.46%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.40
-[2023-04-14 09:22:10,262] [INFO] [logging.py:96:log_dist] [Rank 0] step=940, skipped=15, lr=[9.408653827083077e-06, 9.408653827083077e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:22:10,280] [INFO] [timer.py:199:stop] epoch=0/micro_step=940/global_step=940, RunningAvgSamplesPerSec=108.10580988208648, CurrSamplesPerSec=113.72648917961666, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:22:10,374] [INFO] [logging.py:96:log_dist] [Rank 0] step=940, skipped=15, lr=[4.874950169473097e-06, 4.874950169473097e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 939|ppo_ep: 1|act_loss: -0.029998779296875|cri_loss: -0.0115203857421875|unsuper_loss: 0.0
-average reward score: 5.453125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.93%) |Training time=0.44s (20.46%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.40
-epoch: 0|step: 940|ppo_ep: 1|act_loss: -0.0288848876953125|cri_loss: -0.0118560791015625|unsuper_loss: 0.0
-average reward score: 5.890625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.74%) |Training time=0.45s (20.72%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.40
-epoch: 0|step: 941|ppo_ep: 1|act_loss: -0.04986572265625|cri_loss: -0.02099609375|unsuper_loss: 0.0
-average reward score: 5.5234375
--------------------------------------------------------------------------------------
-|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.79s (76.98%) |Training time=0.44s (18.78%) |Others=0.10 (4.25%)|CurSamplesPerSec=13.74 |AvgSamplesPerSec=14.40
-epoch: 0|step: 942|ppo_ep: 1|act_loss: -0.00787353515625|cri_loss: -0.002758026123046875|unsuper_loss: 0.0
-average reward score: 5.05859375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.30%) |Training time=0.44s (20.16%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.40
-epoch: 0|step: 943|ppo_ep: 1|act_loss: -0.0435791015625|cri_loss: -0.0201873779296875|unsuper_loss: 0.0
-average reward score: 6.0078125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.14%) |Training time=0.44s (20.34%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.40
-epoch: 0|step: 944|ppo_ep: 1|act_loss: 0.05010986328125|cri_loss: 0.0257720947265625|unsuper_loss: 0.0
-average reward score: 4.5703125
--------------------------------------------------------------------------------------
-|E2E latency=2.45s |Gather latency=0.00s (0.00%) |Generate time=1.62s (66.19%) |Training time=0.44s (18.12%) |Others=0.38 (15.68%)|CurSamplesPerSec=13.04 |AvgSamplesPerSec=14.40
-epoch: 0|step: 945|ppo_ep: 1|act_loss: 0.0153350830078125|cri_loss: 0.00891876220703125|unsuper_loss: 0.0
-average reward score: 4.94140625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.84%) |Training time=0.45s (20.62%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.40
-epoch: 0|step: 946|ppo_ep: 1|act_loss: 0.058349609375|cri_loss: 0.033203125|unsuper_loss: 0.0
-average reward score: 4.88671875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.88%) |Training time=0.45s (20.58%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.40
-epoch: 0|step: 947|ppo_ep: 1|act_loss: 0.108642578125|cri_loss: 0.0631103515625|unsuper_loss: 0.0
-average reward score: 4.83203125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.35%) |Training time=0.43s (20.08%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.40
-epoch: 0|step: 948|ppo_ep: 1|act_loss: 0.010772705078125|cri_loss: 0.00787353515625|unsuper_loss: 0.0
-average reward score: 4.84375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.70%) |Training time=0.43s (19.76%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.40
-[2023-04-14 09:22:32,403] [INFO] [logging.py:96:log_dist] [Rank 0] step=950, skipped=15, lr=[9.402818325986586e-06, 9.402818325986586e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:22:32,421] [INFO] [timer.py:199:stop] epoch=0/micro_step=950/global_step=950, RunningAvgSamplesPerSec=108.17638441637497, CurrSamplesPerSec=115.0792441441233, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:22:32,514] [INFO] [logging.py:96:log_dist] [Rank 0] step=950, skipped=15, lr=[4.871926593775433e-06, 4.871926593775433e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 949|ppo_ep: 1|act_loss: -0.05145263671875|cri_loss: -0.0225830078125|unsuper_loss: 0.0
-average reward score: 4.84375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.19%) |Training time=0.44s (20.27%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.40
-epoch: 0|step: 950|ppo_ep: 1|act_loss: 0.0074462890625|cri_loss: 0.004741668701171875|unsuper_loss: 0.0
-average reward score: 4.765625
--------------------------------------------------------------------------------------
-|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.64s (70.07%) |Training time=0.46s (19.71%) |Others=0.24 (10.22%)|CurSamplesPerSec=13.65 |AvgSamplesPerSec=14.40
-epoch: 0|step: 951|ppo_ep: 1|act_loss: -0.062255859375|cri_loss: -0.0283966064453125|unsuper_loss: 0.0
-average reward score: 4.30859375
--------------------------------------------------------------------------------------
-|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.73s (76.23%) |Training time=0.44s (19.42%) |Others=0.10 (4.35%)|CurSamplesPerSec=14.10 |AvgSamplesPerSec=14.40
-epoch: 0|step: 952|ppo_ep: 1|act_loss: -0.0128021240234375|cri_loss: -0.003631591796875|unsuper_loss: 0.0
-average reward score: 4.62890625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.73%) |Training time=0.45s (20.74%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.40
-epoch: 0|step: 953|ppo_ep: 1|act_loss: -0.0440673828125|cri_loss: -0.0195465087890625|unsuper_loss: 0.0
-average reward score: 4.96484375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.51%) |Training time=0.46s (20.96%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.40
-epoch: 0|step: 954|ppo_ep: 1|act_loss: -0.02337646484375|cri_loss: -0.0098724365234375|unsuper_loss: 0.0
-average reward score: 4.578125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.07%) |Training time=0.44s (20.36%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.40
-epoch: 0|step: 955|ppo_ep: 1|act_loss: 0.00646209716796875|cri_loss: 0.005374908447265625|unsuper_loss: 0.0
-average reward score: 4.6484375
--------------------------------------------------------------------------------------
-|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.64s (72.41%) |Training time=0.44s (19.38%) |Others=0.19 (8.21%)|CurSamplesPerSec=14.15 |AvgSamplesPerSec=14.40
-epoch: 0|step: 956|ppo_ep: 1|act_loss: -0.00202178955078125|cri_loss: 0.002307891845703125|unsuper_loss: 0.0
-average reward score: 4.7109375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.90%) |Training time=0.43s (19.56%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.40
-epoch: 0|step: 957|ppo_ep: 1|act_loss: 0.0703125|cri_loss: 0.038421630859375|unsuper_loss: 0.0
-average reward score: 4.63671875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.65s (76.01%) |Training time=0.42s (19.45%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.40
-epoch: 0|step: 958|ppo_ep: 1|act_loss: 0.0372314453125|cri_loss: 0.0194244384765625|unsuper_loss: 0.0
-average reward score: 4.56640625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.65s (76.19%) |Training time=0.42s (19.25%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.40
-[2023-04-14 09:22:54,486] [INFO] [logging.py:96:log_dist] [Rank 0] step=960, skipped=15, lr=[9.396914970579121e-06, 9.396914970579121e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:22:54,504] [INFO] [timer.py:199:stop] epoch=0/micro_step=960/global_step=960, RunningAvgSamplesPerSec=108.26503201681297, CurrSamplesPerSec=133.41457227490508, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:22:54,597] [INFO] [logging.py:96:log_dist] [Rank 0] step=960, skipped=15, lr=[4.86886786040369e-06, 4.86886786040369e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 959|ppo_ep: 1|act_loss: 0.00933074951171875|cri_loss: 0.005535125732421875|unsuper_loss: 0.0
-average reward score: 4.99609375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.67s (76.89%) |Training time=0.40s (18.57%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.40
-epoch: 0|step: 960|ppo_ep: 1|act_loss: 0.065673828125|cri_loss: 0.034149169921875|unsuper_loss: 0.0
-average reward score: 5.2265625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.00%) |Training time=0.45s (20.47%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.40
-epoch: 0|step: 961|ppo_ep: 1|act_loss: 0.04327392578125|cri_loss: 0.023590087890625|unsuper_loss: 0.0
-average reward score: 4.6796875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.43%) |Training time=0.43s (20.02%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.41
-epoch: 0|step: 962|ppo_ep: 1|act_loss: -0.0791015625|cri_loss: -0.03839111328125|unsuper_loss: 0.0
-average reward score: 5.015625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.31%) |Training time=0.44s (20.15%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.41
-epoch: 0|step: 963|ppo_ep: 1|act_loss: 0.05120849609375|cri_loss: 0.027435302734375|unsuper_loss: 0.0
-average reward score: 5.18359375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.79%) |Training time=0.43s (19.67%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.41
-epoch: 0|step: 964|ppo_ep: 1|act_loss: 0.0233154296875|cri_loss: 0.014129638671875|unsuper_loss: 0.0
-average reward score: 5.0546875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.50%) |Training time=0.43s (19.95%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.41
-epoch: 0|step: 965|ppo_ep: 1|act_loss: 0.08660888671875|cri_loss: 0.04522705078125|unsuper_loss: 0.0
-average reward score: 4.3359375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.68%) |Training time=0.45s (20.78%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.41
-epoch: 0|step: 966|ppo_ep: 1|act_loss: -0.00670623779296875|cri_loss: -0.0023937225341796875|unsuper_loss: 0.0
-average reward score: 5.03125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.71%) |Training time=0.45s (20.76%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.41
-epoch: 0|step: 967|ppo_ep: 1|act_loss: -0.03643798828125|cri_loss: -0.016998291015625|unsuper_loss: 0.0
-average reward score: 5.109375
--------------------------------------------------------------------------------------
-|E2E latency=2.44s |Gather latency=0.00s (0.00%) |Generate time=1.63s (66.53%) |Training time=0.45s (18.34%) |Others=0.37 (15.13%)|CurSamplesPerSec=13.09 |AvgSamplesPerSec=14.41
-epoch: 0|step: 968|ppo_ep: 1|act_loss: -0.0234375|cri_loss: -0.0086212158203125|unsuper_loss: 0.0
-average reward score: 5.3125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.90%) |Training time=0.44s (20.54%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.41
-[2023-04-14 09:23:16,482] [INFO] [logging.py:96:log_dist] [Rank 0] step=970, skipped=15, lr=[9.390943848362648e-06, 9.390943848362648e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:23:16,499] [INFO] [timer.py:199:stop] epoch=0/micro_step=970/global_step=970, RunningAvgSamplesPerSec=108.32570871091323, CurrSamplesPerSec=111.58485938589877, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:23:16,592] [INFO] [logging.py:96:log_dist] [Rank 0] step=970, skipped=15, lr=[4.8657740146956724e-06, 4.8657740146956724e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 969|ppo_ep: 1|act_loss: -0.00354766845703125|cri_loss: -0.0011920928955078125|unsuper_loss: 0.0
-average reward score: 5.51171875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.65%) |Training time=0.45s (20.83%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.41
-epoch: 0|step: 970|ppo_ep: 1|act_loss: -0.0154571533203125|cri_loss: -0.00266265869140625|unsuper_loss: 0.0
-average reward score: 5.08984375
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.65s (74.54%) |Training time=0.46s (20.98%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.46 |AvgSamplesPerSec=14.41
-epoch: 0|step: 971|ppo_ep: 1|act_loss: -0.0240478515625|cri_loss: -0.01140594482421875|unsuper_loss: 0.0
-average reward score: 5.2421875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.66s (76.38%) |Training time=0.41s (19.03%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.41
-epoch: 0|step: 972|ppo_ep: 1|act_loss: -0.014373779296875|cri_loss: -0.00548553466796875|unsuper_loss: 0.0
-average reward score: 5.0625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.47%) |Training time=0.43s (19.96%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.41
-epoch: 0|step: 973|ppo_ep: 1|act_loss: -0.0204620361328125|cri_loss: -0.00933837890625|unsuper_loss: 0.0
-average reward score: 6.5625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.12%) |Training time=0.44s (20.34%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.41
-epoch: 0|step: 974|ppo_ep: 1|act_loss: 0.0244140625|cri_loss: 0.0131072998046875|unsuper_loss: 0.0
-average reward score: 5.09765625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.16%) |Training time=0.44s (20.33%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.41
-epoch: 0|step: 975|ppo_ep: 1|act_loss: 0.022674560546875|cri_loss: 0.013916015625|unsuper_loss: 0.0
-average reward score: 6.2734375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.12%) |Training time=0.44s (20.33%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.41
-epoch: 0|step: 976|ppo_ep: 1|act_loss: -0.0102996826171875|cri_loss: -0.003696441650390625|unsuper_loss: 0.0
-average reward score: 6.1328125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.92%) |Training time=0.45s (20.54%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.41
-epoch: 0|step: 977|ppo_ep: 1|act_loss: -0.0123748779296875|cri_loss: -0.00479888916015625|unsuper_loss: 0.0
-average reward score: 5.69140625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.66%) |Training time=0.45s (20.81%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.41
-epoch: 0|step: 978|ppo_ep: 1|act_loss: -0.020172119140625|cri_loss: -0.00809478759765625|unsuper_loss: 0.0
-average reward score: 5.91015625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.85%) |Training time=0.45s (20.62%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.41
-[2023-04-14 09:23:38,256] [INFO] [logging.py:96:log_dist] [Rank 0] step=980, skipped=15, lr=[9.384905047843602e-06, 9.384905047843602e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:23:38,616] [INFO] [timer.py:199:stop] epoch=0/micro_step=980/global_step=980, RunningAvgSamplesPerSec=108.26986922621812, CurrSamplesPerSec=51.679806183969674, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:23:38,710] [INFO] [logging.py:96:log_dist] [Rank 0] step=980, skipped=15, lr=[4.862645102509638e-06, 4.862645102509638e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 979|ppo_ep: 1|act_loss: -0.041595458984375|cri_loss: -0.0178680419921875|unsuper_loss: 0.0
-average reward score: 5.859375
--------------------------------------------------------------------------------------
-|E2E latency=2.53s |Gather latency=0.00s (0.00%) |Generate time=1.64s (64.81%) |Training time=0.79s (31.28%) |Others=0.10 (3.91%)|CurSamplesPerSec=12.67 |AvgSamplesPerSec=14.41
-epoch: 0|step: 980|ppo_ep: 1|act_loss: 0.01123046875|cri_loss: 0.0067138671875|unsuper_loss: 0.0
-average reward score: 6.11328125
--------------------------------------------------------------------------------------
-|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.73s (75.98%) |Training time=0.45s (19.68%) |Others=0.10 (4.34%)|CurSamplesPerSec=14.08 |AvgSamplesPerSec=14.41
-epoch: 0|step: 981|ppo_ep: 1|act_loss: 0.071533203125|cri_loss: 0.037384033203125|unsuper_loss: 0.0
-average reward score: 5.9609375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.54%) |Training time=0.45s (20.92%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.41
-epoch: 0|step: 982|ppo_ep: 1|act_loss: -0.0034942626953125|cri_loss: -8.7738037109375e-05|unsuper_loss: 0.0
-average reward score: 5.921875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.84%) |Training time=0.45s (20.61%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.41
-epoch: 0|step: 983|ppo_ep: 1|act_loss: -0.00067138671875|cri_loss: 3.4809112548828125e-05|unsuper_loss: 0.0
-average reward score: 5.76953125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.72%) |Training time=0.45s (20.74%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.41
-epoch: 0|step: 984|ppo_ep: 1|act_loss: -0.009124755859375|cri_loss: -0.004352569580078125|unsuper_loss: 0.0
-average reward score: 6.12890625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.54%) |Training time=0.45s (20.91%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.41
-epoch: 0|step: 985|ppo_ep: 1|act_loss: -0.0148773193359375|cri_loss: -0.00699615478515625|unsuper_loss: 0.0
-average reward score: 5.7890625
--------------------------------------------------------------------------------------
-|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.80s (77.00%) |Training time=0.44s (18.75%) |Others=0.10 (4.25%)|CurSamplesPerSec=13.71 |AvgSamplesPerSec=14.41
-epoch: 0|step: 986|ppo_ep: 1|act_loss: -0.018524169921875|cri_loss: -0.0087890625|unsuper_loss: 0.0
-average reward score: 5.6640625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.38%) |Training time=0.44s (20.06%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.41
-epoch: 0|step: 987|ppo_ep: 1|act_loss: 0.069580078125|cri_loss: 0.036865234375|unsuper_loss: 0.0
-average reward score: 5.77734375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.34%) |Training time=0.44s (20.12%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41
-epoch: 0|step: 988|ppo_ep: 1|act_loss: 0.01471710205078125|cri_loss: 0.00804901123046875|unsuper_loss: 0.0
-average reward score: 6.09375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.91%) |Training time=0.45s (20.53%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.41
-[2023-04-14 09:24:00,562] [INFO] [logging.py:96:log_dist] [Rank 0] step=990, skipped=15, lr=[9.378798658531574e-06, 9.378798658531574e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:24:00,580] [INFO] [timer.py:199:stop] epoch=0/micro_step=990/global_step=990, RunningAvgSamplesPerSec=108.31861477028136, CurrSamplesPerSec=114.56618951285925, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:24:00,673] [INFO] [logging.py:96:log_dist] [Rank 0] step=990, skipped=15, lr=[4.859481170223614e-06, 4.859481170223614e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 989|ppo_ep: 1|act_loss: 0.0231475830078125|cri_loss: 0.0121002197265625|unsuper_loss: 0.0
-average reward score: 6.42578125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.06%) |Training time=0.44s (20.41%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41
-epoch: 0|step: 990|ppo_ep: 1|act_loss: -0.007503509521484375|cri_loss: -0.003055572509765625|unsuper_loss: 0.0
-average reward score: 6.16015625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.04%) |Training time=0.44s (20.39%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41
-epoch: 0|step: 991|ppo_ep: 1|act_loss: 0.0267181396484375|cri_loss: 0.01448822021484375|unsuper_loss: 0.0
-average reward score: 6.234375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.72%) |Training time=0.45s (20.65%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.41
-epoch: 0|step: 992|ppo_ep: 1|act_loss: 0.029205322265625|cri_loss: 0.017303466796875|unsuper_loss: 0.0
-average reward score: 6.21875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.71%) |Training time=0.45s (20.74%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.41
-epoch: 0|step: 993|ppo_ep: 1|act_loss: -0.031524658203125|cri_loss: -0.01493072509765625|unsuper_loss: 0.0
-average reward score: 5.78515625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.63%) |Training time=0.45s (20.84%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.41
-epoch: 0|step: 994|ppo_ep: 1|act_loss: -0.037261962890625|cri_loss: -0.016632080078125|unsuper_loss: 0.0
-average reward score: 5.89453125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.71%) |Training time=0.45s (20.74%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.41
-epoch: 0|step: 995|ppo_ep: 1|act_loss: 0.0576171875|cri_loss: 0.0328369140625|unsuper_loss: 0.0
-average reward score: 6.4140625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.40%) |Training time=0.46s (20.98%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.41
-epoch: 0|step: 996|ppo_ep: 1|act_loss: 0.0141754150390625|cri_loss: 0.0078125|unsuper_loss: 0.0
-average reward score: 5.375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.63%) |Training time=0.45s (20.84%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.41
-epoch: 0|step: 997|ppo_ep: 1|act_loss: 0.04144287109375|cri_loss: 0.0221099853515625|unsuper_loss: 0.0
-average reward score: 6.1640625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.45%) |Training time=0.46s (21.01%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.41
-epoch: 0|step: 998|ppo_ep: 1|act_loss: 0.00325775146484375|cri_loss: 0.0024585723876953125|unsuper_loss: 0.0
-average reward score: 5.97265625
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.88%) |Training time=0.45s (20.62%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.41
-[2023-04-14 09:24:22,297] [INFO] [logging.py:96:log_dist] [Rank 0] step=1000, skipped=15, lr=[9.372624770937987e-06, 9.372624770937987e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:24:22,315] [INFO] [timer.py:199:stop] epoch=0/micro_step=1000/global_step=1000, RunningAvgSamplesPerSec=108.34636245517726, CurrSamplesPerSec=111.06482200944505, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:24:22,408] [INFO] [logging.py:96:log_dist] [Rank 0] step=1000, skipped=15, lr=[4.856282264734708e-06, 4.856282264734708e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 999|ppo_ep: 1|act_loss: 0.003055572509765625|cri_loss: 0.0018558502197265625|unsuper_loss: 0.0
-average reward score: 6.09375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.69%) |Training time=0.45s (20.79%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1000|ppo_ep: 1|act_loss: -0.008026123046875|cri_loss: -0.003406524658203125|unsuper_loss: 0.0
-average reward score: 5.75390625
--------------------------------------------------------------------------------------
-|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.79s (76.68%) |Training time=0.44s (19.02%) |Others=0.10 (4.31%)|CurSamplesPerSec=13.68 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1001|ppo_ep: 1|act_loss: -0.03692626953125|cri_loss: -0.017974853515625|unsuper_loss: 0.0
-average reward score: 5.84375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.69%) |Training time=0.43s (19.77%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1002|ppo_ep: 1|act_loss: -0.041015625|cri_loss: -0.0196990966796875|unsuper_loss: 0.0
-average reward score: 5.515625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.52%) |Training time=0.43s (19.95%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1003|ppo_ep: 1|act_loss: 0.01285552978515625|cri_loss: 0.006832122802734375|unsuper_loss: 0.0
-average reward score: 5.49609375
--------------------------------------------------------------------------------------
-|E2E latency=3.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (51.14%) |Training time=0.44s (14.02%) |Others=1.10 (34.84%)|CurSamplesPerSec=10.12 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1004|ppo_ep: 1|act_loss: 0.014739990234375|cri_loss: 0.0085296630859375|unsuper_loss: 0.0
-average reward score: 5.42578125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.93%) |Training time=0.44s (20.51%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1005|ppo_ep: 1|act_loss: 0.0325927734375|cri_loss: 0.019317626953125|unsuper_loss: 0.0
-average reward score: 5.9453125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.77%) |Training time=0.45s (20.66%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1006|ppo_ep: 1|act_loss: 0.00997161865234375|cri_loss: 0.00556182861328125|unsuper_loss: 0.0
-average reward score: 5.7265625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.56%) |Training time=0.45s (20.89%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1007|ppo_ep: 1|act_loss: 0.01336669921875|cri_loss: 0.00836944580078125|unsuper_loss: 0.0
-average reward score: 5.4453125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.01%) |Training time=0.44s (20.44%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1008|ppo_ep: 1|act_loss: -0.04754638671875|cri_loss: -0.018310546875|unsuper_loss: 0.0
-average reward score: 5.71875
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.46%) |Training time=0.46s (21.05%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.41
-[2023-04-14 09:24:45,254] [INFO] [logging.py:96:log_dist] [Rank 0] step=1010, skipped=15, lr=[9.366383476574745e-06, 9.366383476574745e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:24:45,272] [INFO] [timer.py:199:stop] epoch=0/micro_step=1010/global_step=1010, RunningAvgSamplesPerSec=108.39448732121316, CurrSamplesPerSec=110.60107949535652, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:24:45,365] [INFO] [logging.py:96:log_dist] [Rank 0] step=1010, skipped=15, lr=[4.853048433458417e-06, 4.853048433458417e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1009|ppo_ep: 1|act_loss: -0.010223388671875|cri_loss: -0.004230499267578125|unsuper_loss: 0.0
-average reward score: 5.6328125
--------------------------------------------------------------------------------------
-|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.72s (75.75%) |Training time=0.45s (19.93%) |Others=0.10 (4.32%)|CurSamplesPerSec=14.10 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1010|ppo_ep: 1|act_loss: -0.05865478515625|cri_loss: -0.0288543701171875|unsuper_loss: 0.0
-average reward score: 5.46875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.53%) |Training time=0.45s (20.93%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1011|ppo_ep: 1|act_loss: -0.03656005859375|cri_loss: -0.017578125|unsuper_loss: 0.0
-average reward score: 5.47265625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.98%) |Training time=0.44s (20.47%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1012|ppo_ep: 1|act_loss: -0.009002685546875|cri_loss: -0.003559112548828125|unsuper_loss: 0.0
-average reward score: 5.359375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.94%) |Training time=0.44s (20.52%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1013|ppo_ep: 1|act_loss: 0.029022216796875|cri_loss: 0.0154571533203125|unsuper_loss: 0.0
-average reward score: 5.85546875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.88%) |Training time=0.45s (20.57%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1014|ppo_ep: 1|act_loss: 0.0226593017578125|cri_loss: 0.01175689697265625|unsuper_loss: 0.0
-average reward score: 5.578125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.72%) |Training time=0.45s (20.73%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1015|ppo_ep: 1|act_loss: -0.0012493133544921875|cri_loss: 0.0007171630859375|unsuper_loss: 0.0
-average reward score: 5.66015625
--------------------------------------------------------------------------------------
-|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.80s (77.00%) |Training time=0.44s (18.78%) |Others=0.10 (4.22%)|CurSamplesPerSec=13.70 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1016|ppo_ep: 1|act_loss: 0.038787841796875|cri_loss: 0.020965576171875|unsuper_loss: 0.0
-average reward score: 5.43359375
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.66s (75.84%) |Training time=0.43s (19.62%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1017|ppo_ep: 1|act_loss: -0.019256591796875|cri_loss: -0.00925445556640625|unsuper_loss: 0.0
-average reward score: 5.65625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.90%) |Training time=0.44s (20.55%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1018|ppo_ep: 1|act_loss: -0.06585693359375|cri_loss: -0.031982421875|unsuper_loss: 0.0
-average reward score: 6.078125
--------------------------------------------------------------------------------------
-|E2E latency=2.11s |Gather latency=0.00s (0.00%) |Generate time=1.62s (76.81%) |Training time=0.39s (18.33%) |Others=0.10 (4.85%)|CurSamplesPerSec=15.17 |AvgSamplesPerSec=14.41
-[2023-04-14 09:25:07,744] [INFO] [logging.py:96:log_dist] [Rank 0] step=1020, skipped=15, lr=[9.360074867952892e-06, 9.360074867952892e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:25:07,762] [INFO] [timer.py:199:stop] epoch=0/micro_step=1020/global_step=1020, RunningAvgSamplesPerSec=108.46349582136385, CurrSamplesPerSec=110.18052442942728, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:25:07,855] [INFO] [logging.py:96:log_dist] [Rank 0] step=1020, skipped=15, lr=[4.849779724327923e-06, 4.849779724327923e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1019|ppo_ep: 1|act_loss: 0.060546875|cri_loss: 0.03167724609375|unsuper_loss: 0.0
-average reward score: 5.390625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.60%) |Training time=0.45s (20.88%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1020|ppo_ep: 1|act_loss: 0.036865234375|cri_loss: 0.019134521484375|unsuper_loss: 0.0
-average reward score: 5.140625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.89%) |Training time=0.45s (20.57%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1021|ppo_ep: 1|act_loss: 0.0103302001953125|cri_loss: 0.00662994384765625|unsuper_loss: 0.0
-average reward score: 5.359375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.70%) |Training time=0.45s (20.77%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1022|ppo_ep: 1|act_loss: -0.0287017822265625|cri_loss: -0.01360321044921875|unsuper_loss: 0.0
-average reward score: 5.421875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.06%) |Training time=0.44s (20.39%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1023|ppo_ep: 1|act_loss: 8.0108642578125e-05|cri_loss: 0.00118255615234375|unsuper_loss: 0.0
-average reward score: 4.91015625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.97%) |Training time=0.44s (20.47%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1024|ppo_ep: 1|act_loss: -0.00025081634521484375|cri_loss: 0.0006818771362304688|unsuper_loss: 0.0
-average reward score: 5.42578125
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.55%) |Training time=0.45s (20.64%) |Others=0.11 (4.81%)|CurSamplesPerSec=14.57 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1025|ppo_ep: 1|act_loss: 0.051239013671875|cri_loss: 0.027984619140625|unsuper_loss: 0.0
-average reward score: 5.265625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.83%) |Training time=0.45s (20.62%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1026|ppo_ep: 1|act_loss: -0.001129150390625|cri_loss: 0.001171112060546875|unsuper_loss: 0.0
-average reward score: 5.23828125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.72%) |Training time=0.45s (20.74%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1027|ppo_ep: 1|act_loss: 0.031158447265625|cri_loss: 0.0161895751953125|unsuper_loss: 0.0
-average reward score: 5.578125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.87%) |Training time=0.45s (20.59%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1028|ppo_ep: 1|act_loss: 0.01092529296875|cri_loss: 0.006145477294921875|unsuper_loss: 0.0
-average reward score: 5.0703125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.57%) |Training time=0.45s (20.90%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41
-[2023-04-14 09:25:29,433] [INFO] [logging.py:96:log_dist] [Rank 0] step=1030, skipped=15, lr=[9.353699038581223e-06, 9.353699038581223e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:25:29,451] [INFO] [timer.py:199:stop] epoch=0/micro_step=1030/global_step=1030, RunningAvgSamplesPerSec=108.50163632858461, CurrSamplesPerSec=113.407363407382, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:25:29,544] [INFO] [logging.py:96:log_dist] [Rank 0] step=1030, skipped=15, lr=[4.84647618579338e-06, 4.84647618579338e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1029|ppo_ep: 1|act_loss: -0.00655364990234375|cri_loss: -0.00279998779296875|unsuper_loss: 0.0
-average reward score: 5.3125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.93%) |Training time=0.44s (20.54%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1030|ppo_ep: 1|act_loss: 0.0004353523254394531|cri_loss: 0.001064300537109375|unsuper_loss: 0.0
-average reward score: 5.28515625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.73%) |Training time=0.45s (20.70%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1031|ppo_ep: 1|act_loss: -0.03240966796875|cri_loss: -0.015289306640625|unsuper_loss: 0.0
-average reward score: 5.19140625
--------------------------------------------------------------------------------------
-|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.80s (77.19%) |Training time=0.43s (18.57%) |Others=0.10 (4.24%)|CurSamplesPerSec=13.74 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1032|ppo_ep: 1|act_loss: 0.00957489013671875|cri_loss: 0.005382537841796875|unsuper_loss: 0.0
-average reward score: 5.7421875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.78%) |Training time=0.45s (20.63%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1033|ppo_ep: 1|act_loss: 0.00380706787109375|cri_loss: 0.003082275390625|unsuper_loss: 0.0
-average reward score: 5.140625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.66%) |Training time=0.45s (20.78%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1034|ppo_ep: 1|act_loss: 0.040985107421875|cri_loss: 0.024444580078125|unsuper_loss: 0.0
-average reward score: 5.2421875
--------------------------------------------------------------------------------------
-|E2E latency=2.51s |Gather latency=0.00s (0.00%) |Generate time=1.62s (64.50%) |Training time=0.45s (17.83%) |Others=0.44 (17.66%)|CurSamplesPerSec=12.76 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1035|ppo_ep: 1|act_loss: 0.0794677734375|cri_loss: 0.04132080078125|unsuper_loss: 0.0
-average reward score: 5.3125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.74%) |Training time=0.45s (20.69%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1036|ppo_ep: 1|act_loss: 0.020751953125|cri_loss: 0.0122833251953125|unsuper_loss: 0.0
-average reward score: 5.078125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.38%) |Training time=0.43s (20.07%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1037|ppo_ep: 1|act_loss: -0.04425048828125|cri_loss: -0.0213623046875|unsuper_loss: 0.0
-average reward score: 5.4609375
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.59%) |Training time=0.46s (20.91%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1038|ppo_ep: 1|act_loss: -0.0184478759765625|cri_loss: -0.0088348388671875|unsuper_loss: 0.0
-average reward score: 5.7734375
--------------------------------------------------------------------------------------
-|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.72s (75.96%) |Training time=0.45s (19.69%) |Others=0.10 (4.35%)|CurSamplesPerSec=14.14 |AvgSamplesPerSec=14.41
-[2023-04-14 09:25:51,701] [INFO] [logging.py:96:log_dist] [Rank 0] step=1040, skipped=15, lr=[9.347256082964908e-06, 9.347256082964908e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:25:51,719] [INFO] [timer.py:199:stop] epoch=0/micro_step=1040/global_step=1040, RunningAvgSamplesPerSec=108.5441294190187, CurrSamplesPerSec=113.64030904049277, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:25:51,812] [INFO] [logging.py:96:log_dist] [Rank 0] step=1040, skipped=15, lr=[4.843137866821196e-06, 4.843137866821196e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1039|ppo_ep: 1|act_loss: -0.0318603515625|cri_loss: -0.015228271484375|unsuper_loss: 0.0
-average reward score: 6.421875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.90%) |Training time=0.44s (20.58%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1040|ppo_ep: 1|act_loss: -0.003589630126953125|cri_loss: -0.0008382797241210938|unsuper_loss: 0.0
-average reward score: 5.859375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.07%) |Training time=0.44s (20.36%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1041|ppo_ep: 1|act_loss: 0.020965576171875|cri_loss: 0.0125579833984375|unsuper_loss: 0.0
-average reward score: 6.23828125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.95%) |Training time=0.44s (20.49%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1042|ppo_ep: 1|act_loss: 0.06640625|cri_loss: 0.03594970703125|unsuper_loss: 0.0
-average reward score: 6.8046875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.68%) |Training time=0.43s (19.77%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1043|ppo_ep: 1|act_loss: 0.0677490234375|cri_loss: 0.035980224609375|unsuper_loss: 0.0
-average reward score: 6.69140625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.70%) |Training time=0.43s (19.74%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1044|ppo_ep: 1|act_loss: 0.028533935546875|cri_loss: 0.015899658203125|unsuper_loss: 0.0
-average reward score: 6.125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.34%) |Training time=0.43s (20.09%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1045|ppo_ep: 1|act_loss: -0.02740478515625|cri_loss: -0.011199951171875|unsuper_loss: 0.0
-average reward score: 6.6171875
--------------------------------------------------------------------------------------
-|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.80s (77.11%) |Training time=0.43s (18.61%) |Others=0.10 (4.28%)|CurSamplesPerSec=13.74 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1046|ppo_ep: 1|act_loss: -0.00704193115234375|cri_loss: -0.0029010772705078125|unsuper_loss: 0.0
-average reward score: 6.53515625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.64%) |Training time=0.43s (19.75%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1047|ppo_ep: 1|act_loss: -0.041900634765625|cri_loss: -0.0191650390625|unsuper_loss: 0.0
-average reward score: 5.9765625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (76.03%) |Training time=0.42s (19.39%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1048|ppo_ep: 1|act_loss: -0.02435302734375|cri_loss: -0.01096343994140625|unsuper_loss: 0.0
-average reward score: 6.5234375
--------------------------------------------------------------------------------------
-|E2E latency=3.19s |Gather latency=0.00s (0.00%) |Generate time=1.64s (51.33%) |Training time=0.42s (13.24%) |Others=1.13 (35.43%)|CurSamplesPerSec=10.03 |AvgSamplesPerSec=14.41
-[2023-04-14 09:26:14,502] [INFO] [logging.py:96:log_dist] [Rank 0] step=1050, skipped=15, lr=[9.340746096604097e-06, 9.340746096604097e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:26:14,520] [INFO] [timer.py:199:stop] epoch=0/micro_step=1050/global_step=1050, RunningAvgSamplesPerSec=108.63875987318835, CurrSamplesPerSec=119.30008719666715, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:26:14,613] [INFO] [logging.py:96:log_dist] [Rank 0] step=1050, skipped=15, lr=[4.839764816893315e-06, 4.839764816893315e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1049|ppo_ep: 1|act_loss: -0.0160675048828125|cri_loss: -0.0062408447265625|unsuper_loss: 0.0
-average reward score: 6.0625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.49%) |Training time=0.43s (19.96%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1050|ppo_ep: 1|act_loss: 0.043060302734375|cri_loss: 0.02301025390625|unsuper_loss: 0.0
-average reward score: 6.46875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.75%) |Training time=0.43s (19.67%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1051|ppo_ep: 1|act_loss: 0.0732421875|cri_loss: 0.038848876953125|unsuper_loss: 0.0
-average reward score: 6.47265625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.35%) |Training time=0.42s (19.48%) |Others=0.11 (5.17%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1052|ppo_ep: 1|act_loss: 0.10516357421875|cri_loss: 0.055908203125|unsuper_loss: 0.0
-average reward score: 6.5546875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.51%) |Training time=0.43s (19.92%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1053|ppo_ep: 1|act_loss: 0.01715087890625|cri_loss: 0.0106201171875|unsuper_loss: 0.0
-average reward score: 7.18359375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.70%) |Training time=0.43s (19.74%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1054|ppo_ep: 1|act_loss: -0.00988006591796875|cri_loss: -0.0038089752197265625|unsuper_loss: 0.0
-average reward score: 6.22265625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.75%) |Training time=0.43s (19.70%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1055|ppo_ep: 1|act_loss: -0.012725830078125|cri_loss: -0.0054473876953125|unsuper_loss: 0.0
-average reward score: 5.83984375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.56%) |Training time=0.43s (19.87%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1056|ppo_ep: 1|act_loss: -0.029205322265625|cri_loss: -0.0108184814453125|unsuper_loss: 0.0
-average reward score: 6.203125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.64%) |Training time=0.43s (19.80%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1057|ppo_ep: 1|act_loss: -0.03485107421875|cri_loss: -0.0145416259765625|unsuper_loss: 0.0
-average reward score: 5.52734375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.77%) |Training time=0.43s (19.69%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1058|ppo_ep: 1|act_loss: -0.027923583984375|cri_loss: -0.0127105712890625|unsuper_loss: 0.0
-average reward score: 6.32421875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.77%) |Training time=0.43s (19.68%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41
-[2023-04-14 09:26:36,137] [INFO] [logging.py:96:log_dist] [Rank 0] step=1060, skipped=15, lr=[9.334169175992489e-06, 9.334169175992489e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:26:36,479] [INFO] [timer.py:199:stop] epoch=0/micro_step=1060/global_step=1060, RunningAvgSamplesPerSec=108.62859228172402, CurrSamplesPerSec=53.83196378092723, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:26:36,653] [INFO] [logging.py:96:log_dist] [Rank 0] step=1060, skipped=15, lr=[4.836357086006471e-06, 4.836357086006471e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1059|ppo_ep: 1|act_loss: 0.0212249755859375|cri_loss: 0.01244354248046875|unsuper_loss: 0.0
-average reward score: 6.14453125
--------------------------------------------------------------------------------------
-|E2E latency=2.58s |Gather latency=0.00s (0.00%) |Generate time=1.63s (63.28%) |Training time=0.76s (29.42%) |Others=0.19 (7.30%)|CurSamplesPerSec=12.41 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1060|ppo_ep: 1|act_loss: 0.059539794921875|cri_loss: 0.03155517578125|unsuper_loss: 0.0
-average reward score: 5.90234375
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.67s (75.82%) |Training time=0.42s (19.23%) |Others=0.11 (4.94%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1061|ppo_ep: 1|act_loss: 0.025146484375|cri_loss: 0.0146636962890625|unsuper_loss: 0.0
-average reward score: 5.3984375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.44%) |Training time=0.43s (19.97%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1062|ppo_ep: 1|act_loss: 0.04669189453125|cri_loss: 0.0294342041015625|unsuper_loss: 0.0
-average reward score: 5.4375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.02%) |Training time=0.44s (20.41%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1063|ppo_ep: 1|act_loss: -0.025238037109375|cri_loss: -0.00970458984375|unsuper_loss: 0.0
-average reward score: 5.28515625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.94%) |Training time=0.44s (20.52%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1064|ppo_ep: 1|act_loss: -0.0404052734375|cri_loss: -0.018768310546875|unsuper_loss: 0.0
-average reward score: 5.91015625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.58%) |Training time=0.43s (19.87%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1065|ppo_ep: 1|act_loss: -0.181884765625|cri_loss: -0.07672119140625|unsuper_loss: 0.0
-average reward score: 5.13671875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.63%) |Training time=0.43s (19.79%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1066|ppo_ep: 1|act_loss: 0.025543212890625|cri_loss: 0.014892578125|unsuper_loss: 0.0
-average reward score: 5.42578125
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.36%) |Training time=0.44s (20.13%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1067|ppo_ep: 1|act_loss: 0.03558349609375|cri_loss: 0.0216064453125|unsuper_loss: 0.0
-average reward score: 5.6171875
--------------------------------------------------------------------------------------
-|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.73s (76.62%) |Training time=0.43s (19.02%) |Others=0.10 (4.36%)|CurSamplesPerSec=14.15 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1068|ppo_ep: 1|act_loss: 0.0838623046875|cri_loss: 0.0458984375|unsuper_loss: 0.0
-average reward score: 4.89453125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.66%) |Training time=0.43s (19.77%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.41
-[2023-04-14 09:26:58,334] [INFO] [logging.py:96:log_dist] [Rank 0] step=1070, skipped=15, lr=[9.327525418615915e-06, 9.327525418615915e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:26:58,352] [INFO] [timer.py:199:stop] epoch=0/micro_step=1070/global_step=1070, RunningAvgSamplesPerSec=108.71501038930042, CurrSamplesPerSec=122.18717841341514, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:26:58,445] [INFO] [logging.py:96:log_dist] [Rank 0] step=1070, skipped=15, lr=[4.8329147246714595e-06, 4.8329147246714595e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1069|ppo_ep: 1|act_loss: 0.06964111328125|cri_loss: 0.0382080078125|unsuper_loss: 0.0
-average reward score: 5.17578125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.82%) |Training time=0.42s (19.64%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1070|ppo_ep: 1|act_loss: -0.0236053466796875|cri_loss: -0.008270263671875|unsuper_loss: 0.0
-average reward score: 5.52734375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.72%) |Training time=0.43s (19.72%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1071|ppo_ep: 1|act_loss: 0.009796142578125|cri_loss: 0.010284423828125|unsuper_loss: 0.0
-average reward score: 5.6015625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.72%) |Training time=0.42s (19.70%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1072|ppo_ep: 1|act_loss: 0.021942138671875|cri_loss: 0.013916015625|unsuper_loss: 0.0
-average reward score: 5.07421875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.89%) |Training time=0.44s (20.56%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1073|ppo_ep: 1|act_loss: 0.016815185546875|cri_loss: 0.0126800537109375|unsuper_loss: 0.0
-average reward score: 5.17578125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.25%) |Training time=0.44s (20.19%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1074|ppo_ep: 1|act_loss: 0.0816650390625|cri_loss: 0.04913330078125|unsuper_loss: 0.0
-average reward score: 4.76171875
--------------------------------------------------------------------------------------
-|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.67s (74.00%) |Training time=0.48s (21.30%) |Others=0.11 (4.70%)|CurSamplesPerSec=14.19 |AvgSamplesPerSec=14.41
-[2023-04-14 09:27:11,505] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, but hysteresis is 2. Reducing hysteresis to 1
-epoch: 0|step: 1075|ppo_ep: 1|act_loss: 0.052154541015625|cri_loss: 0.035186767578125|unsuper_loss: 0.0
-average reward score: 4.91796875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.31%) |Training time=0.44s (20.20%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1076|ppo_ep: 1|act_loss: -0.02850341796875|cri_loss: -0.00995635986328125|unsuper_loss: 0.0
-average reward score: 4.4765625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.93%) |Training time=0.45s (20.55%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1077|ppo_ep: 1|act_loss: 0.0109100341796875|cri_loss: 0.01220703125|unsuper_loss: 0.0
-average reward score: 4.75
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.29%) |Training time=0.43s (20.15%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.41
-[2023-04-14 09:27:17,989] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, reducing to 16384
-epoch: 0|step: 1078|ppo_ep: 1|act_loss: -0.045440673828125|cri_loss: 0.000762939453125|unsuper_loss: 0.0
-average reward score: 4.140625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.52%) |Training time=0.44s (20.28%) |Others=0.09 (4.20%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.41
-[2023-04-14 09:27:20,053] [INFO] [logging.py:96:log_dist] [Rank 0] step=1080, skipped=15, lr=[9.320814922950886e-06, 9.320814922950886e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:27:20,071] [INFO] [timer.py:199:stop] epoch=0/micro_step=1080/global_step=1080, RunningAvgSamplesPerSec=108.79082765447629, CurrSamplesPerSec=116.2762955904011, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:27:20,164] [INFO] [logging.py:96:log_dist] [Rank 0] step=1080, skipped=17, lr=[4.830135935951734e-06, 4.830135935951734e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1079|ppo_ep: 1|act_loss: 0.034088134765625|cri_loss: 0.0189666748046875|unsuper_loss: 0.0
-average reward score: 4.1953125
--------------------------------------------------------------------------------------
-|E2E latency=2.72s |Gather latency=0.00s (0.00%) |Generate time=1.64s (60.37%) |Training time=0.44s (16.12%) |Others=0.64 (23.51%)|CurSamplesPerSec=11.78 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1080|ppo_ep: 1|act_loss: 0.00751495361328125|cri_loss: 0.01019287109375|unsuper_loss: 0.0
-average reward score: 4.2421875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.20%) |Training time=0.44s (20.22%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1081|ppo_ep: 1|act_loss: -0.002838134765625|cri_loss: 0.00644683837890625|unsuper_loss: 0.0
-average reward score: 4.43359375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.08%) |Training time=0.44s (20.37%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1082|ppo_ep: 1|act_loss: 0.0215911865234375|cri_loss: 0.01245880126953125|unsuper_loss: 0.0
-average reward score: 3.99609375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.76%) |Training time=0.45s (20.71%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1083|ppo_ep: 1|act_loss: 0.0736083984375|cri_loss: 0.04473876953125|unsuper_loss: 0.0
-average reward score: 3.4453125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.03%) |Training time=0.44s (20.42%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1084|ppo_ep: 1|act_loss: 0.0174407958984375|cri_loss: 0.01013946533203125|unsuper_loss: 0.0
-average reward score: 4.4140625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.98%) |Training time=0.44s (20.44%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1085|ppo_ep: 1|act_loss: 0.0565185546875|cri_loss: 0.04034423828125|unsuper_loss: 0.0
-average reward score: 3.533203125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.75%) |Training time=0.45s (20.70%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1086|ppo_ep: 1|act_loss: -0.1109619140625|cri_loss: -0.04583740234375|unsuper_loss: 0.0
-average reward score: 3.33203125
--------------------------------------------------------------------------------------
-|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.62s (68.75%) |Training time=0.44s (18.86%) |Others=0.29 (12.39%)|CurSamplesPerSec=13.57 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1087|ppo_ep: 1|act_loss: 0.01435089111328125|cri_loss: 0.008758544921875|unsuper_loss: 0.0
-average reward score: 3.759765625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.85%) |Training time=0.45s (20.61%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1088|ppo_ep: 1|act_loss: 0.047698974609375|cri_loss: 0.025726318359375|unsuper_loss: 0.0
-average reward score: 4.625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.96%) |Training time=0.44s (20.49%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41
-[2023-04-14 09:27:42,603] [INFO] [logging.py:96:log_dist] [Rank 0] step=1090, skipped=15, lr=[9.314037788463137e-06, 9.314037788463137e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:27:42,621] [INFO] [timer.py:199:stop] epoch=0/micro_step=1090/global_step=1090, RunningAvgSamplesPerSec=108.83461521557098, CurrSamplesPerSec=114.59602382121284, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:27:42,714] [INFO] [logging.py:96:log_dist] [Rank 0] step=1090, skipped=17, lr=[4.826631368735207e-06, 4.826631368735207e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1089|ppo_ep: 1|act_loss: -0.01056671142578125|cri_loss: -0.00135040283203125|unsuper_loss: 0.0
-average reward score: 4.0546875
--------------------------------------------------------------------------------------
-|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.79s (76.83%) |Training time=0.44s (18.96%) |Others=0.10 (4.21%)|CurSamplesPerSec=13.73 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1090|ppo_ep: 1|act_loss: 0.05499267578125|cri_loss: 0.038818359375|unsuper_loss: 0.0
-average reward score: 4.03125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.65%) |Training time=0.45s (20.84%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1091|ppo_ep: 1|act_loss: 0.348388671875|cri_loss: 0.212890625|unsuper_loss: 0.0
-average reward score: 3.84765625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.17%) |Training time=0.44s (20.31%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1092|ppo_ep: 1|act_loss: 0.2303466796875|cri_loss: 0.137939453125|unsuper_loss: 0.0
-average reward score: 3.955078125
--------------------------------------------------------------------------------------
-|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.25%) |Training time=0.44s (20.05%) |Others=0.15 (6.70%)|CurSamplesPerSec=14.44 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1093|ppo_ep: 1|act_loss: 0.0892333984375|cri_loss: 0.049407958984375|unsuper_loss: 0.0
-average reward score: 3.763671875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.05%) |Training time=0.44s (20.41%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1094|ppo_ep: 1|act_loss: 0.0048675537109375|cri_loss: 0.007244110107421875|unsuper_loss: 0.0
-average reward score: 3.6640625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.00%) |Training time=0.44s (20.46%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1095|ppo_ep: 1|act_loss: -0.173828125|cri_loss: -0.07159423828125|unsuper_loss: 0.0
-average reward score: 4.0390625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.98%) |Training time=0.44s (20.48%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1096|ppo_ep: 1|act_loss: -0.06512451171875|cri_loss: -0.02496337890625|unsuper_loss: 0.0
-average reward score: 4.2734375
--------------------------------------------------------------------------------------
-|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.74s (76.39%) |Training time=0.44s (19.29%) |Others=0.10 (4.33%)|CurSamplesPerSec=14.03 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1097|ppo_ep: 1|act_loss: -0.01050567626953125|cri_loss: 0.0022735595703125|unsuper_loss: 0.0
-average reward score: 4.17578125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.37%) |Training time=0.43s (20.08%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1098|ppo_ep: 1|act_loss: 0.01104736328125|cri_loss: 0.01215362548828125|unsuper_loss: 0.0
-average reward score: 4.64453125
--------------------------------------------------------------------------------------
-|E2E latency=2.11s |Gather latency=0.00s (0.00%) |Generate time=1.62s (76.93%) |Training time=0.39s (18.41%) |Others=0.10 (4.66%)|CurSamplesPerSec=15.16 |AvgSamplesPerSec=14.41
-[2023-04-14 09:28:05,186] [INFO] [logging.py:96:log_dist] [Rank 0] step=1100, skipped=15, lr=[9.307194115606148e-06, 9.307194115606148e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:28:05,204] [INFO] [timer.py:199:stop] epoch=0/micro_step=1100/global_step=1100, RunningAvgSamplesPerSec=108.9002611488009, CurrSamplesPerSec=115.39545924679588, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:28:05,296] [INFO] [logging.py:96:log_dist] [Rank 0] step=1100, skipped=17, lr=[4.823092315229102e-06, 4.823092315229102e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1099|ppo_ep: 1|act_loss: -0.0202178955078125|cri_loss: -0.003326416015625|unsuper_loss: 0.0
-average reward score: 3.9765625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.13%) |Training time=0.44s (20.34%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1100|ppo_ep: 1|act_loss: -0.01149749755859375|cri_loss: -0.00286865234375|unsuper_loss: 0.0
-average reward score: 4.5859375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.96%) |Training time=0.44s (20.50%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1101|ppo_ep: 1|act_loss: -0.02362060546875|cri_loss: -0.00521087646484375|unsuper_loss: 0.0
-average reward score: 4.79296875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.05%) |Training time=0.44s (20.40%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1102|ppo_ep: 1|act_loss: 0.00652313232421875|cri_loss: 0.00743865966796875|unsuper_loss: 0.0
-average reward score: 4.76171875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.87%) |Training time=0.45s (20.58%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1103|ppo_ep: 1|act_loss: 0.023468017578125|cri_loss: 0.01531982421875|unsuper_loss: 0.0
-average reward score: 5.0234375
--------------------------------------------------------------------------------------
-|E2E latency=3.39s |Gather latency=0.00s (0.00%) |Generate time=1.78s (52.71%) |Training time=0.44s (12.95%) |Others=1.16 (34.34%)|CurSamplesPerSec=9.45 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1104|ppo_ep: 1|act_loss: 0.036376953125|cri_loss: 0.01971435546875|unsuper_loss: 0.0
-average reward score: 4.43359375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.45%) |Training time=0.43s (19.78%) |Others=0.10 (4.77%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1105|ppo_ep: 1|act_loss: 0.07080078125|cri_loss: 0.03900146484375|unsuper_loss: 0.0
-average reward score: 4.6796875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.66s (76.01%) |Training time=0.42s (19.47%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1106|ppo_ep: 1|act_loss: 0.011138916015625|cri_loss: 0.0069122314453125|unsuper_loss: 0.0
-average reward score: 4.63671875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.34%) |Training time=0.44s (20.12%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1107|ppo_ep: 1|act_loss: 0.0400390625|cri_loss: 0.022705078125|unsuper_loss: 0.0
-average reward score: 4.8671875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.37%) |Training time=0.43s (20.06%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1108|ppo_ep: 1|act_loss: -0.109130859375|cri_loss: -0.045318603515625|unsuper_loss: 0.0
-average reward score: 4.1796875
--------------------------------------------------------------------------------------
-|E2E latency=2.87s |Gather latency=0.00s (0.00%) |Generate time=1.63s (56.75%) |Training time=0.44s (15.15%) |Others=0.81 (28.10%)|CurSamplesPerSec=11.13 |AvgSamplesPerSec=14.41
-[2023-04-14 09:28:28,786] [INFO] [logging.py:96:log_dist] [Rank 0] step=1110, skipped=15, lr=[9.300284005819661e-06, 9.300284005819661e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:28:28,804] [INFO] [timer.py:199:stop] epoch=0/micro_step=1110/global_step=1110, RunningAvgSamplesPerSec=108.96558639565792, CurrSamplesPerSec=116.06514008993427, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:28:28,897] [INFO] [logging.py:96:log_dist] [Rank 0] step=1110, skipped=17, lr=[4.8195188278907305e-06, 4.8195188278907305e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1109|ppo_ep: 1|act_loss: -0.0684814453125|cri_loss: -0.0257110595703125|unsuper_loss: 0.0
-average reward score: 4.12109375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.19%) |Training time=0.44s (20.28%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1110|ppo_ep: 1|act_loss: 0.0562744140625|cri_loss: 0.031768798828125|unsuper_loss: 0.0
-average reward score: 5.16796875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.98%) |Training time=0.44s (20.47%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1111|ppo_ep: 1|act_loss: -0.0019283294677734375|cri_loss: 0.0|unsuper_loss: 0.0
-average reward score: 4.421875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.26%) |Training time=0.44s (20.20%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1112|ppo_ep: 1|act_loss: 0.04559326171875|cri_loss: 0.0241241455078125|unsuper_loss: 0.0
-average reward score: 5.22265625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.81%) |Training time=0.43s (19.66%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1113|ppo_ep: 1|act_loss: 0.0626220703125|cri_loss: 0.03411865234375|unsuper_loss: 0.0
-average reward score: 4.84765625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.02%) |Training time=0.44s (20.44%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1114|ppo_ep: 1|act_loss: 0.01316070556640625|cri_loss: 0.00732421875|unsuper_loss: 0.0
-average reward score: 4.48828125
--------------------------------------------------------------------------------------
-|E2E latency=2.56s |Gather latency=0.00s (0.00%) |Generate time=1.62s (63.50%) |Training time=0.44s (17.30%) |Others=0.49 (19.20%)|CurSamplesPerSec=12.51 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1115|ppo_ep: 1|act_loss: -0.030792236328125|cri_loss: -0.014434814453125|unsuper_loss: 0.0
-average reward score: 3.97265625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.05%) |Training time=0.44s (20.38%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1116|ppo_ep: 1|act_loss: 0.01446533203125|cri_loss: 0.0109100341796875|unsuper_loss: 0.0
-average reward score: 4.43359375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.04%) |Training time=0.44s (20.42%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1117|ppo_ep: 1|act_loss: -0.0129547119140625|cri_loss: -0.00620269775390625|unsuper_loss: 0.0
-average reward score: 5.46875
--------------------------------------------------------------------------------------
-|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.80s (77.27%) |Training time=0.43s (18.50%) |Others=0.10 (4.22%)|CurSamplesPerSec=13.77 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1118|ppo_ep: 1|act_loss: 0.0660400390625|cri_loss: 0.03564453125|unsuper_loss: 0.0
-average reward score: 5.03125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.07%) |Training time=0.44s (20.37%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.41
-[2023-04-14 09:28:50,989] [INFO] [logging.py:96:log_dist] [Rank 0] step=1120, skipped=15, lr=[9.293307561528172e-06, 9.293307561528172e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:28:51,007] [INFO] [timer.py:199:stop] epoch=0/micro_step=1120/global_step=1120, RunningAvgSamplesPerSec=109.03085977626249, CurrSamplesPerSec=126.14459975996218, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:28:51,103] [INFO] [logging.py:96:log_dist] [Rank 0] step=1120, skipped=17, lr=[4.815910959687795e-06, 4.815910959687795e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1119|ppo_ep: 1|act_loss: 0.044677734375|cri_loss: 0.0248870849609375|unsuper_loss: 0.0
-average reward score: 4.21875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.65s (76.08%) |Training time=0.42s (19.25%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1120|ppo_ep: 1|act_loss: 0.0006799697875976562|cri_loss: 0.001979827880859375|unsuper_loss: 0.0
-average reward score: 4.3984375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.89%) |Training time=0.42s (19.56%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1121|ppo_ep: 1|act_loss: 0.0147705078125|cri_loss: 0.00927734375|unsuper_loss: 0.0
-average reward score: 5.06640625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.70%) |Training time=0.45s (20.75%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1122|ppo_ep: 1|act_loss: 0.0072174072265625|cri_loss: 0.00495147705078125|unsuper_loss: 0.0
-average reward score: 4.875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.66%) |Training time=0.45s (20.79%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1123|ppo_ep: 1|act_loss: 0.047943115234375|cri_loss: 0.0294342041015625|unsuper_loss: 0.0
-average reward score: 4.54296875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.74%) |Training time=0.45s (20.43%) |Others=0.11 (4.83%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1124|ppo_ep: 1|act_loss: 0.00019073486328125|cri_loss: 0.0015573501586914062|unsuper_loss: 0.0
-average reward score: 4.015625
--------------------------------------------------------------------------------------
-|E2E latency=2.30s |Gather latency=0.00s (0.00%) |Generate time=1.65s (71.87%) |Training time=0.45s (19.42%) |Others=0.20 (8.71%)|CurSamplesPerSec=13.91 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1125|ppo_ep: 1|act_loss: 0.04345703125|cri_loss: 0.0232696533203125|unsuper_loss: 0.0
-average reward score: 4.109375
--------------------------------------------------------------------------------------
-|E2E latency=2.11s |Gather latency=0.00s (0.00%) |Generate time=1.62s (76.99%) |Training time=0.39s (18.33%) |Others=0.10 (4.68%)|CurSamplesPerSec=15.18 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1126|ppo_ep: 1|act_loss: -0.046051025390625|cri_loss: -0.02191162109375|unsuper_loss: 0.0
-average reward score: 4.7890625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.09%) |Training time=0.44s (20.34%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1127|ppo_ep: 1|act_loss: -0.0043487548828125|cri_loss: -0.0015249252319335938|unsuper_loss: 0.0
-average reward score: 5.03125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.11%) |Training time=0.44s (20.32%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1128|ppo_ep: 1|act_loss: 0.04193115234375|cri_loss: 0.023284912109375|unsuper_loss: 0.0
-average reward score: 4.8125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.19%) |Training time=0.44s (20.25%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41
-[2023-04-14 09:29:12,723] [INFO] [logging.py:96:log_dist] [Rank 0] step=1130, skipped=15, lr=[9.286264886139418e-06, 9.286264886139418e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:29:12,741] [INFO] [timer.py:199:stop] epoch=0/micro_step=1130/global_step=1130, RunningAvgSamplesPerSec=109.09936493188563, CurrSamplesPerSec=115.27771474509555, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:29:12,834] [INFO] [logging.py:96:log_dist] [Rank 0] step=1130, skipped=17, lr=[4.812268764097606e-06, 4.812268764097606e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1129|ppo_ep: 1|act_loss: 0.0244140625|cri_loss: 0.0134124755859375|unsuper_loss: 0.0
-average reward score: 4.63671875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.05%) |Training time=0.44s (20.41%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1130|ppo_ep: 1|act_loss: 0.0638427734375|cri_loss: 0.0350341796875|unsuper_loss: 0.0
-average reward score: 4.0625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.02%) |Training time=0.44s (20.41%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1131|ppo_ep: 1|act_loss: 0.012664794921875|cri_loss: 0.00897216796875|unsuper_loss: 0.0
-average reward score: 4.8203125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.42%) |Training time=0.43s (20.02%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1132|ppo_ep: 1|act_loss: -0.006801605224609375|cri_loss: -0.00240325927734375|unsuper_loss: 0.0
-average reward score: 3.84375
--------------------------------------------------------------------------------------
-|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.80s (76.72%) |Training time=0.44s (18.77%) |Others=0.11 (4.51%)|CurSamplesPerSec=13.64 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1133|ppo_ep: 1|act_loss: -0.061920166015625|cri_loss: -0.02935791015625|unsuper_loss: 0.0
-average reward score: 4.25
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.95%) |Training time=0.44s (20.39%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1134|ppo_ep: 1|act_loss: -0.033355712890625|cri_loss: -0.0149993896484375|unsuper_loss: 0.0
-average reward score: 4.8828125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (76.02%) |Training time=0.42s (19.42%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1135|ppo_ep: 1|act_loss: -0.023468017578125|cri_loss: -0.01123046875|unsuper_loss: 0.0
-average reward score: 4.31640625
--------------------------------------------------------------------------------------
-|E2E latency=2.51s |Gather latency=0.00s (0.00%) |Generate time=1.65s (65.98%) |Training time=0.40s (16.15%) |Others=0.45 (17.87%)|CurSamplesPerSec=12.77 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1136|ppo_ep: 1|act_loss: -0.0093231201171875|cri_loss: -0.004039764404296875|unsuper_loss: 0.0
-average reward score: 5.02734375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.92%) |Training time=0.44s (20.52%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1137|ppo_ep: 1|act_loss: 0.023101806640625|cri_loss: 0.0122833251953125|unsuper_loss: 0.0
-average reward score: 4.63671875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.99%) |Training time=0.44s (20.45%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1138|ppo_ep: 1|act_loss: 0.02978515625|cri_loss: 0.0157318115234375|unsuper_loss: 0.0
-average reward score: 4.2109375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.90%) |Training time=0.45s (20.55%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41
-[2023-04-14 09:29:34,886] [INFO] [logging.py:96:log_dist] [Rank 0] step=1140, skipped=15, lr=[9.279156084042835e-06, 9.279156084042835e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:29:34,904] [INFO] [timer.py:199:stop] epoch=0/micro_step=1140/global_step=1140, RunningAvgSamplesPerSec=109.17206823492205, CurrSamplesPerSec=121.0444594753028, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:29:34,997] [INFO] [logging.py:96:log_dist] [Rank 0] step=1140, skipped=17, lr=[4.80859229510629e-06, 4.80859229510629e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1139|ppo_ep: 1|act_loss: 0.02178955078125|cri_loss: 0.011505126953125|unsuper_loss: 0.0
-average reward score: 3.923828125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.76%) |Training time=0.43s (19.72%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1140|ppo_ep: 1|act_loss: -0.01971435546875|cri_loss: -0.00472259521484375|unsuper_loss: 0.0
-average reward score: 4.578125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.19%) |Training time=0.44s (20.26%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1141|ppo_ep: 1|act_loss: -0.04974365234375|cri_loss: -0.022308349609375|unsuper_loss: 0.0
-average reward score: 4.73828125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.89%) |Training time=0.45s (20.58%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1142|ppo_ep: 1|act_loss: 0.0038242340087890625|cri_loss: 0.003997802734375|unsuper_loss: 0.0
-average reward score: 4.66796875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.12%) |Training time=0.44s (20.32%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1143|ppo_ep: 1|act_loss: 0.006988525390625|cri_loss: 0.00522613525390625|unsuper_loss: 0.0
-average reward score: 4.69140625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.05%) |Training time=0.44s (20.41%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1144|ppo_ep: 1|act_loss: 0.0173187255859375|cri_loss: 0.0114898681640625|unsuper_loss: 0.0
-average reward score: 4.8515625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.82%) |Training time=0.45s (20.63%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1145|ppo_ep: 1|act_loss: 0.0137481689453125|cri_loss: 0.0083465576171875|unsuper_loss: 0.0
-average reward score: 5.01171875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.84%) |Training time=0.45s (20.61%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1146|ppo_ep: 1|act_loss: 0.04644775390625|cri_loss: 0.025482177734375|unsuper_loss: 0.0
-average reward score: 5.1015625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.91%) |Training time=0.44s (20.54%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1147|ppo_ep: 1|act_loss: -0.000690460205078125|cri_loss: 0.0012369155883789062|unsuper_loss: 0.0
-average reward score: 4.29296875
--------------------------------------------------------------------------------------
-|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.80s (76.88%) |Training time=0.44s (18.92%) |Others=0.10 (4.20%)|CurSamplesPerSec=13.66 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1148|ppo_ep: 1|act_loss: 0.0494384765625|cri_loss: 0.0260009765625|unsuper_loss: 0.0
-average reward score: 4.12109375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.21%) |Training time=0.44s (20.23%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.41
-[2023-04-14 09:29:56,703] [INFO] [logging.py:96:log_dist] [Rank 0] step=1150, skipped=15, lr=[9.27198126060802e-06, 9.27198126060802e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:29:56,721] [INFO] [timer.py:199:stop] epoch=0/micro_step=1150/global_step=1150, RunningAvgSamplesPerSec=109.21557534240085, CurrSamplesPerSec=115.28613124391221, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:29:56,814] [INFO] [logging.py:96:log_dist] [Rank 0] step=1150, skipped=17, lr=[4.804881607207983e-06, 4.804881607207983e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1149|ppo_ep: 1|act_loss: 0.0153045654296875|cri_loss: 0.01132965087890625|unsuper_loss: 0.0
-average reward score: 5.625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.15%) |Training time=0.44s (20.32%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1150|ppo_ep: 1|act_loss: -0.0416259765625|cri_loss: -0.018768310546875|unsuper_loss: 0.0
-average reward score: 4.6796875
--------------------------------------------------------------------------------------
-|E2E latency=2.93s |Gather latency=0.00s (0.00%) |Generate time=1.64s (55.98%) |Training time=0.42s (14.40%) |Others=0.87 (29.62%)|CurSamplesPerSec=10.94 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1151|ppo_ep: 1|act_loss: -0.05712890625|cri_loss: -0.0264434814453125|unsuper_loss: 0.0
-average reward score: 3.96875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.46%) |Training time=0.43s (19.94%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1152|ppo_ep: 1|act_loss: -0.078369140625|cri_loss: -0.036956787109375|unsuper_loss: 0.0
-average reward score: 4.48046875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.08%) |Training time=0.44s (20.37%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1153|ppo_ep: 1|act_loss: -0.02105712890625|cri_loss: -0.0081329345703125|unsuper_loss: 0.0
-average reward score: 4.91015625
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.01%) |Training time=0.45s (20.49%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1154|ppo_ep: 1|act_loss: 0.0235443115234375|cri_loss: 0.01214599609375|unsuper_loss: 0.0
-average reward score: 4.66796875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.40%) |Training time=0.43s (20.04%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1155|ppo_ep: 1|act_loss: 0.0322265625|cri_loss: 0.0175933837890625|unsuper_loss: 0.0
-average reward score: 4.71484375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.46%) |Training time=0.43s (19.98%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1156|ppo_ep: 1|act_loss: 0.02813720703125|cri_loss: 0.01535797119140625|unsuper_loss: 0.0
-average reward score: 3.984375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.18%) |Training time=0.44s (20.24%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1157|ppo_ep: 1|act_loss: 0.008514404296875|cri_loss: 0.005710601806640625|unsuper_loss: 0.0
-average reward score: 4.1953125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.39%) |Training time=0.43s (20.02%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1158|ppo_ep: 1|act_loss: 0.0053863525390625|cri_loss: 0.0035552978515625|unsuper_loss: 0.0
-average reward score: 4.2421875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.96%) |Training time=0.43s (19.94%) |Others=0.11 (5.10%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.41
-[2023-04-14 09:30:19,101] [INFO] [logging.py:96:log_dist] [Rank 0] step=1160, skipped=15, lr=[9.264740522183165e-06, 9.264740522183165e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:30:19,119] [INFO] [timer.py:199:stop] epoch=0/micro_step=1160/global_step=1160, RunningAvgSamplesPerSec=109.29043728671321, CurrSamplesPerSec=118.15315274075587, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:30:19,212] [INFO] [logging.py:96:log_dist] [Rank 0] step=1160, skipped=17, lr=[4.801136755404034e-06, 4.801136755404034e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1159|ppo_ep: 1|act_loss: -0.0248260498046875|cri_loss: -0.01092529296875|unsuper_loss: 0.0
-average reward score: 4.16796875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.39%) |Training time=0.43s (20.07%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1160|ppo_ep: 1|act_loss: -0.0252685546875|cri_loss: -0.01198577880859375|unsuper_loss: 0.0
-average reward score: 4.75
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.42%) |Training time=0.44s (20.06%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1161|ppo_ep: 1|act_loss: -0.025543212890625|cri_loss: -0.01092529296875|unsuper_loss: 0.0
-average reward score: 4.53125
--------------------------------------------------------------------------------------
-|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.79s (77.13%) |Training time=0.43s (18.60%) |Others=0.10 (4.28%)|CurSamplesPerSec=13.80 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1162|ppo_ep: 1|act_loss: -0.08428955078125|cri_loss: -0.0318603515625|unsuper_loss: 0.0
-average reward score: 5.06640625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.12%) |Training time=0.44s (20.33%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1163|ppo_ep: 1|act_loss: 0.057861328125|cri_loss: 0.0304412841796875|unsuper_loss: 0.0
-average reward score: 5.62890625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.46%) |Training time=0.43s (19.98%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1164|ppo_ep: 1|act_loss: 0.05133056640625|cri_loss: 0.028472900390625|unsuper_loss: 0.0
-average reward score: 4.40234375
--------------------------------------------------------------------------------------
-|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.63s (69.99%) |Training time=0.44s (18.87%) |Others=0.26 (11.14%)|CurSamplesPerSec=13.75 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1165|ppo_ep: 1|act_loss: 0.0540771484375|cri_loss: 0.027984619140625|unsuper_loss: 0.0
-average reward score: 5.75390625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.60%) |Training time=0.43s (19.83%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1166|ppo_ep: 1|act_loss: 0.00930023193359375|cri_loss: 0.005130767822265625|unsuper_loss: 0.0
-average reward score: 4.8671875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.08%) |Training time=0.44s (20.37%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1167|ppo_ep: 1|act_loss: -0.0136566162109375|cri_loss: -0.006427764892578125|unsuper_loss: 0.0
-average reward score: 4.359375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.96%) |Training time=0.44s (20.49%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1168|ppo_ep: 1|act_loss: -0.025115966796875|cri_loss: -0.0105133056640625|unsuper_loss: 0.0
-average reward score: 4.28515625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.92%) |Training time=0.45s (20.54%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.41
-[2023-04-14 09:30:41,075] [INFO] [logging.py:96:log_dist] [Rank 0] step=1170, skipped=15, lr=[9.25743397609348e-06, 9.25743397609348e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:30:41,093] [INFO] [timer.py:199:stop] epoch=0/micro_step=1170/global_step=1170, RunningAvgSamplesPerSec=109.34554743622506, CurrSamplesPerSec=113.94323795455607, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:30:41,186] [INFO] [logging.py:96:log_dist] [Rank 0] step=1170, skipped=17, lr=[4.797357795202179e-06, 4.797357795202179e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1169|ppo_ep: 1|act_loss: -0.04736328125|cri_loss: -0.018341064453125|unsuper_loss: 0.0
-average reward score: 3.822265625
--------------------------------------------------------------------------------------
-|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.62s (71.43%) |Training time=0.44s (19.54%) |Others=0.21 (9.03%)|CurSamplesPerSec=14.09 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1170|ppo_ep: 1|act_loss: -0.05035400390625|cri_loss: -0.021392822265625|unsuper_loss: 0.0
-average reward score: 4.90625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.10%) |Training time=0.44s (20.35%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1171|ppo_ep: 1|act_loss: 0.00125885009765625|cri_loss: 0.0021820068359375|unsuper_loss: 0.0
-average reward score: 4.43359375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.96%) |Training time=0.44s (20.50%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1172|ppo_ep: 1|act_loss: -0.00449371337890625|cri_loss: -0.0005588531494140625|unsuper_loss: 0.0
-average reward score: 3.71484375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.86%) |Training time=0.45s (20.60%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1173|ppo_ep: 1|act_loss: 0.022857666015625|cri_loss: 0.01275634765625|unsuper_loss: 0.0
-average reward score: 3.71484375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.98%) |Training time=0.44s (20.47%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1174|ppo_ep: 1|act_loss: 0.087158203125|cri_loss: 0.047210693359375|unsuper_loss: 0.0
-average reward score: 4.0546875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.03%) |Training time=0.44s (20.41%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1175|ppo_ep: 1|act_loss: -0.0106964111328125|cri_loss: -0.0047454833984375|unsuper_loss: 0.0
-average reward score: 4.234375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.31%) |Training time=0.44s (20.16%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1176|ppo_ep: 1|act_loss: -0.036376953125|cri_loss: -0.0170440673828125|unsuper_loss: 0.0
-average reward score: 5.74609375
--------------------------------------------------------------------------------------
-|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.77s (76.12%) |Training time=0.46s (19.58%) |Others=0.10 (4.31%)|CurSamplesPerSec=13.73 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1177|ppo_ep: 1|act_loss: -0.05767822265625|cri_loss: -0.0277862548828125|unsuper_loss: 0.0
-average reward score: 4.23828125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.65%) |Training time=0.45s (20.82%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1178|ppo_ep: 1|act_loss: 0.023590087890625|cri_loss: 0.014129638671875|unsuper_loss: 0.0
-average reward score: 4.44140625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.05%) |Training time=0.44s (20.40%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.41
-[2023-04-14 09:31:03,012] [INFO] [logging.py:96:log_dist] [Rank 0] step=1180, skipped=15, lr=[9.250061730639604e-06, 9.250061730639604e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:31:03,276] [INFO] [timer.py:199:stop] epoch=0/micro_step=1180/global_step=1180, RunningAvgSamplesPerSec=109.3098238350795, CurrSamplesPerSec=63.37054074560561, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:31:03,369] [INFO] [logging.py:96:log_dist] [Rank 0] step=1180, skipped=17, lr=[4.793544782615725e-06, 4.793544782615725e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1179|ppo_ep: 1|act_loss: 0.005401611328125|cri_loss: 0.0032062530517578125|unsuper_loss: 0.0
-average reward score: 3.94921875
--------------------------------------------------------------------------------------
-|E2E latency=2.41s |Gather latency=0.00s (0.00%) |Generate time=1.64s (68.15%) |Training time=0.67s (27.78%) |Others=0.10 (4.08%)|CurSamplesPerSec=13.29 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1180|ppo_ep: 1|act_loss: -0.025115966796875|cri_loss: -0.011627197265625|unsuper_loss: 0.0
-average reward score: 4.640625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.60%) |Training time=0.43s (19.84%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1181|ppo_ep: 1|act_loss: 0.06866455078125|cri_loss: 0.036651611328125|unsuper_loss: 0.0
-average reward score: 4.6171875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.16%) |Training time=0.44s (20.30%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1182|ppo_ep: 1|act_loss: -0.0316162109375|cri_loss: -0.014556884765625|unsuper_loss: 0.0
-average reward score: 4.6015625
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.66s (75.45%) |Training time=0.44s (20.06%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1183|ppo_ep: 1|act_loss: 0.024444580078125|cri_loss: 0.0137939453125|unsuper_loss: 0.0
-average reward score: 5.03125
--------------------------------------------------------------------------------------
-|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.74s (77.02%) |Training time=0.42s (18.62%) |Others=0.10 (4.36%)|CurSamplesPerSec=14.14 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1184|ppo_ep: 1|act_loss: -0.04498291015625|cri_loss: -0.02166748046875|unsuper_loss: 0.0
-average reward score: 4.625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.60%) |Training time=0.43s (19.86%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1185|ppo_ep: 1|act_loss: 0.0167236328125|cri_loss: 0.009552001953125|unsuper_loss: 0.0
-average reward score: 4.8203125
--------------------------------------------------------------------------------------
-|E2E latency=2.49s |Gather latency=0.00s (0.00%) |Generate time=1.64s (65.59%) |Training time=0.43s (17.23%) |Others=0.43 (17.18%)|CurSamplesPerSec=12.83 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1186|ppo_ep: 1|act_loss: -0.0032024383544921875|cri_loss: -0.0007781982421875|unsuper_loss: 0.0
-average reward score: 5.01171875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.92%) |Training time=0.42s (19.54%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1187|ppo_ep: 1|act_loss: -0.017578125|cri_loss: -0.00304412841796875|unsuper_loss: 0.0
-average reward score: 3.634765625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.66s (76.42%) |Training time=0.41s (19.05%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1188|ppo_ep: 1|act_loss: -0.009918212890625|cri_loss: -0.0033702850341796875|unsuper_loss: 0.0
-average reward score: 4.265625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.60%) |Training time=0.43s (19.85%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.41
-[2023-04-14 09:31:25,384] [INFO] [logging.py:96:log_dist] [Rank 0] step=1190, skipped=15, lr=[9.242623895096e-06, 9.242623895096e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:31:25,403] [INFO] [timer.py:199:stop] epoch=0/micro_step=1190/global_step=1190, RunningAvgSamplesPerSec=109.38797397111014, CurrSamplesPerSec=113.55204980063334, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:31:25,497] [INFO] [logging.py:96:log_dist] [Rank 0] step=1190, skipped=17, lr=[4.789697774162718e-06, 4.789697774162718e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1189|ppo_ep: 1|act_loss: -0.028289794921875|cri_loss: -0.01258087158203125|unsuper_loss: 0.0
-average reward score: 4.91796875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.85%) |Training time=0.44s (20.53%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1190|ppo_ep: 1|act_loss: -0.020721435546875|cri_loss: -0.0089569091796875|unsuper_loss: 0.0
-average reward score: 4.421875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.98%) |Training time=0.44s (20.45%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1191|ppo_ep: 1|act_loss: -0.03985595703125|cri_loss: -0.018280029296875|unsuper_loss: 0.0
-average reward score: 4.02734375
--------------------------------------------------------------------------------------
-|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.79s (77.01%) |Training time=0.44s (18.72%) |Others=0.10 (4.26%)|CurSamplesPerSec=13.77 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1192|ppo_ep: 1|act_loss: -0.05194091796875|cri_loss: -0.02423095703125|unsuper_loss: 0.0
-average reward score: 4.234375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.96%) |Training time=0.44s (20.50%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1193|ppo_ep: 1|act_loss: 0.035552978515625|cri_loss: 0.018402099609375|unsuper_loss: 0.0
-average reward score: 4.6328125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.94%) |Training time=0.44s (20.51%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1194|ppo_ep: 1|act_loss: 0.11077880859375|cri_loss: 0.060760498046875|unsuper_loss: 0.0
-average reward score: 4.484375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.97%) |Training time=0.42s (19.47%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1195|ppo_ep: 1|act_loss: 0.10888671875|cri_loss: 0.05926513671875|unsuper_loss: 0.0
-average reward score: 4.8203125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.44%) |Training time=0.43s (20.00%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1196|ppo_ep: 1|act_loss: 0.04571533203125|cri_loss: 0.0241851806640625|unsuper_loss: 0.0
-average reward score: 4.48828125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.14%) |Training time=0.44s (20.30%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1197|ppo_ep: 1|act_loss: 0.028533935546875|cri_loss: 0.0150604248046875|unsuper_loss: 0.0
-average reward score: 4.2890625
--------------------------------------------------------------------------------------
-|E2E latency=2.11s |Gather latency=0.00s (0.00%) |Generate time=1.62s (76.97%) |Training time=0.39s (18.34%) |Others=0.10 (4.69%)|CurSamplesPerSec=15.18 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1198|ppo_ep: 1|act_loss: -0.020477294921875|cri_loss: -0.00858306884765625|unsuper_loss: 0.0
-average reward score: 4.16796875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.67%) |Training time=0.43s (19.78%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42
-[2023-04-14 09:31:47,505] [INFO] [logging.py:96:log_dist] [Rank 0] step=1200, skipped=15, lr=[9.235120579709336e-06, 9.235120579709336e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:31:47,520] [INFO] [timer.py:199:stop] epoch=0/micro_step=1200/global_step=1200, RunningAvgSamplesPerSec=109.46486512641026, CurrSamplesPerSec=118.27549262462009, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:31:47,613] [INFO] [logging.py:96:log_dist] [Rank 0] step=1200, skipped=17, lr=[4.7858168268651025e-06, 4.7858168268651025e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1199|ppo_ep: 1|act_loss: -0.04156494140625|cri_loss: -0.0186614990234375|unsuper_loss: 0.0
-average reward score: 4.33984375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.45%) |Training time=0.43s (20.02%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1200|ppo_ep: 1|act_loss: -0.052703857421875|cri_loss: -0.02392578125|unsuper_loss: 0.0
-average reward score: 4.7421875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.32%) |Training time=0.44s (20.14%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1201|ppo_ep: 1|act_loss: -0.0755615234375|cri_loss: -0.033935546875|unsuper_loss: 0.0
-average reward score: 4.234375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.56%) |Training time=0.43s (19.91%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1202|ppo_ep: 1|act_loss: 0.0014781951904296875|cri_loss: 0.002246856689453125|unsuper_loss: 0.0
-average reward score: 4.328125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.72%) |Training time=0.43s (19.74%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1203|ppo_ep: 1|act_loss: 0.00370025634765625|cri_loss: 0.0035266876220703125|unsuper_loss: 0.0
-average reward score: 4.2109375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.52%) |Training time=0.43s (19.94%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1204|ppo_ep: 1|act_loss: 0.00565338134765625|cri_loss: 0.003631591796875|unsuper_loss: 0.0
-average reward score: 4.109375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.22%) |Training time=0.44s (20.26%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1205|ppo_ep: 1|act_loss: 0.00897216796875|cri_loss: 0.00499725341796875|unsuper_loss: 0.0
-average reward score: 4.83984375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.52%) |Training time=0.43s (19.94%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1206|ppo_ep: 1|act_loss: 0.01291656494140625|cri_loss: 0.007625579833984375|unsuper_loss: 0.0
-average reward score: 4.3203125
--------------------------------------------------------------------------------------
-|E2E latency=2.91s |Gather latency=0.00s (0.00%) |Generate time=1.81s (62.01%) |Training time=0.42s (14.56%) |Others=0.68 (23.43%)|CurSamplesPerSec=10.98 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1207|ppo_ep: 1|act_loss: 0.04498291015625|cri_loss: 0.024261474609375|unsuper_loss: 0.0
-average reward score: 4.453125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.68%) |Training time=0.43s (19.77%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1208|ppo_ep: 1|act_loss: 0.0108795166015625|cri_loss: 0.00605010986328125|unsuper_loss: 0.0
-average reward score: 5.44921875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.43%) |Training time=0.43s (20.03%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.41
-[2023-04-14 09:32:09,937] [INFO] [logging.py:96:log_dist] [Rank 0] step=1210, skipped=15, lr=[9.22755189569684e-06, 9.22755189569684e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:32:09,955] [INFO] [timer.py:199:stop] epoch=0/micro_step=1210/global_step=1210, RunningAvgSamplesPerSec=109.54153886070056, CurrSamplesPerSec=129.74929212839035, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:32:10,048] [INFO] [logging.py:96:log_dist] [Rank 0] step=1210, skipped=17, lr=[4.7819019982478805e-06, 4.7819019982478805e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1209|ppo_ep: 1|act_loss: 0.0146484375|cri_loss: 0.008087158203125|unsuper_loss: 0.0
-average reward score: 4.421875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.65s (76.45%) |Training time=0.41s (18.99%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1210|ppo_ep: 1|act_loss: -0.0238800048828125|cri_loss: -0.01039886474609375|unsuper_loss: 0.0
-average reward score: 5.46484375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.82%) |Training time=0.43s (19.64%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1211|ppo_ep: 1|act_loss: 0.01238250732421875|cri_loss: 0.006862640380859375|unsuper_loss: 0.0
-average reward score: 4.66796875
--------------------------------------------------------------------------------------
-|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.66s (74.33%) |Training time=0.44s (19.62%) |Others=0.14 (6.05%)|CurSamplesPerSec=14.33 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1212|ppo_ep: 1|act_loss: -0.0153656005859375|cri_loss: -0.004589080810546875|unsuper_loss: 0.0
-average reward score: 4.5546875
--------------------------------------------------------------------------------------
-|E2E latency=2.67s |Gather latency=0.00s (0.00%) |Generate time=1.69s (63.17%) |Training time=0.44s (16.36%) |Others=0.55 (20.47%)|CurSamplesPerSec=11.98 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1213|ppo_ep: 1|act_loss: 0.00885772705078125|cri_loss: 0.005619049072265625|unsuper_loss: 0.0
-average reward score: 4.19921875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.27%) |Training time=0.43s (20.14%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1214|ppo_ep: 1|act_loss: 0.0186920166015625|cri_loss: 0.0106964111328125|unsuper_loss: 0.0
-average reward score: 5.26171875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.51%) |Training time=0.43s (19.95%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1215|ppo_ep: 1|act_loss: -0.0203704833984375|cri_loss: -0.00943756103515625|unsuper_loss: 0.0
-average reward score: 4.1484375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.17%) |Training time=0.44s (20.18%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1216|ppo_ep: 1|act_loss: -0.0011844635009765625|cri_loss: 0.00022029876708984375|unsuper_loss: 0.0
-average reward score: 4.6171875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.05%) |Training time=0.44s (20.30%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1217|ppo_ep: 1|act_loss: -0.01849365234375|cri_loss: -0.00757598876953125|unsuper_loss: 0.0
-average reward score: 4.64453125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.75%) |Training time=0.45s (20.69%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1218|ppo_ep: 1|act_loss: 0.00743865966796875|cri_loss: 0.00434112548828125|unsuper_loss: 0.0
-average reward score: 4.1328125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.56%) |Training time=0.44s (20.17%) |Others=0.11 (5.27%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.41
-[2023-04-14 09:32:32,197] [INFO] [logging.py:96:log_dist] [Rank 0] step=1220, skipped=15, lr=[9.219917955244674e-06, 9.219917955244674e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:32:32,214] [INFO] [timer.py:199:stop] epoch=0/micro_step=1220/global_step=1220, RunningAvgSamplesPerSec=109.59446571481158, CurrSamplesPerSec=116.17172795170941, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:32:32,308] [INFO] [logging.py:96:log_dist] [Rank 0] step=1220, skipped=17, lr=[4.777953346338256e-06, 4.777953346338256e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1219|ppo_ep: 1|act_loss: 0.03564453125|cri_loss: 0.01837158203125|unsuper_loss: 0.0
-average reward score: 4.1484375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.18%) |Training time=0.44s (20.26%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1220|ppo_ep: 1|act_loss: 0.0859375|cri_loss: 0.046600341796875|unsuper_loss: 0.0
-average reward score: 4.80859375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.06%) |Training time=0.44s (20.37%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1221|ppo_ep: 1|act_loss: -0.004993438720703125|cri_loss: -0.002292633056640625|unsuper_loss: 0.0
-average reward score: 4.91015625
--------------------------------------------------------------------------------------
-|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.63s (72.70%) |Training time=0.45s (20.14%) |Others=0.16 (7.17%)|CurSamplesPerSec=14.30 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1222|ppo_ep: 1|act_loss: -0.022979736328125|cri_loss: -0.009552001953125|unsuper_loss: 0.0
-average reward score: 4.7421875
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.29%) |Training time=0.44s (20.18%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1223|ppo_ep: 1|act_loss: -0.041015625|cri_loss: -0.019622802734375|unsuper_loss: 0.0
-average reward score: 5.171875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.00%) |Training time=0.44s (20.39%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1224|ppo_ep: 1|act_loss: -0.0154571533203125|cri_loss: -0.007038116455078125|unsuper_loss: 0.0
-average reward score: 4.015625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (76.01%) |Training time=0.42s (19.41%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1225|ppo_ep: 1|act_loss: -0.04803466796875|cri_loss: -0.022705078125|unsuper_loss: 0.0
-average reward score: 4.4140625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.55%) |Training time=0.43s (19.89%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1226|ppo_ep: 1|act_loss: 0.0100250244140625|cri_loss: 0.00542449951171875|unsuper_loss: 0.0
-average reward score: 4.296875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.41%) |Training time=0.43s (20.05%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1227|ppo_ep: 1|act_loss: 0.06689453125|cri_loss: 0.03717041015625|unsuper_loss: 0.0
-average reward score: 4.55859375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.51%) |Training time=0.43s (19.93%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1228|ppo_ep: 1|act_loss: 0.047698974609375|cri_loss: 0.024688720703125|unsuper_loss: 0.0
-average reward score: 4.2578125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.46%) |Training time=0.43s (20.00%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.42
-[2023-04-14 09:32:53,944] [INFO] [logging.py:96:log_dist] [Rank 0] step=1230, skipped=15, lr=[9.212218871506249e-06, 9.212218871506249e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:32:53,962] [INFO] [timer.py:199:stop] epoch=0/micro_step=1230/global_step=1230, RunningAvgSamplesPerSec=109.6524556882401, CurrSamplesPerSec=118.64602981670637, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:32:54,055] [INFO] [logging.py:96:log_dist] [Rank 0] step=1230, skipped=17, lr=[4.773970929664774e-06, 4.773970929664774e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1229|ppo_ep: 1|act_loss: -0.00830078125|cri_loss: -0.0036220550537109375|unsuper_loss: 0.0
-average reward score: 4.5546875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.53%) |Training time=0.43s (19.94%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1230|ppo_ep: 1|act_loss: -0.00394439697265625|cri_loss: -0.0006084442138671875|unsuper_loss: 0.0
-average reward score: 4.94921875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.23%) |Training time=0.44s (20.22%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1231|ppo_ep: 1|act_loss: -0.1021728515625|cri_loss: -0.039215087890625|unsuper_loss: 0.0
-average reward score: 5.01171875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.25%) |Training time=0.44s (20.12%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1232|ppo_ep: 1|act_loss: -0.007389068603515625|cri_loss: -0.00281524658203125|unsuper_loss: 0.0
-average reward score: 4.1796875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.34%) |Training time=0.44s (20.14%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1233|ppo_ep: 1|act_loss: 0.00864410400390625|cri_loss: 0.004642486572265625|unsuper_loss: 0.0
-average reward score: 5.3125
--------------------------------------------------------------------------------------
-|E2E latency=2.48s |Gather latency=0.00s (0.00%) |Generate time=1.64s (65.95%) |Training time=0.44s (17.58%) |Others=0.41 (16.47%)|CurSamplesPerSec=12.91 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1234|ppo_ep: 1|act_loss: 0.05426025390625|cri_loss: 0.03143310546875|unsuper_loss: 0.0
-average reward score: 4.265625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.21%) |Training time=0.44s (20.25%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1235|ppo_ep: 1|act_loss: 0.03375244140625|cri_loss: 0.017425537109375|unsuper_loss: 0.0
-average reward score: 4.6484375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.26%) |Training time=0.44s (20.18%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1236|ppo_ep: 1|act_loss: 0.01275634765625|cri_loss: 0.007781982421875|unsuper_loss: 0.0
-average reward score: 4.58984375
--------------------------------------------------------------------------------------
-|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.80s (77.19%) |Training time=0.43s (18.57%) |Others=0.10 (4.24%)|CurSamplesPerSec=13.76 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1237|ppo_ep: 1|act_loss: 0.0258026123046875|cri_loss: 0.0137481689453125|unsuper_loss: 0.0
-average reward score: 5.11328125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.50%) |Training time=0.43s (19.96%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1238|ppo_ep: 1|act_loss: -0.0137786865234375|cri_loss: -0.006542205810546875|unsuper_loss: 0.0
-average reward score: 4.33203125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.41%) |Training time=0.43s (20.04%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.42
-[2023-04-14 09:33:16,086] [INFO] [logging.py:96:log_dist] [Rank 0] step=1240, skipped=15, lr=[9.204454758600558e-06, 9.204454758600558e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:33:16,212] [INFO] [timer.py:199:stop] epoch=0/micro_step=1240/global_step=1240, RunningAvgSamplesPerSec=109.67958562836411, CurrSamplesPerSec=85.63239207564769, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:33:16,305] [INFO] [logging.py:96:log_dist] [Rank 0] step=1240, skipped=17, lr=[4.769954807256458e-06, 4.769954807256458e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1239|ppo_ep: 1|act_loss: -0.1005859375|cri_loss: -0.047332763671875|unsuper_loss: 0.0
-average reward score: 4.453125
--------------------------------------------------------------------------------------
-|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.64s (72.10%) |Training time=0.54s (23.57%) |Others=0.10 (4.32%)|CurSamplesPerSec=14.06 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1240|ppo_ep: 1|act_loss: 0.018646240234375|cri_loss: 0.01007843017578125|unsuper_loss: 0.0
-average reward score: 5.578125
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.67s (76.09%) |Training time=0.43s (19.45%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.54 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1241|ppo_ep: 1|act_loss: 0.0440673828125|cri_loss: 0.023956298828125|unsuper_loss: 0.0
-average reward score: 4.54296875
--------------------------------------------------------------------------------------
-|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.74s (76.14%) |Training time=0.45s (19.54%) |Others=0.10 (4.32%)|CurSamplesPerSec=14.01 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1242|ppo_ep: 1|act_loss: -0.012939453125|cri_loss: -0.00576019287109375|unsuper_loss: 0.0
-average reward score: 4.5859375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.61%) |Training time=0.43s (19.84%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1243|ppo_ep: 1|act_loss: -0.013214111328125|cri_loss: -0.0034637451171875|unsuper_loss: 0.0
-average reward score: 4.40234375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.59%) |Training time=0.43s (19.87%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1244|ppo_ep: 1|act_loss: -0.0278167724609375|cri_loss: -0.0135498046875|unsuper_loss: 0.0
-average reward score: 5.50390625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.85%) |Training time=0.42s (19.60%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1245|ppo_ep: 1|act_loss: -0.0090789794921875|cri_loss: -0.00386810302734375|unsuper_loss: 0.0
-average reward score: 4.4140625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.53%) |Training time=0.43s (19.83%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1246|ppo_ep: 1|act_loss: -0.0220794677734375|cri_loss: -0.01053619384765625|unsuper_loss: 0.0
-average reward score: 4.65625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.50%) |Training time=0.43s (19.83%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1247|ppo_ep: 1|act_loss: -0.0011386871337890625|cri_loss: 0.00011157989501953125|unsuper_loss: 0.0
-average reward score: 3.87109375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.74%) |Training time=0.43s (19.68%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1248|ppo_ep: 1|act_loss: 0.03851318359375|cri_loss: 0.021759033203125|unsuper_loss: 0.0
-average reward score: 4.359375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.88%) |Training time=0.42s (19.56%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.42
-[2023-04-14 09:33:38,001] [INFO] [logging.py:96:log_dist] [Rank 0] step=1250, skipped=15, lr=[9.196625731610486e-06, 9.196625731610486e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:33:38,019] [INFO] [timer.py:199:stop] epoch=0/micro_step=1250/global_step=1250, RunningAvgSamplesPerSec=109.75391174162782, CurrSamplesPerSec=119.57984018331966, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:33:38,112] [INFO] [logging.py:96:log_dist] [Rank 0] step=1250, skipped=17, lr=[4.765905038641933e-06, 4.765905038641933e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1249|ppo_ep: 1|act_loss: 0.003772735595703125|cri_loss: 0.0023212432861328125|unsuper_loss: 0.0
-average reward score: 4.2578125
--------------------------------------------------------------------------------------
-|E2E latency=2.63s |Gather latency=0.00s (0.00%) |Generate time=1.64s (62.29%) |Training time=0.43s (16.38%) |Others=0.56 (21.33%)|CurSamplesPerSec=12.18 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1250|ppo_ep: 1|act_loss: 0.05157470703125|cri_loss: 0.0281829833984375|unsuper_loss: 0.0
-average reward score: 3.8984375
--------------------------------------------------------------------------------------
-|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.64s (77.15%) |Training time=0.39s (18.21%) |Others=0.10 (4.64%)|CurSamplesPerSec=15.04 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1251|ppo_ep: 1|act_loss: -0.022918701171875|cri_loss: -0.0090789794921875|unsuper_loss: 0.0
-average reward score: 4.48828125
--------------------------------------------------------------------------------------
-|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.81s (77.81%) |Training time=0.42s (17.95%) |Others=0.10 (4.24%)|CurSamplesPerSec=13.72 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1252|ppo_ep: 1|act_loss: 0.08087158203125|cri_loss: 0.043609619140625|unsuper_loss: 0.0
-average reward score: 3.96484375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.87%) |Training time=0.42s (19.56%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1253|ppo_ep: 1|act_loss: 0.01342010498046875|cri_loss: 0.0078125|unsuper_loss: 0.0
-average reward score: 4.3046875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.60%) |Training time=0.43s (19.84%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1254|ppo_ep: 1|act_loss: -0.002349853515625|cri_loss: -0.00026798248291015625|unsuper_loss: 0.0
-average reward score: 4.15234375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.46%) |Training time=0.43s (19.98%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1255|ppo_ep: 1|act_loss: 0.020172119140625|cri_loss: 0.01102447509765625|unsuper_loss: 0.0
-average reward score: 4.0625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.01%) |Training time=0.44s (20.39%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1256|ppo_ep: 1|act_loss: 0.00833892822265625|cri_loss: 0.0047454833984375|unsuper_loss: 0.0
-average reward score: 4.07421875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.31%) |Training time=0.44s (20.13%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1257|ppo_ep: 1|act_loss: 0.01318359375|cri_loss: 0.007080078125|unsuper_loss: 0.0
-average reward score: 4.17578125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.22%) |Training time=0.44s (20.22%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1258|ppo_ep: 1|act_loss: 0.0224609375|cri_loss: 0.0117340087890625|unsuper_loss: 0.0
-average reward score: 4.3125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.92%) |Training time=0.44s (20.52%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.42
-[2023-04-14 09:34:00,234] [INFO] [logging.py:96:log_dist] [Rank 0] step=1260, skipped=15, lr=[9.188731906581099e-06, 9.188731906581099e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:34:00,253] [INFO] [timer.py:199:stop] epoch=0/micro_step=1260/global_step=1260, RunningAvgSamplesPerSec=109.82973644022236, CurrSamplesPerSec=115.90207136793875, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:34:00,767] [INFO] [logging.py:96:log_dist] [Rank 0] step=1260, skipped=17, lr=[4.761821683848535e-06, 4.761821683848535e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1259|ppo_ep: 1|act_loss: -0.0025997161865234375|cri_loss: -9.34600830078125e-05|unsuper_loss: 0.0
-average reward score: 3.78125
--------------------------------------------------------------------------------------
-|E2E latency=2.58s |Gather latency=0.00s (0.00%) |Generate time=1.62s (62.88%) |Training time=0.86s (33.31%) |Others=0.10 (3.81%)|CurSamplesPerSec=12.39 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1260|ppo_ep: 1|act_loss: -0.0119476318359375|cri_loss: -0.00537872314453125|unsuper_loss: 0.0
-average reward score: 4.02734375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.03%) |Training time=0.44s (20.41%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1261|ppo_ep: 1|act_loss: -0.062286376953125|cri_loss: -0.030242919921875|unsuper_loss: 0.0
-average reward score: 4.45703125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.00%) |Training time=0.44s (20.45%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1262|ppo_ep: 1|act_loss: -0.04656982421875|cri_loss: -0.022186279296875|unsuper_loss: 0.0
-average reward score: 3.5546875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.83%) |Training time=0.45s (20.62%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1263|ppo_ep: 1|act_loss: -0.03692626953125|cri_loss: -0.0166778564453125|unsuper_loss: 0.0
-average reward score: 3.6015625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.03%) |Training time=0.44s (20.41%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1264|ppo_ep: 1|act_loss: 0.04559326171875|cri_loss: 0.0249481201171875|unsuper_loss: 0.0
-average reward score: 4.08984375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.97%) |Training time=0.44s (20.48%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1265|ppo_ep: 1|act_loss: 0.032470703125|cri_loss: 0.0174713134765625|unsuper_loss: 0.0
-average reward score: 3.62109375
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.05%) |Training time=0.50s (22.48%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.45 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1266|ppo_ep: 1|act_loss: -0.03363037109375|cri_loss: -0.01528167724609375|unsuper_loss: 0.0
-average reward score: 4.3828125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.99%) |Training time=0.44s (20.46%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1267|ppo_ep: 1|act_loss: -0.1041259765625|cri_loss: -0.0447998046875|unsuper_loss: 0.0
-average reward score: 4.125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.04%) |Training time=0.44s (20.42%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1268|ppo_ep: 1|act_loss: -0.0225982666015625|cri_loss: -0.00713348388671875|unsuper_loss: 0.0
-average reward score: 3.890625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.65%) |Training time=0.45s (20.79%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.42
-[2023-04-14 09:34:22,397] [INFO] [logging.py:96:log_dist] [Rank 0] step=1270, skipped=15, lr=[9.180773400517926e-06, 9.180773400517926e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:34:22,416] [INFO] [timer.py:199:stop] epoch=0/micro_step=1270/global_step=1270, RunningAvgSamplesPerSec=109.86249534800284, CurrSamplesPerSec=121.51866314532418, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:34:22,508] [INFO] [logging.py:96:log_dist] [Rank 0] step=1270, skipped=17, lr=[4.757704803401435e-06, 4.757704803401435e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1269|ppo_ep: 1|act_loss: 0.058837890625|cri_loss: 0.031768798828125|unsuper_loss: 0.0
-average reward score: 4.7109375
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.69s (76.27%) |Training time=0.43s (19.29%) |Others=0.10 (4.44%)|CurSamplesPerSec=14.47 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1270|ppo_ep: 1|act_loss: 0.06817626953125|cri_loss: 0.036041259765625|unsuper_loss: 0.0
-average reward score: 4.24609375
--------------------------------------------------------------------------------------
-|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.73s (76.10%) |Training time=0.44s (19.54%) |Others=0.10 (4.36%)|CurSamplesPerSec=14.11 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1271|ppo_ep: 1|act_loss: 0.0235595703125|cri_loss: 0.0127410888671875|unsuper_loss: 0.0
-average reward score: 4.41796875
--------------------------------------------------------------------------------------
-|E2E latency=2.45s |Gather latency=0.00s (0.00%) |Generate time=1.62s (66.17%) |Training time=0.44s (18.14%) |Others=0.38 (15.69%)|CurSamplesPerSec=13.07 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1272|ppo_ep: 1|act_loss: 0.017425537109375|cri_loss: 0.0092315673828125|unsuper_loss: 0.0
-average reward score: 4.04296875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.16%) |Training time=0.44s (20.29%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1273|ppo_ep: 1|act_loss: 0.0494384765625|cri_loss: 0.0261993408203125|unsuper_loss: 0.0
-average reward score: 4.09375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.29%) |Training time=0.44s (20.15%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1274|ppo_ep: 1|act_loss: -0.03271484375|cri_loss: -0.011749267578125|unsuper_loss: 0.0
-average reward score: 4.09765625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.27%) |Training time=0.44s (20.15%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1275|ppo_ep: 1|act_loss: -0.03143310546875|cri_loss: -0.0136566162109375|unsuper_loss: 0.0
-average reward score: 5.04296875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.49%) |Training time=0.43s (19.95%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1276|ppo_ep: 1|act_loss: -0.00504302978515625|cri_loss: -0.0022125244140625|unsuper_loss: 0.0
-average reward score: 4.6953125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.41%) |Training time=0.43s (20.03%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1277|ppo_ep: 1|act_loss: -0.0264892578125|cri_loss: -0.011505126953125|unsuper_loss: 0.0
-average reward score: 4.92578125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.32%) |Training time=0.44s (20.13%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1278|ppo_ep: 1|act_loss: 0.00826263427734375|cri_loss: 0.005523681640625|unsuper_loss: 0.0
-average reward score: 4.640625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.55%) |Training time=0.43s (19.90%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.42
-[2023-04-14 09:34:44,417] [INFO] [logging.py:96:log_dist] [Rank 0] step=1280, skipped=15, lr=[9.172750331385226e-06, 9.172750331385226e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:34:44,435] [INFO] [timer.py:199:stop] epoch=0/micro_step=1280/global_step=1280, RunningAvgSamplesPerSec=109.9137738235626, CurrSamplesPerSec=118.03625573723781, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:34:44,519] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
-[2023-04-14 09:34:44,520] [INFO] [logging.py:96:log_dist] [Rank 0] step=1280, skipped=18, lr=[4.753970996989642e-06, 4.753970996989642e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1279|ppo_ep: 1|act_loss: -0.0079345703125|cri_loss: -0.003147125244140625|unsuper_loss: 0.0
-average reward score: 4.71875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.76%) |Training time=0.43s (20.16%) |Others=0.09 (4.07%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.42
-[2023-04-14 09:34:46,838] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
-epoch: 0|step: 1280|ppo_ep: 1|act_loss: -0.0105133056640625|cri_loss: -0.004421234130859375|unsuper_loss: 0.0
-average reward score: 4.8671875
--------------------------------------------------------------------------------------
-|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.80s (77.66%) |Training time=0.43s (18.54%) |Others=0.09 (3.80%)|CurSamplesPerSec=13.80 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1281|ppo_ep: 1|act_loss: 0.026824951171875|cri_loss: 0.01517486572265625|unsuper_loss: 0.0
-average reward score: 5.51953125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.81%) |Training time=0.45s (20.63%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42
-[2023-04-14 09:34:51,046] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 8192, but hysteresis is 2. Reducing hysteresis to 1
-epoch: 0|step: 1282|ppo_ep: 1|act_loss: -0.035003662109375|cri_loss: -0.0055084228515625|unsuper_loss: 0.0
-average reward score: 4.12109375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.86%) |Training time=0.42s (19.55%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1283|ppo_ep: 1|act_loss: -0.0618896484375|cri_loss: -0.0242919921875|unsuper_loss: 0.0
-average reward score: 4.171875
--------------------------------------------------------------------------------------
-|E2E latency=2.62s |Gather latency=0.00s (0.00%) |Generate time=1.62s (61.85%) |Training time=0.44s (16.90%) |Others=0.56 (21.25%)|CurSamplesPerSec=12.21 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1284|ppo_ep: 1|act_loss: 0.0521240234375|cri_loss: 0.031524658203125|unsuper_loss: 0.0
-average reward score: 4.33984375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.65s (76.32%) |Training time=0.41s (19.09%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1285|ppo_ep: 1|act_loss: -0.01483154296875|cri_loss: 0.003387451171875|unsuper_loss: 0.0
-average reward score: 4.33984375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.55%) |Training time=0.45s (20.90%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1286|ppo_ep: 1|act_loss: 0.0240478515625|cri_loss: 0.0176544189453125|unsuper_loss: 0.0
-average reward score: 4.65234375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.78%) |Training time=0.45s (20.66%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1287|ppo_ep: 1|act_loss: 0.038238525390625|cri_loss: 0.034881591796875|unsuper_loss: 0.0
-average reward score: 4.04296875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.77%) |Training time=0.45s (20.66%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.42
-[2023-04-14 09:35:04,453] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 8192, reducing to 4096
-epoch: 0|step: 1288|ppo_ep: 1|act_loss: 0.074951171875|cri_loss: 0.040374755859375|unsuper_loss: 0.0
-average reward score: 4.43359375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.51%) |Training time=0.43s (19.90%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.42
-[2023-04-14 09:35:06,600] [INFO] [logging.py:96:log_dist] [Rank 0] step=1290, skipped=17, lr=[9.166285470551969e-06, 9.166285470551969e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:35:06,618] [INFO] [timer.py:199:stop] epoch=0/micro_step=1290/global_step=1290, RunningAvgSamplesPerSec=109.96179691661273, CurrSamplesPerSec=110.87710045939353, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:35:06,710] [INFO] [logging.py:96:log_dist] [Rank 0] step=1290, skipped=19, lr=[4.750210129048555e-06, 4.750210129048555e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1289|ppo_ep: 1|act_loss: 0.126953125|cri_loss: 0.0889892578125|unsuper_loss: 0.0
-average reward score: 4.3125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.60%) |Training time=0.45s (20.88%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1290|ppo_ep: 1|act_loss: 0.0721435546875|cri_loss: 0.037872314453125|unsuper_loss: 0.0
-average reward score: 4.87109375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.65%) |Training time=0.45s (20.80%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1291|ppo_ep: 1|act_loss: -0.00913238525390625|cri_loss: 0.00107574462890625|unsuper_loss: 0.0
-average reward score: 4.83984375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.71%) |Training time=0.45s (20.72%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1292|ppo_ep: 1|act_loss: 0.02349853515625|cri_loss: 0.01522064208984375|unsuper_loss: 0.0
-average reward score: 4.265625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.94%) |Training time=0.44s (20.49%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1293|ppo_ep: 1|act_loss: -0.00255584716796875|cri_loss: 0.004985809326171875|unsuper_loss: 0.0
-average reward score: 4.85546875
--------------------------------------------------------------------------------------
-|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.63s (71.22%) |Training time=0.44s (19.20%) |Others=0.22 (9.58%)|CurSamplesPerSec=14.00 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1294|ppo_ep: 1|act_loss: 0.004199981689453125|cri_loss: 0.0028781890869140625|unsuper_loss: 0.0
-average reward score: 4.55078125
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.29%) |Training time=0.46s (20.78%) |Others=0.13 (5.93%)|CurSamplesPerSec=14.45 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1295|ppo_ep: 1|act_loss: -0.0011138916015625|cri_loss: 0.0023860931396484375|unsuper_loss: 0.0
-average reward score: 4.6328125
--------------------------------------------------------------------------------------
-|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.69s (75.54%) |Training time=0.44s (19.73%) |Others=0.11 (4.73%)|CurSamplesPerSec=14.29 |AvgSamplesPerSec=14.42
-[2023-04-14 09:35:21,992] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 4096, reducing to 2048
-[2023-04-14 09:35:22,078] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, reducing to 16384
-epoch: 0|step: 1296|ppo_ep: 1|act_loss: -0.000152587890625|cri_loss: 0.0015926361083984375|unsuper_loss: 0.0
-average reward score: 4.52734375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.64s (76.39%) |Training time=0.42s (19.43%) |Others=0.09 (4.18%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1297|ppo_ep: 1|act_loss: -0.1217041015625|cri_loss: -0.04266357421875|unsuper_loss: 0.0
-average reward score: 4.83203125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.76%) |Training time=0.45s (20.68%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1298|ppo_ep: 1|act_loss: -0.1729736328125|cri_loss: -0.057708740234375|unsuper_loss: 0.0
-average reward score: 3.98046875
--------------------------------------------------------------------------------------
-|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.65s (69.72%) |Training time=0.44s (18.58%) |Others=0.28 (11.70%)|CurSamplesPerSec=13.53 |AvgSamplesPerSec=14.42
-[2023-04-14 09:35:28,767] [INFO] [logging.py:96:log_dist] [Rank 0] step=1300, skipped=18, lr=[9.158963278694846e-06, 9.158963278694846e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:35:28,785] [INFO] [timer.py:199:stop] epoch=0/micro_step=1300/global_step=1300, RunningAvgSamplesPerSec=110.0024475916536, CurrSamplesPerSec=127.61274741908309, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:35:28,879] [INFO] [logging.py:96:log_dist] [Rank 0] step=1300, skipped=20, lr=[4.746422244731743e-06, 4.746422244731743e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1299|ppo_ep: 1|act_loss: 0.052490234375|cri_loss: 0.03717041015625|unsuper_loss: 0.0
-average reward score: 5.03125
--------------------------------------------------------------------------------------
-|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.75s (77.32%) |Training time=0.41s (18.29%) |Others=0.10 (4.38%)|CurSamplesPerSec=14.15 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1300|ppo_ep: 1|act_loss: 0.082763671875|cri_loss: 0.052001953125|unsuper_loss: 0.0
-average reward score: 3.94921875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.73%) |Training time=0.45s (20.73%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1301|ppo_ep: 1|act_loss: 0.08441162109375|cri_loss: 0.04736328125|unsuper_loss: 0.0
-average reward score: 4.875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.97%) |Training time=0.44s (20.47%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1302|ppo_ep: 1|act_loss: 0.0318603515625|cri_loss: 0.0181427001953125|unsuper_loss: 0.0
-average reward score: 4.953125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.76%) |Training time=0.45s (20.60%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1303|ppo_ep: 1|act_loss: 0.031951904296875|cri_loss: 0.020355224609375|unsuper_loss: 0.0
-average reward score: 4.77734375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.87%) |Training time=0.45s (20.58%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1304|ppo_ep: 1|act_loss: -0.00981903076171875|cri_loss: -0.0034885406494140625|unsuper_loss: 0.0
-average reward score: 4.796875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.75%) |Training time=0.45s (20.67%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1305|ppo_ep: 1|act_loss: -0.0030975341796875|cri_loss: -0.0007276535034179688|unsuper_loss: 0.0
-average reward score: 4.73828125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.02%) |Training time=0.44s (20.42%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1306|ppo_ep: 1|act_loss: 0.209228515625|cri_loss: 0.11376953125|unsuper_loss: 0.0
-average reward score: 4.63671875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.80%) |Training time=0.45s (20.65%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1307|ppo_ep: 1|act_loss: 2.288818359375e-05|cri_loss: 0.0047454833984375|unsuper_loss: 0.0
-average reward score: 4.25
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.84%) |Training time=0.44s (20.58%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1308|ppo_ep: 1|act_loss: -0.1138916015625|cri_loss: -0.0352783203125|unsuper_loss: 0.0
-average reward score: 4.4140625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.95%) |Training time=0.44s (20.48%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42
-[2023-04-14 09:35:50,565] [INFO] [logging.py:96:log_dist] [Rank 0] step=1310, skipped=18, lr=[9.150766485988878e-06, 9.150766485988878e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:35:50,950] [INFO] [timer.py:199:stop] epoch=0/micro_step=1310/global_step=1310, RunningAvgSamplesPerSec=109.92256089659742, CurrSamplesPerSec=49.722884509588546, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:35:51,044] [INFO] [logging.py:96:log_dist] [Rank 0] step=1310, skipped=20, lr=[4.742181853831721e-06, 4.742181853831721e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1309|ppo_ep: 1|act_loss: -0.1129150390625|cri_loss: -0.009521484375|unsuper_loss: 0.0
-average reward score: 4.7578125
--------------------------------------------------------------------------------------
-|E2E latency=2.69s |Gather latency=0.00s (0.00%) |Generate time=1.79s (66.42%) |Training time=0.81s (29.92%) |Others=0.10 (3.66%)|CurSamplesPerSec=11.88 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1310|ppo_ep: 1|act_loss: -0.14404296875|cri_loss: -0.050750732421875|unsuper_loss: 0.0
-average reward score: 5.12890625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.92%) |Training time=0.44s (20.53%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1311|ppo_ep: 1|act_loss: 0.021026611328125|cri_loss: 0.028533935546875|unsuper_loss: 0.0
-average reward score: 5.12109375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.80%) |Training time=0.45s (20.64%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1312|ppo_ep: 1|act_loss: 0.08160400390625|cri_loss: 0.04974365234375|unsuper_loss: 0.0
-average reward score: 4.71875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.04%) |Training time=0.44s (20.40%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1313|ppo_ep: 1|act_loss: 0.031280517578125|cri_loss: 0.0166015625|unsuper_loss: 0.0
-average reward score: 4.6484375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.81%) |Training time=0.45s (20.64%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1314|ppo_ep: 1|act_loss: 0.0399169921875|cri_loss: 0.0246429443359375|unsuper_loss: 0.0
-average reward score: 4.23828125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.48%) |Training time=0.43s (19.95%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1315|ppo_ep: 1|act_loss: -0.033721923828125|cri_loss: -0.014495849609375|unsuper_loss: 0.0
-average reward score: 4.4609375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.71%) |Training time=0.45s (20.73%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1316|ppo_ep: 1|act_loss: 0.016845703125|cri_loss: 0.015380859375|unsuper_loss: 0.0
-average reward score: 4.87109375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.61%) |Training time=0.43s (19.78%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1317|ppo_ep: 1|act_loss: -0.05865478515625|cri_loss: -0.0282440185546875|unsuper_loss: 0.0
-average reward score: 4.81640625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.60%) |Training time=0.43s (19.83%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1318|ppo_ep: 1|act_loss: -0.00882720947265625|cri_loss: -0.00371551513671875|unsuper_loss: 0.0
-average reward score: 4.765625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.46%) |Training time=0.43s (19.98%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.42
-[2023-04-14 09:36:12,565] [INFO] [logging.py:96:log_dist] [Rank 0] step=1320, skipped=18, lr=[9.142505574988243e-06, 9.142505574988243e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:36:12,583] [INFO] [timer.py:199:stop] epoch=0/micro_step=1320/global_step=1320, RunningAvgSamplesPerSec=109.96346053463445, CurrSamplesPerSec=112.99348394896265, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:36:12,676] [INFO] [logging.py:96:log_dist] [Rank 0] step=1320, skipped=20, lr=[4.737908228387656e-06, 4.737908228387656e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1319|ppo_ep: 1|act_loss: 0.0806884765625|cri_loss: 0.045318603515625|unsuper_loss: 0.0
-average reward score: 4.796875
--------------------------------------------------------------------------------------
-|E2E latency=2.42s |Gather latency=0.00s (0.00%) |Generate time=1.62s (67.12%) |Training time=0.45s (18.45%) |Others=0.35 (14.44%)|CurSamplesPerSec=13.24 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1320|ppo_ep: 1|act_loss: 0.0528564453125|cri_loss: 0.027801513671875|unsuper_loss: 0.0
-average reward score: 4.83203125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.92%) |Training time=0.44s (20.52%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1321|ppo_ep: 1|act_loss: 0.047027587890625|cri_loss: 0.02423095703125|unsuper_loss: 0.0
-average reward score: 5.3046875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.88%) |Training time=0.44s (20.56%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1322|ppo_ep: 1|act_loss: 0.0877685546875|cri_loss: 0.050018310546875|unsuper_loss: 0.0
-average reward score: 5.2265625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.92%) |Training time=0.44s (20.53%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1323|ppo_ep: 1|act_loss: 0.02587890625|cri_loss: 0.01342010498046875|unsuper_loss: 0.0
-average reward score: 4.7421875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.74%) |Training time=0.44s (20.08%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1324|ppo_ep: 1|act_loss: 0.005809783935546875|cri_loss: 0.00439453125|unsuper_loss: 0.0
-average reward score: 5.30078125
--------------------------------------------------------------------------------------
-|E2E latency=2.62s |Gather latency=0.00s (0.00%) |Generate time=1.80s (68.66%) |Training time=0.44s (16.72%) |Others=0.38 (14.63%)|CurSamplesPerSec=12.22 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1325|ppo_ep: 1|act_loss: 0.014617919921875|cri_loss: 0.0078277587890625|unsuper_loss: 0.0
-average reward score: 4.8046875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.12%) |Training time=0.44s (20.29%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1326|ppo_ep: 1|act_loss: 0.04364013671875|cri_loss: 0.0245361328125|unsuper_loss: 0.0
-average reward score: 4.88671875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.52%) |Training time=0.45s (20.56%) |Others=0.11 (4.92%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1327|ppo_ep: 1|act_loss: -0.030120849609375|cri_loss: -0.01197052001953125|unsuper_loss: 0.0
-average reward score: 4.8828125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.13%) |Training time=0.44s (20.34%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1328|ppo_ep: 1|act_loss: 0.016876220703125|cri_loss: 0.01165771484375|unsuper_loss: 0.0
-average reward score: 4.4140625
--------------------------------------------------------------------------------------
-|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.74s (76.74%) |Training time=0.43s (18.87%) |Others=0.10 (4.39%)|CurSamplesPerSec=14.14 |AvgSamplesPerSec=14.42
-[2023-04-14 09:36:35,048] [INFO] [logging.py:96:log_dist] [Rank 0] step=1330, skipped=18, lr=[9.134180668139572e-06, 9.134180668139572e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:36:35,574] [INFO] [timer.py:199:stop] epoch=0/micro_step=1330/global_step=1330, RunningAvgSamplesPerSec=109.86232216762367, CurrSamplesPerSec=41.83281054421599, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:36:35,667] [INFO] [logging.py:96:log_dist] [Rank 0] step=1330, skipped=20, lr=[4.733601431744987e-06, 4.733601431744987e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1329|ppo_ep: 1|act_loss: 0.00733184814453125|cri_loss: 0.004749298095703125|unsuper_loss: 0.0
-average reward score: 4.1796875
--------------------------------------------------------------------------------------
-|E2E latency=2.67s |Gather latency=0.00s (0.00%) |Generate time=1.64s (61.50%) |Training time=0.93s (34.79%) |Others=0.10 (3.71%)|CurSamplesPerSec=12.00 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1330|ppo_ep: 1|act_loss: 0.0728759765625|cri_loss: 0.03802490234375|unsuper_loss: 0.0
-average reward score: 5.4375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.93%) |Training time=0.44s (20.51%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1331|ppo_ep: 1|act_loss: 0.013946533203125|cri_loss: 0.00748443603515625|unsuper_loss: 0.0
-average reward score: 4.7109375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.00%) |Training time=0.44s (20.44%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1332|ppo_ep: 1|act_loss: -0.0016345977783203125|cri_loss: 0.0003871917724609375|unsuper_loss: 0.0
-average reward score: 4.7421875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.33%) |Training time=0.43s (20.11%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1333|ppo_ep: 1|act_loss: -0.047210693359375|cri_loss: -0.0224609375|unsuper_loss: 0.0
-average reward score: 4.4765625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.00%) |Training time=0.44s (20.38%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1334|ppo_ep: 1|act_loss: -0.0223236083984375|cri_loss: -0.0106201171875|unsuper_loss: 0.0
-average reward score: 4.67578125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.76%) |Training time=0.45s (20.69%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1335|ppo_ep: 1|act_loss: -0.031768798828125|cri_loss: -0.0149383544921875|unsuper_loss: 0.0
-average reward score: 4.6484375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.94%) |Training time=0.44s (20.49%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1336|ppo_ep: 1|act_loss: -0.0111541748046875|cri_loss: -0.005096435546875|unsuper_loss: 0.0
-average reward score: 4.22265625
--------------------------------------------------------------------------------------
-|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.62s (68.90%) |Training time=0.44s (18.63%) |Others=0.29 (12.47%)|CurSamplesPerSec=13.58 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1337|ppo_ep: 1|act_loss: 0.04083251953125|cri_loss: 0.023773193359375|unsuper_loss: 0.0
-average reward score: 4.734375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.96%) |Training time=0.44s (20.47%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1338|ppo_ep: 1|act_loss: 0.0168609619140625|cri_loss: 0.009246826171875|unsuper_loss: 0.0
-average reward score: 4.48828125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.96%) |Training time=0.44s (20.48%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42
-[2023-04-14 09:36:57,425] [INFO] [logging.py:96:log_dist] [Rank 0] step=1340, skipped=18, lr=[9.125791888838067e-06, 9.125791888838067e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:36:57,437] [INFO] [timer.py:199:stop] epoch=0/micro_step=1340/global_step=1340, RunningAvgSamplesPerSec=109.8848984580583, CurrSamplesPerSec=100.39286272603175, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:36:57,549] [INFO] [logging.py:96:log_dist] [Rank 0] step=1340, skipped=20, lr=[4.729261527740829e-06, 4.729261527740829e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1339|ppo_ep: 1|act_loss: 0.03173828125|cri_loss: 0.01629638671875|unsuper_loss: 0.0
-average reward score: 4.37890625
--------------------------------------------------------------------------------------
-|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.63s (73.09%) |Training time=0.49s (22.06%) |Others=0.11 (4.85%)|CurSamplesPerSec=14.35 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1340|ppo_ep: 1|act_loss: 0.06378173828125|cri_loss: 0.037933349609375|unsuper_loss: 0.0
-average reward score: 4.14453125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.80%) |Training time=0.45s (20.65%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1341|ppo_ep: 1|act_loss: 0.00018310546875|cri_loss: 0.0010709762573242188|unsuper_loss: 0.0
-average reward score: 4.68359375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.92%) |Training time=0.44s (20.53%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1342|ppo_ep: 1|act_loss: 0.042938232421875|cri_loss: 0.0228118896484375|unsuper_loss: 0.0
-average reward score: 5.01171875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.00%) |Training time=0.44s (20.46%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1343|ppo_ep: 1|act_loss: -0.014617919921875|cri_loss: -0.00555419921875|unsuper_loss: 0.0
-average reward score: 3.982421875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.58%) |Training time=0.43s (19.86%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1344|ppo_ep: 1|act_loss: 0.02569580078125|cri_loss: 0.014404296875|unsuper_loss: 0.0
-average reward score: 4.734375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.67s (76.90%) |Training time=0.40s (18.48%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1345|ppo_ep: 1|act_loss: 0.1107177734375|cri_loss: 0.06866455078125|unsuper_loss: 0.0
-average reward score: 4.34375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.83%) |Training time=0.45s (20.60%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1346|ppo_ep: 1|act_loss: -0.05108642578125|cri_loss: -0.02252197265625|unsuper_loss: 0.0
-average reward score: 4.9453125
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.98%) |Training time=0.47s (21.53%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1347|ppo_ep: 1|act_loss: -0.043212890625|cri_loss: -0.020538330078125|unsuper_loss: 0.0
-average reward score: 5.09375
--------------------------------------------------------------------------------------
-|E2E latency=2.11s |Gather latency=0.00s (0.00%) |Generate time=1.62s (76.96%) |Training time=0.39s (18.37%) |Others=0.10 (4.68%)|CurSamplesPerSec=15.20 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1348|ppo_ep: 1|act_loss: 0.0045166015625|cri_loss: 0.00312042236328125|unsuper_loss: 0.0
-average reward score: 4.6953125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.55%) |Training time=0.45s (20.92%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.42
-[2023-04-14 09:37:19,283] [INFO] [logging.py:96:log_dist] [Rank 0] step=1350, skipped=18, lr=[9.117339361425675e-06, 9.117339361425675e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:37:19,301] [INFO] [timer.py:199:stop] epoch=0/micro_step=1350/global_step=1350, RunningAvgSamplesPerSec=109.93319858905161, CurrSamplesPerSec=114.07213302011552, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:37:19,394] [INFO] [logging.py:96:log_dist] [Rank 0] step=1350, skipped=20, lr=[4.72488858070303e-06, 4.72488858070303e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1349|ppo_ep: 1|act_loss: -0.035736083984375|cri_loss: -0.01666259765625|unsuper_loss: 0.0
-average reward score: 4.421875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.11%) |Training time=0.44s (20.37%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1350|ppo_ep: 1|act_loss: -0.00806427001953125|cri_loss: -0.0030517578125|unsuper_loss: 0.0
-average reward score: 4.8515625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.02%) |Training time=0.45s (20.48%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1351|ppo_ep: 1|act_loss: -0.015472412109375|cri_loss: -0.00713348388671875|unsuper_loss: 0.0
-average reward score: 5.0859375
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.71%) |Training time=0.46s (20.82%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1352|ppo_ep: 1|act_loss: 0.00513458251953125|cri_loss: 0.0028324127197265625|unsuper_loss: 0.0
-average reward score: 4.546875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.37%) |Training time=0.46s (21.11%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1353|ppo_ep: 1|act_loss: -0.00102996826171875|cri_loss: 0.0009441375732421875|unsuper_loss: 0.0
-average reward score: 3.984375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.71%) |Training time=0.45s (20.75%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1354|ppo_ep: 1|act_loss: -0.0282135009765625|cri_loss: -0.01287841796875|unsuper_loss: 0.0
-average reward score: 4.58984375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.74%) |Training time=0.43s (19.70%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1355|ppo_ep: 1|act_loss: 0.0009326934814453125|cri_loss: 0.0012836456298828125|unsuper_loss: 0.0
-average reward score: 4.5546875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.45%) |Training time=0.43s (20.01%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1356|ppo_ep: 1|act_loss: -0.025543212890625|cri_loss: -0.0117034912109375|unsuper_loss: 0.0
-average reward score: 4.8828125
--------------------------------------------------------------------------------------
-|E2E latency=3.70s |Gather latency=0.00s (0.00%) |Generate time=1.81s (49.06%) |Training time=0.44s (11.88%) |Others=1.44 (39.06%)|CurSamplesPerSec=8.65 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1357|ppo_ep: 1|act_loss: 0.0008974075317382812|cri_loss: 0.001956939697265625|unsuper_loss: 0.0
-average reward score: 5.24609375
--------------------------------------------------------------------------------------
-|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.73s (76.27%) |Training time=0.44s (19.26%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.09 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1358|ppo_ep: 1|act_loss: 0.018463134765625|cri_loss: 0.010040283203125|unsuper_loss: 0.0
-average reward score: 4.72265625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.81%) |Training time=0.43s (19.63%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.42
-[2023-04-14 09:37:42,662] [INFO] [logging.py:96:log_dist] [Rank 0] step=1360, skipped=18, lr=[9.10882321118924e-06, 9.10882321118924e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:37:42,680] [INFO] [timer.py:199:stop] epoch=0/micro_step=1360/global_step=1360, RunningAvgSamplesPerSec=109.96246013017377, CurrSamplesPerSec=110.81549098318833, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:37:42,773] [INFO] [logging.py:96:log_dist] [Rank 0] step=1360, skipped=20, lr=[4.720482655449212e-06, 4.720482655449212e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1359|ppo_ep: 1|act_loss: 0.0360107421875|cri_loss: 0.0187225341796875|unsuper_loss: 0.0
-average reward score: 4.9921875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.74%) |Training time=0.45s (20.74%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1360|ppo_ep: 1|act_loss: 0.0202484130859375|cri_loss: 0.0105438232421875|unsuper_loss: 0.0
-average reward score: 5.25
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.40%) |Training time=0.46s (21.07%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1361|ppo_ep: 1|act_loss: 0.0132904052734375|cri_loss: 0.007068634033203125|unsuper_loss: 0.0
-average reward score: 4.32421875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.58%) |Training time=0.45s (20.86%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1362|ppo_ep: 1|act_loss: -0.06787109375|cri_loss: -0.032440185546875|unsuper_loss: 0.0
-average reward score: 5.04296875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.30%) |Training time=0.46s (21.17%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1363|ppo_ep: 1|act_loss: -0.052978515625|cri_loss: -0.02508544921875|unsuper_loss: 0.0
-average reward score: 5.30859375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.46%) |Training time=0.46s (20.91%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1364|ppo_ep: 1|act_loss: -0.09228515625|cri_loss: -0.043792724609375|unsuper_loss: 0.0
-average reward score: 5.0703125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.14%) |Training time=0.47s (21.34%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1365|ppo_ep: 1|act_loss: -0.03424072265625|cri_loss: -0.0164642333984375|unsuper_loss: 0.0
-average reward score: 4.921875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.29%) |Training time=0.46s (21.19%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1366|ppo_ep: 1|act_loss: -0.0079803466796875|cri_loss: -0.0024280548095703125|unsuper_loss: 0.0
-average reward score: 4.9375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.89%) |Training time=0.45s (20.59%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1367|ppo_ep: 1|act_loss: 0.0533447265625|cri_loss: 0.0282745361328125|unsuper_loss: 0.0
-average reward score: 5.24609375
--------------------------------------------------------------------------------------
-|E2E latency=2.47s |Gather latency=0.00s (0.00%) |Generate time=1.62s (65.55%) |Training time=0.46s (18.74%) |Others=0.39 (15.72%)|CurSamplesPerSec=12.95 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1368|ppo_ep: 1|act_loss: -0.009552001953125|cri_loss: -0.0042877197265625|unsuper_loss: 0.0
-average reward score: 5.08984375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.40%) |Training time=0.46s (21.07%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.42
-[2023-04-14 09:38:04,730] [INFO] [logging.py:96:log_dist] [Rank 0] step=1370, skipped=18, lr=[9.100243564358655e-06, 9.100243564358655e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:38:04,748] [INFO] [timer.py:199:stop] epoch=0/micro_step=1370/global_step=1370, RunningAvgSamplesPerSec=109.94952050263434, CurrSamplesPerSec=109.40402084107701, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:38:04,841] [INFO] [logging.py:96:log_dist] [Rank 0] step=1370, skipped=20, lr=[4.716043817285819e-06, 4.716043817285819e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1369|ppo_ep: 1|act_loss: 0.04583740234375|cri_loss: 0.0237579345703125|unsuper_loss: 0.0
-average reward score: 5.40234375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.54%) |Training time=0.46s (20.94%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1370|ppo_ep: 1|act_loss: 0.039947509765625|cri_loss: 0.0226593017578125|unsuper_loss: 0.0
-average reward score: 4.58203125
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.57%) |Training time=0.46s (20.94%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.56 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1371|ppo_ep: 1|act_loss: 0.01641845703125|cri_loss: 0.0088348388671875|unsuper_loss: 0.0
-average reward score: 5.04296875
--------------------------------------------------------------------------------------
-|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.79s (76.47%) |Training time=0.45s (19.29%) |Others=0.10 (4.24%)|CurSamplesPerSec=13.69 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1372|ppo_ep: 1|act_loss: 0.012786865234375|cri_loss: 0.007312774658203125|unsuper_loss: 0.0
-average reward score: 4.2890625
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.91%) |Training time=0.47s (21.22%) |Others=0.11 (4.86%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1373|ppo_ep: 1|act_loss: 0.037200927734375|cri_loss: 0.023590087890625|unsuper_loss: 0.0
-average reward score: 4.7421875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.93%) |Training time=0.45s (20.55%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1374|ppo_ep: 1|act_loss: -0.037384033203125|cri_loss: -0.01812744140625|unsuper_loss: 0.0
-average reward score: 5.09375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.33%) |Training time=0.46s (21.15%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1375|ppo_ep: 1|act_loss: -0.021392822265625|cri_loss: -0.00673675537109375|unsuper_loss: 0.0
-average reward score: 4.43359375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.21%) |Training time=0.46s (21.27%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1376|ppo_ep: 1|act_loss: 0.03839111328125|cri_loss: 0.020843505859375|unsuper_loss: 0.0
-average reward score: 4.7578125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.26%) |Training time=0.46s (21.21%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1377|ppo_ep: 1|act_loss: 0.05328369140625|cri_loss: 0.0286865234375|unsuper_loss: 0.0
-average reward score: 4.87890625
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.99%) |Training time=0.47s (21.46%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1378|ppo_ep: 1|act_loss: 0.003376007080078125|cri_loss: 0.0021820068359375|unsuper_loss: 0.0
-average reward score: 4.6171875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.39%) |Training time=0.46s (21.09%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.42
-[2023-04-14 09:38:26,722] [INFO] [logging.py:96:log_dist] [Rank 0] step=1380, skipped=18, lr=[9.091600548104982e-06, 9.091600548104982e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:38:26,740] [INFO] [timer.py:199:stop] epoch=0/micro_step=1380/global_step=1380, RunningAvgSamplesPerSec=109.93347342240803, CurrSamplesPerSec=109.94820174058928, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:38:26,833] [INFO] [logging.py:96:log_dist] [Rank 0] step=1380, skipped=20, lr=[4.711572132007139e-06, 4.711572132007139e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1379|ppo_ep: 1|act_loss: 0.05718994140625|cri_loss: 0.0311737060546875|unsuper_loss: 0.0
-average reward score: 4.49609375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.42%) |Training time=0.46s (20.95%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1380|ppo_ep: 1|act_loss: 0.00634765625|cri_loss: 0.0057830810546875|unsuper_loss: 0.0
-average reward score: 4.625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.11%) |Training time=0.47s (21.36%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1381|ppo_ep: 1|act_loss: -0.0004782676696777344|cri_loss: 3.814697265625e-06|unsuper_loss: 0.0
-average reward score: 4.6953125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.61%) |Training time=0.45s (20.88%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1382|ppo_ep: 1|act_loss: -0.0220947265625|cri_loss: -0.0086822509765625|unsuper_loss: 0.0
-average reward score: 4.65625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.37%) |Training time=0.46s (21.11%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1383|ppo_ep: 1|act_loss: 0.0055999755859375|cri_loss: 0.00392913818359375|unsuper_loss: 0.0
-average reward score: 4.79296875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.45%) |Training time=0.46s (21.01%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1384|ppo_ep: 1|act_loss: -0.0077667236328125|cri_loss: -0.003047943115234375|unsuper_loss: 0.0
-average reward score: 4.5703125
--------------------------------------------------------------------------------------
-|E2E latency=2.30s |Gather latency=0.00s (0.00%) |Generate time=1.73s (75.31%) |Training time=0.47s (20.39%) |Others=0.10 (4.29%)|CurSamplesPerSec=13.94 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1385|ppo_ep: 1|act_loss: -0.040283203125|cri_loss: -0.0183868408203125|unsuper_loss: 0.0
-average reward score: 5.046875
--------------------------------------------------------------------------------------
-|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.82s (76.63%) |Training time=0.46s (19.22%) |Others=0.10 (4.15%)|CurSamplesPerSec=13.49 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1386|ppo_ep: 1|act_loss: -0.00937652587890625|cri_loss: -0.00380706787109375|unsuper_loss: 0.0
-average reward score: 4.546875
--------------------------------------------------------------------------------------
-|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.73s (75.67%) |Training time=0.46s (20.02%) |Others=0.10 (4.31%)|CurSamplesPerSec=14.02 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1387|ppo_ep: 1|act_loss: -0.01216888427734375|cri_loss: -0.0052642822265625|unsuper_loss: 0.0
-average reward score: 4.80859375
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.95%) |Training time=0.47s (21.50%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1388|ppo_ep: 1|act_loss: 0.021453857421875|cri_loss: 0.0120391845703125|unsuper_loss: 0.0
-average reward score: 4.8046875
--------------------------------------------------------------------------------------
-|E2E latency=2.38s |Gather latency=0.00s (0.00%) |Generate time=1.65s (69.38%) |Training time=0.44s (18.50%) |Others=0.29 (12.11%)|CurSamplesPerSec=13.42 |AvgSamplesPerSec=14.42
-[2023-04-14 09:38:49,163] [INFO] [logging.py:96:log_dist] [Rank 0] step=1390, skipped=18, lr=[9.082894290538575e-06, 9.082894290538575e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:38:49,180] [INFO] [timer.py:199:stop] epoch=0/micro_step=1390/global_step=1390, RunningAvgSamplesPerSec=109.91676030089931, CurrSamplesPerSec=106.04061561798818, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:38:49,273] [INFO] [logging.py:96:log_dist] [Rank 0] step=1390, skipped=20, lr=[4.707067665894335e-06, 4.707067665894335e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1389|ppo_ep: 1|act_loss: 0.0421142578125|cri_loss: 0.0222015380859375|unsuper_loss: 0.0
-average reward score: 4.61328125
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.25%) |Training time=0.46s (21.25%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1390|ppo_ep: 1|act_loss: 0.01336669921875|cri_loss: 0.006893157958984375|unsuper_loss: 0.0
-average reward score: 4.62109375
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.60%) |Training time=0.46s (20.91%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1391|ppo_ep: 1|act_loss: 0.06072998046875|cri_loss: 0.03216552734375|unsuper_loss: 0.0
-average reward score: 4.89453125
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.21%) |Training time=0.47s (21.29%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1392|ppo_ep: 1|act_loss: -0.034271240234375|cri_loss: -0.0163726806640625|unsuper_loss: 0.0
-average reward score: 4.734375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.46%) |Training time=0.46s (21.02%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1393|ppo_ep: 1|act_loss: 0.08160400390625|cri_loss: 0.043060302734375|unsuper_loss: 0.0
-average reward score: 4.61328125
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.03%) |Training time=0.45s (20.49%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1394|ppo_ep: 1|act_loss: -0.052520751953125|cri_loss: -0.0250244140625|unsuper_loss: 0.0
-average reward score: 4.671875
--------------------------------------------------------------------------------------
-|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.62s (69.56%) |Training time=0.46s (19.95%) |Others=0.24 (10.49%)|CurSamplesPerSec=13.73 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1395|ppo_ep: 1|act_loss: -0.0552978515625|cri_loss: -0.024566650390625|unsuper_loss: 0.0
-average reward score: 4.421875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.01%) |Training time=0.47s (21.47%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1396|ppo_ep: 1|act_loss: -0.06494140625|cri_loss: -0.030853271484375|unsuper_loss: 0.0
-average reward score: 4.53125
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.22%) |Training time=0.47s (21.28%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1397|ppo_ep: 1|act_loss: -0.051849365234375|cri_loss: -0.0247039794921875|unsuper_loss: 0.0
-average reward score: 5.23828125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.59%) |Training time=0.45s (20.87%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1398|ppo_ep: 1|act_loss: -0.0325927734375|cri_loss: -0.0157318115234375|unsuper_loss: 0.0
-average reward score: 4.72265625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.66s (76.21%) |Training time=0.42s (19.24%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.42
-[2023-04-14 09:39:11,139] [INFO] [logging.py:96:log_dist] [Rank 0] step=1400, skipped=18, lr=[9.074124920707169e-06, 9.074124920707169e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:39:11,157] [INFO] [timer.py:199:stop] epoch=0/micro_step=1400/global_step=1400, RunningAvgSamplesPerSec=109.92654260577895, CurrSamplesPerSec=134.14327848044334, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:39:11,252] [INFO] [logging.py:96:log_dist] [Rank 0] step=1400, skipped=20, lr=[4.702530485714462e-06, 4.702530485714462e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1399|ppo_ep: 1|act_loss: -0.02215576171875|cri_loss: -0.009674072265625|unsuper_loss: 0.0
-average reward score: 5.1875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.68s (76.91%) |Training time=0.40s (18.43%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1400|ppo_ep: 1|act_loss: 0.0113372802734375|cri_loss: 0.006031036376953125|unsuper_loss: 0.0
-average reward score: 5.1953125
--------------------------------------------------------------------------------------
-|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.66s (74.61%) |Training time=0.46s (20.91%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.39 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1401|ppo_ep: 1|act_loss: 0.0224761962890625|cri_loss: 0.01213836669921875|unsuper_loss: 0.0
-average reward score: 5.0234375
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.66s (75.85%) |Training time=0.43s (19.63%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1402|ppo_ep: 1|act_loss: -0.0025653839111328125|cri_loss: -0.000286102294921875|unsuper_loss: 0.0
-average reward score: 5.10546875
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.66s (75.16%) |Training time=0.45s (20.36%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.52 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1403|ppo_ep: 1|act_loss: 0.00897216796875|cri_loss: 0.005275726318359375|unsuper_loss: 0.0
-average reward score: 4.92578125
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.66s (75.43%) |Training time=0.44s (20.11%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.50 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1404|ppo_ep: 1|act_loss: -0.051849365234375|cri_loss: -0.02508544921875|unsuper_loss: 0.0
-average reward score: 4.6640625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.88%) |Training time=0.43s (19.60%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1405|ppo_ep: 1|act_loss: 0.0389404296875|cri_loss: 0.02093505859375|unsuper_loss: 0.0
-average reward score: 4.6796875
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.12%) |Training time=0.44s (20.32%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1406|ppo_ep: 1|act_loss: -0.00616455078125|cri_loss: -0.00276947021484375|unsuper_loss: 0.0
-average reward score: 4.578125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.65s (76.10%) |Training time=0.42s (19.32%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1407|ppo_ep: 1|act_loss: -0.0345458984375|cri_loss: -0.016998291015625|unsuper_loss: 0.0
-average reward score: 4.8125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.68s (77.05%) |Training time=0.40s (18.42%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1408|ppo_ep: 1|act_loss: -0.016632080078125|cri_loss: -0.00641632080078125|unsuper_loss: 0.0
-average reward score: 4.75390625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.81%) |Training time=0.43s (19.60%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.42
-[2023-04-14 09:39:33,017] [INFO] [logging.py:96:log_dist] [Rank 0] step=1410, skipped=18, lr=[9.065292568593984e-06, 9.065292568593984e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:39:33,035] [INFO] [timer.py:199:stop] epoch=0/micro_step=1410/global_step=1410, RunningAvgSamplesPerSec=109.99575030295641, CurrSamplesPerSec=120.44529078042189, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:39:33,127] [INFO] [logging.py:96:log_dist] [Rank 0] step=1410, skipped=20, lr=[4.697960658719475e-06, 4.697960658719475e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1409|ppo_ep: 1|act_loss: 0.052001953125|cri_loss: 0.027801513671875|unsuper_loss: 0.0
-average reward score: 5.2421875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.68%) |Training time=0.43s (19.78%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1410|ppo_ep: 1|act_loss: 0.01788330078125|cri_loss: 0.0096588134765625|unsuper_loss: 0.0
-average reward score: 5.16015625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.56%) |Training time=0.43s (19.92%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1411|ppo_ep: 1|act_loss: 0.0845947265625|cri_loss: 0.04779052734375|unsuper_loss: 0.0
-average reward score: 4.6328125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.32%) |Training time=0.44s (20.15%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1412|ppo_ep: 1|act_loss: -0.00656890869140625|cri_loss: -0.0027256011962890625|unsuper_loss: 0.0
-average reward score: 4.71484375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.66s (76.55%) |Training time=0.41s (18.89%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1413|ppo_ep: 1|act_loss: 0.018768310546875|cri_loss: 0.00978851318359375|unsuper_loss: 0.0
-average reward score: 4.515625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.66s (76.15%) |Training time=0.41s (18.95%) |Others=0.11 (4.90%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1414|ppo_ep: 1|act_loss: -0.06195068359375|cri_loss: -0.029815673828125|unsuper_loss: 0.0
-average reward score: 4.76171875
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.08%) |Training time=0.49s (22.42%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1415|ppo_ep: 1|act_loss: -0.056732177734375|cri_loss: -0.0274505615234375|unsuper_loss: 0.0
-average reward score: 4.86328125
--------------------------------------------------------------------------------------
-|E2E latency=2.45s |Gather latency=0.00s (0.00%) |Generate time=1.86s (76.14%) |Training time=0.49s (19.85%) |Others=0.10 (4.01%)|CurSamplesPerSec=13.07 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1416|ppo_ep: 1|act_loss: -0.016510009765625|cri_loss: -0.007720947265625|unsuper_loss: 0.0
-average reward score: 5.296875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.30%) |Training time=0.48s (22.19%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1417|ppo_ep: 1|act_loss: 0.018585205078125|cri_loss: 0.01007080078125|unsuper_loss: 0.0
-average reward score: 5.10546875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.31%) |Training time=0.48s (22.19%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1418|ppo_ep: 1|act_loss: 0.04681396484375|cri_loss: 0.0242156982421875|unsuper_loss: 0.0
-average reward score: 5.08984375
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.18%) |Training time=0.49s (22.35%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.42
-[2023-04-14 09:39:55,090] [INFO] [logging.py:96:log_dist] [Rank 0] step=1420, skipped=18, lr=[9.056397365115782e-06, 9.056397365115782e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:39:55,108] [INFO] [timer.py:199:stop] epoch=0/micro_step=1420/global_step=1420, RunningAvgSamplesPerSec=109.97072218501401, CurrSamplesPerSec=93.94947837627466, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:39:55,200] [INFO] [logging.py:96:log_dist] [Rank 0] step=1420, skipped=20, lr=[4.693358252645234e-06, 4.693358252645234e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1419|ppo_ep: 1|act_loss: 0.0171661376953125|cri_loss: 0.00890350341796875|unsuper_loss: 0.0
-average reward score: 5.17578125
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.59%) |Training time=0.50s (22.96%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1420|ppo_ep: 1|act_loss: -0.01690673828125|cri_loss: -0.00787353515625|unsuper_loss: 0.0
-average reward score: 4.515625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.05%) |Training time=0.47s (21.43%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1421|ppo_ep: 1|act_loss: -0.0090484619140625|cri_loss: -0.003841400146484375|unsuper_loss: 0.0
-average reward score: 4.59375
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.37%) |Training time=0.48s (22.14%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1422|ppo_ep: 1|act_loss: -0.03363037109375|cri_loss: -0.01525115966796875|unsuper_loss: 0.0
-average reward score: 4.875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.55%) |Training time=0.48s (21.94%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1423|ppo_ep: 1|act_loss: -0.00421142578125|cri_loss: -0.001033782958984375|unsuper_loss: 0.0
-average reward score: 5.0703125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.37%) |Training time=0.48s (22.13%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1424|ppo_ep: 1|act_loss: 0.040191650390625|cri_loss: 0.020843505859375|unsuper_loss: 0.0
-average reward score: 4.77734375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.41%) |Training time=0.48s (22.07%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1425|ppo_ep: 1|act_loss: 0.0694580078125|cri_loss: 0.03741455078125|unsuper_loss: 0.0
-average reward score: 4.4765625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.88%) |Training time=0.49s (22.61%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1426|ppo_ep: 1|act_loss: -0.0015506744384765625|cri_loss: 0.000782012939453125|unsuper_loss: 0.0
-average reward score: 4.77734375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.80%) |Training time=0.50s (22.71%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1427|ppo_ep: 1|act_loss: -0.0264739990234375|cri_loss: -0.0119781494140625|unsuper_loss: 0.0
-average reward score: 4.70703125
--------------------------------------------------------------------------------------
-|E2E latency=2.44s |Gather latency=0.00s (0.00%) |Generate time=1.58s (64.64%) |Training time=0.50s (20.50%) |Others=0.36 (14.86%)|CurSamplesPerSec=13.11 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1428|ppo_ep: 1|act_loss: -0.052154541015625|cri_loss: -0.024078369140625|unsuper_loss: 0.0
-average reward score: 4.7578125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.42%) |Training time=0.50s (23.06%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.42
-[2023-04-14 09:40:17,160] [INFO] [logging.py:96:log_dist] [Rank 0] step=1430, skipped=18, lr=[9.04743944212094e-06, 9.04743944212094e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:40:17,178] [INFO] [timer.py:199:stop] epoch=0/micro_step=1430/global_step=1430, RunningAvgSamplesPerSec=109.87632875471583, CurrSamplesPerSec=92.99834883909462, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:40:17,305] [INFO] [logging.py:96:log_dist] [Rank 0] step=1430, skipped=20, lr=[4.688723335710501e-06, 4.688723335710501e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1429|ppo_ep: 1|act_loss: 0.0249176025390625|cri_loss: 0.01410675048828125|unsuper_loss: 0.0
-average reward score: 4.578125
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.58s (71.20%) |Training time=0.51s (22.89%) |Others=0.13 (5.91%)|CurSamplesPerSec=14.45 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1430|ppo_ep: 1|act_loss: 0.01303863525390625|cri_loss: 0.00823974609375|unsuper_loss: 0.0
-average reward score: 5.10546875
--------------------------------------------------------------------------------------
-|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.73s (74.82%) |Training time=0.48s (20.91%) |Others=0.10 (4.26%)|CurSamplesPerSec=13.82 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1431|ppo_ep: 1|act_loss: -0.0116119384765625|cri_loss: -0.0054779052734375|unsuper_loss: 0.0
-average reward score: 4.90625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.72%) |Training time=0.50s (22.78%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1432|ppo_ep: 1|act_loss: 0.0116424560546875|cri_loss: 0.00733184814453125|unsuper_loss: 0.0
-average reward score: 5.52734375
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.62%) |Training time=0.48s (21.90%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1433|ppo_ep: 1|act_loss: 0.019805908203125|cri_loss: 0.01030731201171875|unsuper_loss: 0.0
-average reward score: 4.67578125
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.72%) |Training time=0.50s (22.78%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1434|ppo_ep: 1|act_loss: 0.0243682861328125|cri_loss: 0.0135650634765625|unsuper_loss: 0.0
-average reward score: 5.1171875
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.58s (71.72%) |Training time=0.52s (23.55%) |Others=0.10 (4.73%)|CurSamplesPerSec=14.49 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1435|ppo_ep: 1|act_loss: -0.00748443603515625|cri_loss: -0.0032596588134765625|unsuper_loss: 0.0
-average reward score: 5.109375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.50%) |Training time=0.50s (22.77%) |Others=0.10 (4.73%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1436|ppo_ep: 1|act_loss: 0.019012451171875|cri_loss: 0.01018524169921875|unsuper_loss: 0.0
-average reward score: 5.3984375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.41%) |Training time=0.50s (23.08%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1437|ppo_ep: 1|act_loss: 0.01898193359375|cri_loss: 0.010498046875|unsuper_loss: 0.0
-average reward score: 4.64453125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.64%) |Training time=0.50s (22.85%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1438|ppo_ep: 1|act_loss: 0.0021076202392578125|cri_loss: 0.0012311935424804688|unsuper_loss: 0.0
-average reward score: 5.296875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.67%) |Training time=0.50s (22.84%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.42
-[2023-04-14 09:40:39,193] [INFO] [logging.py:96:log_dist] [Rank 0] step=1440, skipped=18, lr=[9.038418932387486e-06, 9.038418932387486e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:40:39,211] [INFO] [timer.py:199:stop] epoch=0/micro_step=1440/global_step=1440, RunningAvgSamplesPerSec=109.76133992307255, CurrSamplesPerSec=94.28474446729834, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:40:39,304] [INFO] [logging.py:96:log_dist] [Rank 0] step=1440, skipped=20, lr=[4.684055976615924e-06, 4.684055976615924e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1439|ppo_ep: 1|act_loss: -0.04058837890625|cri_loss: -0.0198516845703125|unsuper_loss: 0.0
-average reward score: 5.4609375
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.59%) |Training time=0.50s (22.94%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1440|ppo_ep: 1|act_loss: -0.08544921875|cri_loss: -0.04156494140625|unsuper_loss: 0.0
-average reward score: 4.83984375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.56%) |Training time=0.50s (22.86%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1441|ppo_ep: 1|act_loss: 0.008087158203125|cri_loss: 0.005504608154296875|unsuper_loss: 0.0
-average reward score: 5.125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.60%) |Training time=0.50s (22.90%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1442|ppo_ep: 1|act_loss: -0.0049896240234375|cri_loss: -0.001865386962890625|unsuper_loss: 0.0
-average reward score: 5.9921875
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.33%) |Training time=0.51s (23.17%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1443|ppo_ep: 1|act_loss: 0.02874755859375|cri_loss: 0.01544189453125|unsuper_loss: 0.0
-average reward score: 5.15625
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.61%) |Training time=0.50s (22.89%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1444|ppo_ep: 1|act_loss: 0.0271453857421875|cri_loss: 0.0143585205078125|unsuper_loss: 0.0
-average reward score: 4.92578125
--------------------------------------------------------------------------------------
-|E2E latency=2.45s |Gather latency=0.00s (0.00%) |Generate time=1.78s (72.41%) |Training time=0.58s (23.59%) |Others=0.10 (4.01%)|CurSamplesPerSec=13.04 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1445|ppo_ep: 1|act_loss: -0.0135955810546875|cri_loss: -0.00629425048828125|unsuper_loss: 0.0
-average reward score: 5.1796875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.75%) |Training time=0.50s (22.73%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1446|ppo_ep: 1|act_loss: -0.0104522705078125|cri_loss: -0.00444793701171875|unsuper_loss: 0.0
-average reward score: 5.40625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.35%) |Training time=0.51s (23.17%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1447|ppo_ep: 1|act_loss: -0.009429931640625|cri_loss: -0.00450897216796875|unsuper_loss: 0.0
-average reward score: 5.0625
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.58s (71.84%) |Training time=0.52s (23.64%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.56 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1448|ppo_ep: 1|act_loss: -0.06170654296875|cri_loss: -0.029632568359375|unsuper_loss: 0.0
-average reward score: 5.22265625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.32%) |Training time=0.48s (22.16%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.42
-[2023-04-14 09:41:01,351] [INFO] [logging.py:96:log_dist] [Rank 0] step=1450, skipped=18, lr=[9.029335969621133e-06, 9.029335969621133e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:41:01,370] [INFO] [timer.py:199:stop] epoch=0/micro_step=1450/global_step=1450, RunningAvgSamplesPerSec=109.61442490775492, CurrSamplesPerSec=87.41166563657832, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:41:01,462] [INFO] [logging.py:96:log_dist] [Rank 0] step=1450, skipped=20, lr=[4.679356244543027e-06, 4.679356244543027e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1449|ppo_ep: 1|act_loss: -0.037841796875|cri_loss: -0.0179443359375|unsuper_loss: 0.0
-average reward score: 5.16796875
--------------------------------------------------------------------------------------
-|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.59s (71.78%) |Training time=0.53s (23.81%) |Others=0.10 (4.42%)|CurSamplesPerSec=14.41 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1450|ppo_ep: 1|act_loss: 0.0478515625|cri_loss: 0.024993896484375|unsuper_loss: 0.0
-average reward score: 5.1953125
--------------------------------------------------------------------------------------
-|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.61s (72.66%) |Training time=0.51s (22.84%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.42 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1451|ppo_ep: 1|act_loss: 0.04669189453125|cri_loss: 0.0257415771484375|unsuper_loss: 0.0
-average reward score: 5.859375
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.61s (72.93%) |Training time=0.50s (22.59%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.52 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1452|ppo_ep: 1|act_loss: 0.055023193359375|cri_loss: 0.0299072265625|unsuper_loss: 0.0
-average reward score: 4.83984375
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.64s (73.94%) |Training time=0.48s (21.58%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.47 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1453|ppo_ep: 1|act_loss: 0.026885986328125|cri_loss: 0.0149993896484375|unsuper_loss: 0.0
-average reward score: 6.34375
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.59%) |Training time=0.50s (22.93%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1454|ppo_ep: 1|act_loss: 0.0374755859375|cri_loss: 0.0211029052734375|unsuper_loss: 0.0
-average reward score: 5.625
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.59s (71.67%) |Training time=0.53s (23.88%) |Others=0.10 (4.44%)|CurSamplesPerSec=14.45 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1455|ppo_ep: 1|act_loss: 0.021514892578125|cri_loss: 0.0114288330078125|unsuper_loss: 0.0
-average reward score: 4.8984375
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.60%) |Training time=0.50s (22.93%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.56 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1456|ppo_ep: 1|act_loss: -0.03704833984375|cri_loss: -0.017303466796875|unsuper_loss: 0.0
-average reward score: 6.1953125
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.59%) |Training time=0.50s (22.87%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1457|ppo_ep: 1|act_loss: -0.0538330078125|cri_loss: -0.0260467529296875|unsuper_loss: 0.0
-average reward score: 5.6328125
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.48%) |Training time=0.48s (22.04%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1458|ppo_ep: 1|act_loss: -0.018829345703125|cri_loss: -0.00910186767578125|unsuper_loss: 0.0
-average reward score: 5.4375
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.19%) |Training time=0.49s (22.31%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.42
-[2023-04-14 09:41:23,569] [INFO] [logging.py:96:log_dist] [Rank 0] step=1460, skipped=18, lr=[9.020190688453302e-06, 9.020190688453302e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:41:23,671] [INFO] [timer.py:199:stop] epoch=0/micro_step=1460/global_step=1460, RunningAvgSamplesPerSec=109.47243506677798, CurrSamplesPerSec=72.63209936403898, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:41:23,763] [INFO] [logging.py:96:log_dist] [Rank 0] step=1460, skipped=20, lr=[4.674624209153173e-06, 4.674624209153173e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1459|ppo_ep: 1|act_loss: 0.00940704345703125|cri_loss: 0.00525665283203125|unsuper_loss: 0.0
-average reward score: 5.625
--------------------------------------------------------------------------------------
-|E2E latency=2.48s |Gather latency=0.00s (0.00%) |Generate time=1.77s (71.68%) |Training time=0.60s (24.38%) |Others=0.10 (3.95%)|CurSamplesPerSec=12.93 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1460|ppo_ep: 1|act_loss: -0.01226043701171875|cri_loss: -0.00563812255859375|unsuper_loss: 0.0
-average reward score: 5.1171875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.24%) |Training time=0.48s (22.26%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1461|ppo_ep: 1|act_loss: 0.034271240234375|cri_loss: 0.01806640625|unsuper_loss: 0.0
-average reward score: 5.6328125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.88%) |Training time=0.49s (22.61%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1462|ppo_ep: 1|act_loss: 0.079345703125|cri_loss: 0.04486083984375|unsuper_loss: 0.0
-average reward score: 5.62109375
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.17%) |Training time=0.49s (22.31%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1463|ppo_ep: 1|act_loss: 0.00299835205078125|cri_loss: 0.0023288726806640625|unsuper_loss: 0.0
-average reward score: 5.2265625
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.43%) |Training time=0.48s (22.04%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1464|ppo_ep: 1|act_loss: -0.00670623779296875|cri_loss: -0.00295257568359375|unsuper_loss: 0.0
-average reward score: 5.84375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.74%) |Training time=0.47s (21.75%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1465|ppo_ep: 1|act_loss: -0.027069091796875|cri_loss: -0.0130462646484375|unsuper_loss: 0.0
-average reward score: 4.88671875
--------------------------------------------------------------------------------------
-|E2E latency=2.61s |Gather latency=0.00s (0.00%) |Generate time=1.61s (61.52%) |Training time=0.47s (17.96%) |Others=0.54 (20.52%)|CurSamplesPerSec=12.24 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1466|ppo_ep: 1|act_loss: 0.002590179443359375|cri_loss: 0.001628875732421875|unsuper_loss: 0.0
-average reward score: 5.1328125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.15%) |Training time=0.46s (21.33%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1467|ppo_ep: 1|act_loss: 0.00725555419921875|cri_loss: 0.00450897216796875|unsuper_loss: 0.0
-average reward score: 5.0234375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.04%) |Training time=0.47s (21.45%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1468|ppo_ep: 1|act_loss: 0.01209259033203125|cri_loss: 0.006256103515625|unsuper_loss: 0.0
-average reward score: 5.0625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.06%) |Training time=0.46s (21.34%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.42
-[2023-04-14 09:41:45,881] [INFO] [logging.py:96:log_dist] [Rank 0] step=1470, skipped=18, lr=[9.010983224439122e-06, 9.010983224439122e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:41:45,900] [INFO] [timer.py:199:stop] epoch=0/micro_step=1470/global_step=1470, RunningAvgSamplesPerSec=109.42062658822219, CurrSamplesPerSec=106.38218975695435, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:41:45,992] [INFO] [logging.py:96:log_dist] [Rank 0] step=1470, skipped=20, lr=[4.6698599405865465e-06, 4.6698599405865465e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1469|ppo_ep: 1|act_loss: -0.00179290771484375|cri_loss: 0.0003948211669921875|unsuper_loss: 0.0
-average reward score: 5.37109375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.10%) |Training time=0.46s (21.37%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1470|ppo_ep: 1|act_loss: 0.0103302001953125|cri_loss: 0.006702423095703125|unsuper_loss: 0.0
-average reward score: 5.1875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.59%) |Training time=0.48s (21.90%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1471|ppo_ep: 1|act_loss: 0.02276611328125|cri_loss: 0.0134124755859375|unsuper_loss: 0.0
-average reward score: 5.85546875
--------------------------------------------------------------------------------------
-|E2E latency=2.60s |Gather latency=0.00s (0.00%) |Generate time=1.60s (61.64%) |Training time=0.47s (18.19%) |Others=0.52 (20.17%)|CurSamplesPerSec=12.33 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1472|ppo_ep: 1|act_loss: 0.0002186298370361328|cri_loss: 0.0006475448608398438|unsuper_loss: 0.0
-average reward score: 5.8515625
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.57%) |Training time=0.48s (21.95%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1473|ppo_ep: 1|act_loss: 0.01290130615234375|cri_loss: 0.00920867919921875|unsuper_loss: 0.0
-average reward score: 5.203125
--------------------------------------------------------------------------------------
-|E2E latency=2.43s |Gather latency=0.00s (0.00%) |Generate time=1.85s (76.14%) |Training time=0.48s (19.81%) |Others=0.10 (4.04%)|CurSamplesPerSec=13.16 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1474|ppo_ep: 1|act_loss: -0.0024871826171875|cri_loss: -0.0009250640869140625|unsuper_loss: 0.0
-average reward score: 5.390625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.58%) |Training time=0.48s (21.91%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1475|ppo_ep: 1|act_loss: -0.00386810302734375|cri_loss: -0.0015048980712890625|unsuper_loss: 0.0
-average reward score: 5.04296875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.74%) |Training time=0.50s (22.76%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1476|ppo_ep: 1|act_loss: -0.00803375244140625|cri_loss: -0.0038585662841796875|unsuper_loss: 0.0
-average reward score: 5.9375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.91%) |Training time=0.49s (22.57%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1477|ppo_ep: 1|act_loss: 0.034149169921875|cri_loss: 0.0176544189453125|unsuper_loss: 0.0
-average reward score: 5.234375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.10%) |Training time=0.49s (22.38%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1478|ppo_ep: 1|act_loss: 0.000904083251953125|cri_loss: 0.0015773773193359375|unsuper_loss: 0.0
-average reward score: 5.76171875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.70%) |Training time=0.48s (21.80%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.42
-[2023-04-14 09:42:08,344] [INFO] [logging.py:96:log_dist] [Rank 0] step=1480, skipped=18, lr=[9.00171371405542e-06, 9.00171371405542e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:42:08,362] [INFO] [timer.py:199:stop] epoch=0/micro_step=1480/global_step=1480, RunningAvgSamplesPerSec=109.35307940699091, CurrSamplesPerSec=102.23690081816804, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:42:08,454] [INFO] [logging.py:96:log_dist] [Rank 0] step=1480, skipped=20, lr=[4.665063509461098e-06, 4.665063509461098e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1479|ppo_ep: 1|act_loss: 0.053741455078125|cri_loss: 0.030670166015625|unsuper_loss: 0.0
-average reward score: 5.12109375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.60%) |Training time=0.48s (21.90%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1480|ppo_ep: 1|act_loss: 0.021942138671875|cri_loss: 0.01233673095703125|unsuper_loss: 0.0
-average reward score: 5.3203125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.44%) |Training time=0.48s (22.01%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1481|ppo_ep: 1|act_loss: -0.05841064453125|cri_loss: -0.0283660888671875|unsuper_loss: 0.0
-average reward score: 5.2734375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.65%) |Training time=0.47s (21.83%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1482|ppo_ep: 1|act_loss: -0.03936767578125|cri_loss: -0.0176849365234375|unsuper_loss: 0.0
-average reward score: 5.96875
--------------------------------------------------------------------------------------
-|E2E latency=2.46s |Gather latency=0.00s (0.00%) |Generate time=1.60s (65.07%) |Training time=0.47s (19.00%) |Others=0.39 (15.93%)|CurSamplesPerSec=12.99 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1483|ppo_ep: 1|act_loss: 0.0780029296875|cri_loss: 0.04107666015625|unsuper_loss: 0.0
-average reward score: 5.66015625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.58%) |Training time=0.48s (21.91%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1484|ppo_ep: 1|act_loss: 0.010711669921875|cri_loss: 0.00705718994140625|unsuper_loss: 0.0
-average reward score: 4.2890625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.98%) |Training time=0.47s (21.50%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1485|ppo_ep: 1|act_loss: 0.022308349609375|cri_loss: 0.01165008544921875|unsuper_loss: 0.0
-average reward score: 5.21875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.90%) |Training time=0.47s (21.57%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1486|ppo_ep: 1|act_loss: -0.0200347900390625|cri_loss: -0.0081634521484375|unsuper_loss: 0.0
-average reward score: 5.078125
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.05%) |Training time=0.47s (21.43%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1487|ppo_ep: 1|act_loss: 0.01959228515625|cri_loss: 0.01219940185546875|unsuper_loss: 0.0
-average reward score: 5.16015625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.96%) |Training time=0.47s (21.51%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1488|ppo_ep: 1|act_loss: -0.05877685546875|cri_loss: -0.0276641845703125|unsuper_loss: 0.0
-average reward score: 5.20703125
--------------------------------------------------------------------------------------
-|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.78s (75.88%) |Training time=0.47s (19.92%) |Others=0.10 (4.20%)|CurSamplesPerSec=13.66 |AvgSamplesPerSec=14.42
-[2023-04-14 09:42:30,551] [INFO] [logging.py:96:log_dist] [Rank 0] step=1490, skipped=18, lr=[8.992382294698705e-06, 8.992382294698705e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:42:30,570] [INFO] [timer.py:199:stop] epoch=0/micro_step=1490/global_step=1490, RunningAvgSamplesPerSec=109.31296286894379, CurrSamplesPerSec=103.17163047296637, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:42:30,664] [INFO] [logging.py:96:log_dist] [Rank 0] step=1490, skipped=20, lr=[4.660234986871507e-06, 4.660234986871507e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1489|ppo_ep: 1|act_loss: 0.0299530029296875|cri_loss: 0.0162506103515625|unsuper_loss: 0.0
-average reward score: 5.08984375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.67%) |Training time=0.47s (21.74%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1490|ppo_ep: 1|act_loss: -0.02716064453125|cri_loss: -0.01160430908203125|unsuper_loss: 0.0
-average reward score: 4.71484375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.50%) |Training time=0.48s (21.96%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1491|ppo_ep: 1|act_loss: -0.006702423095703125|cri_loss: -0.0006561279296875|unsuper_loss: 0.0
-average reward score: 4.7890625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.84%) |Training time=0.47s (21.62%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1492|ppo_ep: 1|act_loss: 0.037200927734375|cri_loss: 0.020538330078125|unsuper_loss: 0.0
-average reward score: 5.14453125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.49%) |Training time=0.48s (21.98%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1493|ppo_ep: 1|act_loss: -0.050750732421875|cri_loss: -0.02349853515625|unsuper_loss: 0.0
-average reward score: 4.421875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.39%) |Training time=0.48s (22.10%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1494|ppo_ep: 1|act_loss: 0.03173828125|cri_loss: 0.0195159912109375|unsuper_loss: 0.0
-average reward score: 5.23828125
--------------------------------------------------------------------------------------
-|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.60s (67.96%) |Training time=0.48s (20.14%) |Others=0.28 (11.90%)|CurSamplesPerSec=13.56 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1495|ppo_ep: 1|act_loss: -0.03515625|cri_loss: -0.0159454345703125|unsuper_loss: 0.0
-average reward score: 5.2890625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.59%) |Training time=0.48s (21.88%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1496|ppo_ep: 1|act_loss: -0.05670166015625|cri_loss: -0.025726318359375|unsuper_loss: 0.0
-average reward score: 4.83203125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.50%) |Training time=0.48s (21.98%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.42
-[2023-04-14 09:42:48,235] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
-epoch: 0|step: 1497|ppo_ep: 1|act_loss: 0.013671875|cri_loss: 0.013336181640625|unsuper_loss: 0.0
-average reward score: 4.99609375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.01%) |Training time=0.47s (21.93%) |Others=0.09 (4.05%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.42
-[2023-04-14 09:42:50,401] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
-epoch: 0|step: 1498|ppo_ep: 1|act_loss: 0.046875|cri_loss: 0.0252685546875|unsuper_loss: 0.0
-average reward score: 5.39453125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.68%) |Training time=0.48s (22.20%) |Others=0.09 (4.12%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.42
-[2023-04-14 09:42:52,461] [INFO] [logging.py:96:log_dist] [Rank 0] step=1500, skipped=18, lr=[8.982989104683118e-06, 8.982989104683118e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:42:52,480] [INFO] [timer.py:199:stop] epoch=0/micro_step=1500/global_step=1500, RunningAvgSamplesPerSec=109.25825586216985, CurrSamplesPerSec=100.98657928694233, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:42:52,573] [INFO] [logging.py:96:log_dist] [Rank 0] step=1500, skipped=22, lr=[4.656349111024974e-06, 4.656349111024974e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1499|ppo_ep: 1|act_loss: 0.037841796875|cri_loss: 0.023956298828125|unsuper_loss: 0.0
-average reward score: 4.86328125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.42%) |Training time=0.48s (22.05%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1500|ppo_ep: 1|act_loss: -0.0390625|cri_loss: -0.0156402587890625|unsuper_loss: 0.0
-average reward score: 4.95703125
--------------------------------------------------------------------------------------
-|E2E latency=2.95s |Gather latency=0.00s (0.00%) |Generate time=1.60s (54.11%) |Training time=0.48s (16.34%) |Others=0.87 (29.55%)|CurSamplesPerSec=10.84 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1501|ppo_ep: 1|act_loss: 0.0438232421875|cri_loss: 0.024444580078125|unsuper_loss: 0.0
-average reward score: 5.41796875
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.63s (73.98%) |Training time=0.48s (21.58%) |Others=0.10 (4.44%)|CurSamplesPerSec=14.48 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1502|ppo_ep: 1|act_loss: 0.054840087890625|cri_loss: 0.037109375|unsuper_loss: 0.0
-average reward score: 4.57421875
--------------------------------------------------------------------------------------
-|E2E latency=2.47s |Gather latency=0.00s (0.00%) |Generate time=1.86s (75.41%) |Training time=0.51s (20.59%) |Others=0.10 (4.00%)|CurSamplesPerSec=12.98 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1503|ppo_ep: 1|act_loss: 0.0931396484375|cri_loss: 0.05364990234375|unsuper_loss: 0.0
-average reward score: 4.46875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.03%) |Training time=0.46s (21.42%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1504|ppo_ep: 1|act_loss: -0.000888824462890625|cri_loss: 0.0021839141845703125|unsuper_loss: 0.0
-average reward score: 5.234375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.11%) |Training time=0.46s (21.34%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1505|ppo_ep: 1|act_loss: -0.1180419921875|cri_loss: -0.049957275390625|unsuper_loss: 0.0
-average reward score: 5.0703125
--------------------------------------------------------------------------------------
-|E2E latency=2.76s |Gather latency=0.00s (0.00%) |Generate time=1.60s (57.98%) |Training time=0.46s (16.70%) |Others=0.70 (25.32%)|CurSamplesPerSec=11.60 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1506|ppo_ep: 1|act_loss: -0.0391845703125|cri_loss: -0.0111236572265625|unsuper_loss: 0.0
-average reward score: 4.72265625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.30%) |Training time=0.46s (21.14%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1507|ppo_ep: 1|act_loss: -0.08062744140625|cri_loss: -0.0303497314453125|unsuper_loss: 0.0
-average reward score: 4.83984375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.40%) |Training time=0.45s (21.03%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1508|ppo_ep: 1|act_loss: 0.01904296875|cri_loss: 0.0215911865234375|unsuper_loss: 0.0
-average reward score: 4.76171875
--------------------------------------------------------------------------------------
-|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.15%) |Training time=0.52s (23.41%) |Others=0.10 (4.45%)|CurSamplesPerSec=14.44 |AvgSamplesPerSec=14.42
-[2023-04-14 09:43:15,905] [INFO] [logging.py:96:log_dist] [Rank 0] step=1510, skipped=18, lr=[8.973534283238398e-06, 8.973534283238398e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:43:15,923] [INFO] [timer.py:199:stop] epoch=0/micro_step=1510/global_step=1510, RunningAvgSamplesPerSec=109.20020953413868, CurrSamplesPerSec=92.72023189431013, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:43:16,016] [INFO] [logging.py:96:log_dist] [Rank 0] step=1510, skipped=22, lr=[4.651463004476193e-06, 4.651463004476193e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1509|ppo_ep: 1|act_loss: 0.111328125|cri_loss: 0.06182861328125|unsuper_loss: 0.0
-average reward score: 4.7421875
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.58%) |Training time=0.51s (23.00%) |Others=0.10 (4.43%)|CurSamplesPerSec=14.50 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1510|ppo_ep: 1|act_loss: 0.0236968994140625|cri_loss: 0.0250091552734375|unsuper_loss: 0.0
-average reward score: 5.5078125
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.18%) |Training time=0.49s (22.34%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.57 |AvgSamplesPerSec=14.42
-[2023-04-14 09:43:20,288] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 8192, but hysteresis is 2. Reducing hysteresis to 1
-[2023-04-14 09:43:20,373] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, reducing to 16384
-epoch: 0|step: 1511|ppo_ep: 1|act_loss: 0.0660400390625|cri_loss: 0.04168701171875|unsuper_loss: 0.0
-average reward score: 5.32421875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.42%) |Training time=0.46s (21.44%) |Others=0.09 (4.14%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.42
-[2023-04-14 09:43:22,449] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 8192, reducing to 4096
-epoch: 0|step: 1512|ppo_ep: 1|act_loss: 0.1353759765625|cri_loss: 0.08154296875|unsuper_loss: 0.0
-average reward score: 5.49609375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.78%) |Training time=0.47s (21.70%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1513|ppo_ep: 1|act_loss: 0.08203125|cri_loss: 0.04931640625|unsuper_loss: 0.0
-average reward score: 5.71875
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.66%) |Training time=0.48s (21.88%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.56 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1514|ppo_ep: 1|act_loss: -0.0709228515625|cri_loss: -0.0137939453125|unsuper_loss: 0.0
-average reward score: 5.1484375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.65%) |Training time=0.48s (21.84%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1515|ppo_ep: 1|act_loss: -0.051605224609375|cri_loss: -0.006378173828125|unsuper_loss: 0.0
-average reward score: 5.2265625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.70%) |Training time=0.47s (21.78%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1516|ppo_ep: 1|act_loss: 0.102783203125|cri_loss: 0.0628662109375|unsuper_loss: 0.0
-average reward score: 5.38671875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.42%) |Training time=0.48s (22.09%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1517|ppo_ep: 1|act_loss: 0.07171630859375|cri_loss: 0.06170654296875|unsuper_loss: 0.0
-average reward score: 4.97265625
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.24%) |Training time=0.49s (22.26%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.42
-[2023-04-14 09:43:35,676] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 4096, reducing to 2048
-epoch: 0|step: 1518|ppo_ep: 1|act_loss: 0.1497802734375|cri_loss: 0.08740234375|unsuper_loss: 0.0
-average reward score: 5.2890625
--------------------------------------------------------------------------------------
-|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.75s (76.08%) |Training time=0.45s (19.66%) |Others=0.10 (4.26%)|CurSamplesPerSec=13.88 |AvgSamplesPerSec=14.42
-[2023-04-14 09:43:37,835] [INFO] [logging.py:96:log_dist] [Rank 0] step=1520, skipped=21, lr=[8.966879312536848e-06, 8.966879312536848e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:43:37,854] [INFO] [timer.py:199:stop] epoch=0/micro_step=1520/global_step=1520, RunningAvgSamplesPerSec=109.15061098093494, CurrSamplesPerSec=101.99248759647922, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:43:37,946] [INFO] [logging.py:96:log_dist] [Rank 0] step=1520, skipped=23, lr=[4.647038240668007e-06, 4.647038240668007e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1519|ppo_ep: 1|act_loss: 0.0712890625|cri_loss: 0.10894775390625|unsuper_loss: 0.0
-average reward score: 4.89453125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.62%) |Training time=0.48s (21.88%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1520|ppo_ep: 1|act_loss: 0.173583984375|cri_loss: 0.12744140625|unsuper_loss: 0.0
-average reward score: 4.77734375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.84%) |Training time=0.47s (21.64%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1521|ppo_ep: 1|act_loss: 0.228759765625|cri_loss: 0.144775390625|unsuper_loss: 0.0
-average reward score: 4.7734375
--------------------------------------------------------------------------------------
-|E2E latency=2.75s |Gather latency=0.00s (0.00%) |Generate time=1.60s (58.34%) |Training time=0.48s (17.52%) |Others=0.66 (24.14%)|CurSamplesPerSec=11.65 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1522|ppo_ep: 1|act_loss: 0.2275390625|cri_loss: 0.133056640625|unsuper_loss: 0.0
-average reward score: 4.83984375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.27%) |Training time=0.46s (21.15%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41
-[2023-04-14 09:43:47,123] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 2048, reducing to 1024
-epoch: 0|step: 1523|ppo_ep: 1|act_loss: 0.055511474609375|cri_loss: 0.048187255859375|unsuper_loss: 0.0
-average reward score: 4.81640625
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.70%) |Training time=0.48s (21.81%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1524|ppo_ep: 1|act_loss: 0.0124359130859375|cri_loss: 0.0382080078125|unsuper_loss: 0.0
-average reward score: 4.65234375
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.62s (72.99%) |Training time=0.50s (22.55%) |Others=0.10 (4.45%)|CurSamplesPerSec=14.46 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1525|ppo_ep: 1|act_loss: -0.3203125|cri_loss: -0.0809326171875|unsuper_loss: 0.0
-average reward score: 4.73828125
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.65s (74.79%) |Training time=0.46s (20.74%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.50 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1526|ppo_ep: 1|act_loss: 0.00335693359375|cri_loss: 0.023529052734375|unsuper_loss: 0.0
-average reward score: 4.3359375
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.81%) |Training time=0.50s (22.70%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1527|ppo_ep: 1|act_loss: -0.03948974609375|cri_loss: 0.019134521484375|unsuper_loss: 0.0
-average reward score: 4.76953125
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.63s (73.64%) |Training time=0.48s (21.89%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.50 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1528|ppo_ep: 1|act_loss: 0.2244873046875|cri_loss: 0.134521484375|unsuper_loss: 0.0
-average reward score: 4.3984375
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.35%) |Training time=0.49s (22.18%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.56 |AvgSamplesPerSec=14.42
-[2023-04-14 09:44:00,334] [INFO] [logging.py:96:log_dist] [Rank 0] step=1530, skipped=22, lr=[8.958278725693138e-06, 8.958278725693138e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:44:00,352] [INFO] [timer.py:199:stop] epoch=0/micro_step=1530/global_step=1530, RunningAvgSamplesPerSec=109.09060936426617, CurrSamplesPerSec=97.64463647454845, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:44:00,445] [INFO] [logging.py:96:log_dist] [Rank 0] step=1530, skipped=23, lr=[4.642091605675834e-06, 4.642091605675834e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1529|ppo_ep: 1|act_loss: 0.209228515625|cri_loss: 0.13525390625|unsuper_loss: 0.0
-average reward score: 4.234375
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.23%) |Training time=0.49s (22.26%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.52 |AvgSamplesPerSec=14.42
-[2023-04-14 09:44:02,664] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 16384, reducing to 8192
-epoch: 0|step: 1530|ppo_ep: 1|act_loss: -0.031982421875|cri_loss: 0.02606201171875|unsuper_loss: 0.0
-average reward score: 4.2578125
--------------------------------------------------------------------------------------
-|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.68s (75.53%) |Training time=0.45s (20.42%) |Others=0.09 (4.05%)|CurSamplesPerSec=14.43 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1531|ppo_ep: 1|act_loss: -0.10589599609375|cri_loss: -0.023681640625|unsuper_loss: 0.0
-average reward score: 4.4765625
--------------------------------------------------------------------------------------
-|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.71s (74.62%) |Training time=0.48s (20.78%) |Others=0.11 (4.59%)|CurSamplesPerSec=13.95 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1532|ppo_ep: 1|act_loss: -0.62890625|cri_loss: -0.153076171875|unsuper_loss: 0.0
-average reward score: 3.93359375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.27%) |Training time=0.49s (22.22%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1533|ppo_ep: 1|act_loss: 0.1341552734375|cri_loss: 0.111083984375|unsuper_loss: 0.0
-average reward score: 4.3671875
--------------------------------------------------------------------------------------
-|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.77s (74.85%) |Training time=0.50s (20.97%) |Others=0.10 (4.18%)|CurSamplesPerSec=13.53 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1534|ppo_ep: 1|act_loss: 0.129638671875|cri_loss: 0.0816650390625|unsuper_loss: 0.0
-average reward score: 3.388671875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.25%) |Training time=0.49s (22.25%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1535|ppo_ep: 1|act_loss: 0.484375|cri_loss: 0.39990234375|unsuper_loss: 0.0
-average reward score: 2.533203125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.18%) |Training time=0.49s (22.32%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1536|ppo_ep: 1|act_loss: 0.16015625|cri_loss: 0.1719970703125|unsuper_loss: 0.0
-average reward score: 2.78515625
--------------------------------------------------------------------------------------
-|E2E latency=2.88s |Gather latency=0.00s (0.00%) |Generate time=1.60s (55.54%) |Training time=0.49s (17.01%) |Others=0.79 (27.45%)|CurSamplesPerSec=11.10 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1537|ppo_ep: 1|act_loss: 0.40625|cri_loss: 0.2578125|unsuper_loss: 0.0
-average reward score: 4.32421875
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.83%) |Training time=0.50s (22.67%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1538|ppo_ep: 1|act_loss: 0.3017578125|cri_loss: 0.2041015625|unsuper_loss: 0.0
-average reward score: 2.69140625
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.03%) |Training time=0.49s (22.48%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.41
-[2023-04-14 09:44:23,196] [INFO] [logging.py:96:log_dist] [Rank 0] step=1540, skipped=22, lr=[8.948664320677332e-06, 8.948664320677332e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:44:23,214] [INFO] [timer.py:199:stop] epoch=0/micro_step=1540/global_step=1540, RunningAvgSamplesPerSec=109.02335196349398, CurrSamplesPerSec=101.4977854172543, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:44:23,323] [INFO] [logging.py:96:log_dist] [Rank 0] step=1540, skipped=24, lr=[4.637612485008328e-06, 4.637612485008328e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1539|ppo_ep: 1|act_loss: 0.5537109375|cri_loss: 0.375|unsuper_loss: 0.0
-average reward score: 3.1484375
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.09%) |Training time=0.48s (21.79%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1540|ppo_ep: 1|act_loss: -0.2861328125|cri_loss: -0.0626220703125|unsuper_loss: 0.0
-average reward score: 3.4921875
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.91%) |Training time=0.50s (22.61%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1541|ppo_ep: 1|act_loss: -0.171875|cri_loss: -0.0218505859375|unsuper_loss: 0.0
-average reward score: 4.46875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.28%) |Training time=0.48s (22.15%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1542|ppo_ep: 1|act_loss: 0.02923583984375|cri_loss: 0.050079345703125|unsuper_loss: 0.0
-average reward score: 3.861328125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.72%) |Training time=0.50s (22.77%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1543|ppo_ep: 1|act_loss: 0.27490234375|cri_loss: 0.1748046875|unsuper_loss: 0.0
-average reward score: 3.6015625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.09%) |Training time=0.49s (22.40%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1544|ppo_ep: 1|act_loss: 0.25048828125|cri_loss: 0.1973876953125|unsuper_loss: 0.0
-average reward score: 2.509765625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.87%) |Training time=0.49s (22.64%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1545|ppo_ep: 1|act_loss: 0.181396484375|cri_loss: 0.1395263671875|unsuper_loss: 0.0
-average reward score: 2.671875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.70%) |Training time=0.50s (22.80%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1546|ppo_ep: 1|act_loss: -0.060211181640625|cri_loss: -0.006256103515625|unsuper_loss: 0.0
-average reward score: 3.3046875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.86%) |Training time=0.49s (22.64%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1547|ppo_ep: 1|act_loss: 0.07568359375|cri_loss: 0.0791015625|unsuper_loss: 0.0
-average reward score: 2.16015625
--------------------------------------------------------------------------------------
-|E2E latency=2.43s |Gather latency=0.00s (0.00%) |Generate time=1.59s (65.40%) |Training time=0.55s (22.71%) |Others=0.29 (11.90%)|CurSamplesPerSec=13.19 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1548|ppo_ep: 1|act_loss: -0.0823974609375|cri_loss: 0.028076171875|unsuper_loss: 0.0
-average reward score: 2.818359375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.79%) |Training time=0.50s (22.70%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.41
-[2023-04-14 09:44:45,262] [INFO] [logging.py:96:log_dist] [Rank 0] step=1550, skipped=22, lr=[8.938988793008496e-06, 8.938988793008496e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:44:45,280] [INFO] [timer.py:199:stop] epoch=0/micro_step=1550/global_step=1550, RunningAvgSamplesPerSec=108.92040127458814, CurrSamplesPerSec=96.0678427100437, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:44:45,372] [INFO] [logging.py:96:log_dist] [Rank 0] step=1550, skipped=24, lr=[4.632605586260949e-06, 4.632605586260949e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1549|ppo_ep: 1|act_loss: 0.34619140625|cri_loss: 0.21337890625|unsuper_loss: 0.0
-average reward score: 2.98046875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.73%) |Training time=0.50s (22.78%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1550|ppo_ep: 1|act_loss: 0.369140625|cri_loss: 0.21630859375|unsuper_loss: 0.0
-average reward score: 3.705078125
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.14%) |Training time=0.51s (23.39%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.57 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1551|ppo_ep: 1|act_loss: 0.46142578125|cri_loss: 0.289306640625|unsuper_loss: 0.0
-average reward score: 3.84375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.66%) |Training time=0.47s (21.79%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1552|ppo_ep: 1|act_loss: 0.345703125|cri_loss: 0.218994140625|unsuper_loss: 0.0
-average reward score: 3.10546875
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.58s (71.62%) |Training time=0.53s (23.93%) |Others=0.10 (4.45%)|CurSamplesPerSec=14.47 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1553|ppo_ep: 1|act_loss: 0.163330078125|cri_loss: 0.12451171875|unsuper_loss: 0.0
-average reward score: 4.44921875
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.58s (71.78%) |Training time=0.52s (23.76%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.52 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1554|ppo_ep: 1|act_loss: 0.135498046875|cri_loss: 0.0986328125|unsuper_loss: 0.0
-average reward score: 3.1015625
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.65s (74.94%) |Training time=0.45s (20.56%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.52 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1555|ppo_ep: 1|act_loss: -0.2354736328125|cri_loss: -0.0045166015625|unsuper_loss: 0.0
-average reward score: 4.90625
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.30%) |Training time=0.49s (22.20%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.52 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1556|ppo_ep: 1|act_loss: 0.00152587890625|cri_loss: 0.057708740234375|unsuper_loss: 0.0
-average reward score: 2.7890625
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.18%) |Training time=0.49s (22.34%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1557|ppo_ep: 1|act_loss: 0.43017578125|cri_loss: 0.260009765625|unsuper_loss: 0.0
-average reward score: 2.853515625
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.23%) |Training time=0.49s (22.30%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.51 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1558|ppo_ep: 1|act_loss: 0.15673828125|cri_loss: 0.125244140625|unsuper_loss: 0.0
-average reward score: 2.9765625
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.13%) |Training time=0.47s (21.36%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.41
-[2023-04-14 09:45:07,278] [INFO] [logging.py:96:log_dist] [Rank 0] step=1560, skipped=22, lr=[8.929252286101288e-06, 8.929252286101288e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:45:07,296] [INFO] [timer.py:199:stop] epoch=0/micro_step=1560/global_step=1560, RunningAvgSamplesPerSec=108.83748797941826, CurrSamplesPerSec=101.76667574004247, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:45:07,389] [INFO] [logging.py:96:log_dist] [Rank 0] step=1560, skipped=24, lr=[4.62756707715415e-06, 4.62756707715415e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1559|ppo_ep: 1|act_loss: 0.161865234375|cri_loss: 0.1217041015625|unsuper_loss: 0.0
-average reward score: 3.515625
--------------------------------------------------------------------------------------
-|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.66s (74.22%) |Training time=0.48s (21.37%) |Others=0.10 (4.41%)|CurSamplesPerSec=14.34 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1560|ppo_ep: 1|act_loss: 0.08599853515625|cri_loss: 0.06768798828125|unsuper_loss: 0.0
-average reward score: 3.03515625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.92%) |Training time=0.45s (20.56%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1561|ppo_ep: 1|act_loss: 0.1705322265625|cri_loss: 0.1212158203125|unsuper_loss: 0.0
-average reward score: 4.92578125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.54%) |Training time=0.45s (20.90%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1562|ppo_ep: 1|act_loss: 0.094970703125|cri_loss: 0.07098388671875|unsuper_loss: 0.0
-average reward score: 2.849609375
--------------------------------------------------------------------------------------
-|E2E latency=2.79s |Gather latency=0.00s (0.00%) |Generate time=1.77s (63.41%) |Training time=0.49s (17.69%) |Others=0.53 (18.90%)|CurSamplesPerSec=11.45 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1563|ppo_ep: 1|act_loss: 0.21337890625|cri_loss: 0.126708984375|unsuper_loss: 0.0
-average reward score: 3.6875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.25%) |Training time=0.46s (21.13%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1564|ppo_ep: 1|act_loss: 0.0687255859375|cri_loss: 0.06353759765625|unsuper_loss: 0.0
-average reward score: 4.07421875
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.20%) |Training time=0.47s (21.31%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1565|ppo_ep: 1|act_loss: 0.040740966796875|cri_loss: 0.034027099609375|unsuper_loss: 0.0
-average reward score: 4.078125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.59%) |Training time=0.47s (21.59%) |Others=0.11 (4.81%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1566|ppo_ep: 1|act_loss: 0.20751953125|cri_loss: 0.12158203125|unsuper_loss: 0.0
-average reward score: 3.55859375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.58%) |Training time=0.48s (21.90%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1567|ppo_ep: 1|act_loss: 0.0308380126953125|cri_loss: 0.02667236328125|unsuper_loss: 0.0
-average reward score: 2.984375
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.80%) |Training time=0.48s (21.73%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1568|ppo_ep: 1|act_loss: 0.1075439453125|cri_loss: 0.0609130859375|unsuper_loss: 0.0
-average reward score: 4.5625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.08%) |Training time=0.47s (21.40%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.41
-[2023-04-14 09:45:29,668] [INFO] [logging.py:96:log_dist] [Rank 0] step=1570, skipped=22, lr=[8.919454944274233e-06, 8.919454944274233e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:45:29,686] [INFO] [timer.py:199:stop] epoch=0/micro_step=1570/global_step=1570, RunningAvgSamplesPerSec=108.8116418141218, CurrSamplesPerSec=105.11076914161218, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:45:29,778] [INFO] [logging.py:96:log_dist] [Rank 0] step=1570, skipped=24, lr=[4.622497032370792e-06, 4.622497032370792e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1569|ppo_ep: 1|act_loss: -0.0347900390625|cri_loss: -0.00799560546875|unsuper_loss: 0.0
-average reward score: 3.5234375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.04%) |Training time=0.47s (21.49%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1570|ppo_ep: 1|act_loss: 0.10052490234375|cri_loss: 0.06103515625|unsuper_loss: 0.0
-average reward score: 3.310546875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.01%) |Training time=0.47s (21.48%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1571|ppo_ep: 1|act_loss: -0.04644775390625|cri_loss: -0.01904296875|unsuper_loss: 0.0
-average reward score: 2.890625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.39%) |Training time=0.46s (21.06%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1572|ppo_ep: 1|act_loss: -0.6005859375|cri_loss: -0.1934814453125|unsuper_loss: 0.0
-average reward score: 2.80078125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.43%) |Training time=0.48s (22.06%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1573|ppo_ep: 1|act_loss: -0.2471923828125|cri_loss: -0.1044921875|unsuper_loss: 0.0
-average reward score: 2.806640625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.58%) |Training time=0.48s (21.92%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1574|ppo_ep: 1|act_loss: 0.23974609375|cri_loss: 0.1318359375|unsuper_loss: 0.0
-average reward score: 3.423828125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.47%) |Training time=0.48s (22.02%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1575|ppo_ep: 1|act_loss: 0.115478515625|cri_loss: 0.07293701171875|unsuper_loss: 0.0
-average reward score: 3.80859375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.76%) |Training time=0.47s (21.73%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1576|ppo_ep: 1|act_loss: 0.1158447265625|cri_loss: 0.0618896484375|unsuper_loss: 0.0
-average reward score: 3.185546875
--------------------------------------------------------------------------------------
-|E2E latency=2.83s |Gather latency=0.00s (0.00%) |Generate time=1.77s (62.47%) |Training time=0.48s (16.92%) |Others=0.58 (20.61%)|CurSamplesPerSec=11.32 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1577|ppo_ep: 1|act_loss: -0.0975341796875|cri_loss: -0.0289306640625|unsuper_loss: 0.0
-average reward score: 3.537109375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.50%) |Training time=0.48s (22.00%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1578|ppo_ep: 1|act_loss: 0.1142578125|cri_loss: 0.0662841796875|unsuper_loss: 0.0
-average reward score: 3.919921875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.58%) |Training time=0.48s (21.90%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.41
-[2023-04-14 09:45:52,093] [INFO] [logging.py:96:log_dist] [Rank 0] step=1580, skipped=22, lr=[8.909596912747568e-06, 8.909596912747568e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:45:52,111] [INFO] [timer.py:199:stop] epoch=0/micro_step=1580/global_step=1580, RunningAvgSamplesPerSec=108.76884252619337, CurrSamplesPerSec=101.50047189216659, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:45:52,204] [INFO] [logging.py:96:log_dist] [Rank 0] step=1580, skipped=24, lr=[4.617395527061168e-06, 4.617395527061168e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1579|ppo_ep: 1|act_loss: -0.0958251953125|cri_loss: -0.0352783203125|unsuper_loss: 0.0
-average reward score: 4.23046875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.55%) |Training time=0.48s (21.96%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1580|ppo_ep: 1|act_loss: -0.014373779296875|cri_loss: 0.0070953369140625|unsuper_loss: 0.0
-average reward score: 4.1953125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.62%) |Training time=0.47s (21.85%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1581|ppo_ep: 1|act_loss: 0.1207275390625|cri_loss: 0.0650634765625|unsuper_loss: 0.0
-average reward score: 3.24609375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.62%) |Training time=0.47s (21.85%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1582|ppo_ep: 1|act_loss: 0.0110321044921875|cri_loss: 0.020233154296875|unsuper_loss: 0.0
-average reward score: 4.88671875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.51%) |Training time=0.48s (21.97%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1583|ppo_ep: 1|act_loss: -0.049652099609375|cri_loss: -0.022430419921875|unsuper_loss: 0.0
-average reward score: 3.423828125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.67%) |Training time=0.47s (21.82%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1584|ppo_ep: 1|act_loss: 0.0467529296875|cri_loss: 0.033203125|unsuper_loss: 0.0
-average reward score: 3.763671875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.70%) |Training time=0.47s (21.80%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1585|ppo_ep: 1|act_loss: -0.01050567626953125|cri_loss: 0.001220703125|unsuper_loss: 0.0
-average reward score: 3.591796875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.59%) |Training time=0.48s (21.80%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1586|ppo_ep: 1|act_loss: 0.107666015625|cri_loss: 0.0589599609375|unsuper_loss: 0.0
-average reward score: 3.427734375
--------------------------------------------------------------------------------------
-|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.29%) |Training time=0.51s (23.19%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.44 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1587|ppo_ep: 1|act_loss: 0.03472900390625|cri_loss: 0.0185546875|unsuper_loss: 0.0
-average reward score: 4.4609375
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.00%) |Training time=0.49s (22.16%) |Others=0.11 (4.84%)|CurSamplesPerSec=14.51 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1588|ppo_ep: 1|act_loss: 0.060333251953125|cri_loss: 0.0380859375|unsuper_loss: 0.0
-average reward score: 3.052734375
--------------------------------------------------------------------------------------
-|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.63s (70.59%) |Training time=0.58s (25.15%) |Others=0.10 (4.26%)|CurSamplesPerSec=13.82 |AvgSamplesPerSec=14.41
-[2023-04-14 09:46:14,048] [INFO] [logging.py:96:log_dist] [Rank 0] step=1590, skipped=22, lr=[8.899678337641102e-06, 8.899678337641102e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:46:14,066] [INFO] [timer.py:199:stop] epoch=0/micro_step=1590/global_step=1590, RunningAvgSamplesPerSec=108.69881169225158, CurrSamplesPerSec=101.52227597704174, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:46:14,159] [INFO] [logging.py:96:log_dist] [Rank 0] step=1590, skipped=24, lr=[4.612262636841895e-06, 4.612262636841895e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1589|ppo_ep: 1|act_loss: -0.0256195068359375|cri_loss: -0.009033203125|unsuper_loss: 0.0
-average reward score: 3.4765625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.52%) |Training time=0.48s (21.97%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1590|ppo_ep: 1|act_loss: 0.00982666015625|cri_loss: 0.0081329345703125|unsuper_loss: 0.0
-average reward score: 3.65234375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.38%) |Training time=0.48s (22.09%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1591|ppo_ep: 1|act_loss: 0.06524658203125|cri_loss: 0.035430908203125|unsuper_loss: 0.0
-average reward score: 3.935546875
--------------------------------------------------------------------------------------
-|E2E latency=2.44s |Gather latency=0.00s (0.00%) |Generate time=1.77s (72.45%) |Training time=0.49s (20.24%) |Others=0.18 (7.31%)|CurSamplesPerSec=13.10 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1592|ppo_ep: 1|act_loss: 0.037811279296875|cri_loss: 0.021453857421875|unsuper_loss: 0.0
-average reward score: 3.34765625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.26%) |Training time=0.48s (22.22%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1593|ppo_ep: 1|act_loss: 0.0085601806640625|cri_loss: 0.006603240966796875|unsuper_loss: 0.0
-average reward score: 4.12890625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.57%) |Training time=0.48s (21.90%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1594|ppo_ep: 1|act_loss: -0.097412109375|cri_loss: -0.033599853515625|unsuper_loss: 0.0
-average reward score: 4.7734375
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.39%) |Training time=0.49s (22.13%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.56 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1595|ppo_ep: 1|act_loss: -0.043792724609375|cri_loss: -0.01497650146484375|unsuper_loss: 0.0
-average reward score: 3.98046875
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.04%) |Training time=0.49s (22.45%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1596|ppo_ep: 1|act_loss: 0.02978515625|cri_loss: 0.022247314453125|unsuper_loss: 0.0
-average reward score: 4.1796875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.12%) |Training time=0.46s (21.35%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1597|ppo_ep: 1|act_loss: 0.091796875|cri_loss: 0.0494384765625|unsuper_loss: 0.0
-average reward score: 3.82421875
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.29%) |Training time=0.49s (22.21%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1598|ppo_ep: 1|act_loss: 0.023773193359375|cri_loss: 0.0134124755859375|unsuper_loss: 0.0
-average reward score: 5.109375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.49%) |Training time=0.48s (22.00%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.41
-[2023-04-14 09:46:36,122] [INFO] [logging.py:96:log_dist] [Rank 0] step=1600, skipped=22, lr=[8.889699365972046e-06, 8.889699365972046e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:46:36,140] [INFO] [timer.py:199:stop] epoch=0/micro_step=1600/global_step=1600, RunningAvgSamplesPerSec=108.6402557626682, CurrSamplesPerSec=100.86045297044397, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:46:36,232] [INFO] [logging.py:96:log_dist] [Rank 0] step=1600, skipped=24, lr=[4.6070984377947884e-06, 4.6070984377947884e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1599|ppo_ep: 1|act_loss: 0.02203369140625|cri_loss: 0.0122222900390625|unsuper_loss: 0.0
-average reward score: 4.140625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.50%) |Training time=0.48s (22.01%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1600|ppo_ep: 1|act_loss: -0.00275421142578125|cri_loss: 0.000911712646484375|unsuper_loss: 0.0
-average reward score: 4.296875
--------------------------------------------------------------------------------------
-|E2E latency=3.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (50.51%) |Training time=0.48s (15.23%) |Others=1.09 (34.26%)|CurSamplesPerSec=10.08 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1601|ppo_ep: 1|act_loss: -0.01715087890625|cri_loss: -0.007396697998046875|unsuper_loss: 0.0
-average reward score: 3.7578125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.62%) |Training time=0.48s (21.85%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1602|ppo_ep: 1|act_loss: -0.04461669921875|cri_loss: -0.0196533203125|unsuper_loss: 0.0
-average reward score: 3.693359375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.58%) |Training time=0.48s (21.91%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1603|ppo_ep: 1|act_loss: 0.00501251220703125|cri_loss: 0.005153656005859375|unsuper_loss: 0.0
-average reward score: 3.765625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.59%) |Training time=0.48s (21.91%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1604|ppo_ep: 1|act_loss: -0.03546142578125|cri_loss: -0.015838623046875|unsuper_loss: 0.0
-average reward score: 4.3125
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.78%) |Training time=0.47s (21.57%) |Others=0.12 (5.65%)|CurSamplesPerSec=14.54 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1605|ppo_ep: 1|act_loss: 0.10308837890625|cri_loss: 0.058502197265625|unsuper_loss: 0.0
-average reward score: 4.0078125
--------------------------------------------------------------------------------------
-|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.73s (74.31%) |Training time=0.49s (21.15%) |Others=0.11 (4.54%)|CurSamplesPerSec=13.73 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1606|ppo_ep: 1|act_loss: -0.0546875|cri_loss: -0.02545166015625|unsuper_loss: 0.0
-average reward score: 3.8046875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.69%) |Training time=0.47s (21.79%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1607|ppo_ep: 1|act_loss: 0.0222930908203125|cri_loss: 0.01284027099609375|unsuper_loss: 0.0
-average reward score: 4.12890625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.96%) |Training time=0.47s (21.51%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1608|ppo_ep: 1|act_loss: -0.04022216796875|cri_loss: -0.0186004638671875|unsuper_loss: 0.0
-average reward score: 4.2265625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.68%) |Training time=0.47s (21.79%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.41
-[2023-04-14 09:46:59,030] [INFO] [logging.py:96:log_dist] [Rank 0] step=1610, skipped=22, lr=[8.879660145652832e-06, 8.879660145652832e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:46:59,048] [INFO] [timer.py:199:stop] epoch=0/micro_step=1610/global_step=1610, RunningAvgSamplesPerSec=108.59716815707017, CurrSamplesPerSec=105.74269943889715, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:46:59,141] [INFO] [logging.py:96:log_dist] [Rank 0] step=1610, skipped=24, lr=[4.601903006465734e-06, 4.601903006465734e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1609|ppo_ep: 1|act_loss: -0.0167388916015625|cri_loss: -0.006267547607421875|unsuper_loss: 0.0
-average reward score: 4.0703125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.00%) |Training time=0.47s (21.47%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1610|ppo_ep: 1|act_loss: -0.02960205078125|cri_loss: -0.01259613037109375|unsuper_loss: 0.0
-average reward score: 4.08984375
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.59%) |Training time=0.48s (21.92%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1611|ppo_ep: 1|act_loss: -0.00969696044921875|cri_loss: -0.00272369384765625|unsuper_loss: 0.0
-average reward score: 5.1171875
--------------------------------------------------------------------------------------
-|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.61s (72.18%) |Training time=0.51s (22.87%) |Others=0.11 (4.94%)|CurSamplesPerSec=14.38 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1612|ppo_ep: 1|act_loss: 0.0380859375|cri_loss: 0.02044677734375|unsuper_loss: 0.0
-average reward score: 3.4609375
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.07%) |Training time=0.49s (22.44%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.54 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1613|ppo_ep: 1|act_loss: 0.06634521484375|cri_loss: 0.03521728515625|unsuper_loss: 0.0
-average reward score: 4.12890625
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.66s (75.14%) |Training time=0.45s (20.37%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.48 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1614|ppo_ep: 1|act_loss: 0.0743408203125|cri_loss: 0.03887939453125|unsuper_loss: 0.0
-average reward score: 4.20703125
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.39%) |Training time=0.49s (22.12%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1615|ppo_ep: 1|act_loss: 0.0175018310546875|cri_loss: 0.01300048828125|unsuper_loss: 0.0
-average reward score: 4.3515625
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.26%) |Training time=0.49s (22.05%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.52 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1616|ppo_ep: 1|act_loss: 0.02777099609375|cri_loss: 0.0153656005859375|unsuper_loss: 0.0
-average reward score: 3.30859375
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.79%) |Training time=0.48s (21.71%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.56 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1617|ppo_ep: 1|act_loss: -0.039794921875|cri_loss: -0.0181884765625|unsuper_loss: 0.0
-average reward score: 4.49609375
--------------------------------------------------------------------------------------
-|E2E latency=2.42s |Gather latency=0.00s (0.00%) |Generate time=1.73s (71.29%) |Training time=0.48s (19.72%) |Others=0.22 (8.99%)|CurSamplesPerSec=13.21 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1618|ppo_ep: 1|act_loss: 0.07086181640625|cri_loss: 0.03765869140625|unsuper_loss: 0.0
-average reward score: 3.755859375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.71%) |Training time=0.45s (20.78%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.41
-[2023-04-14 09:47:21,223] [INFO] [logging.py:96:log_dist] [Rank 0] step=1620, skipped=22, lr=[8.869560825488926e-06, 8.869560825488926e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:47:21,241] [INFO] [timer.py:199:stop] epoch=0/micro_step=1620/global_step=1620, RunningAvgSamplesPerSec=108.55081239645398, CurrSamplesPerSec=106.41888138557795, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:47:21,334] [INFO] [logging.py:96:log_dist] [Rank 0] step=1620, skipped=24, lr=[4.596676419863561e-06, 4.596676419863561e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1619|ppo_ep: 1|act_loss: 0.0615234375|cri_loss: 0.03411865234375|unsuper_loss: 0.0
-average reward score: 4.44140625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.12%) |Training time=0.46s (21.36%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1620|ppo_ep: 1|act_loss: 0.024871826171875|cri_loss: 0.01503753662109375|unsuper_loss: 0.0
-average reward score: 3.728515625
--------------------------------------------------------------------------------------
-|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.77s (74.71%) |Training time=0.50s (21.13%) |Others=0.10 (4.16%)|CurSamplesPerSec=13.50 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1621|ppo_ep: 1|act_loss: -0.0162811279296875|cri_loss: -0.007350921630859375|unsuper_loss: 0.0
-average reward score: 5.1328125
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.84%) |Training time=0.48s (21.69%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1622|ppo_ep: 1|act_loss: -0.02630615234375|cri_loss: -0.01035308837890625|unsuper_loss: 0.0
-average reward score: 4.40625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.77%) |Training time=0.47s (21.71%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1623|ppo_ep: 1|act_loss: 0.03363037109375|cri_loss: 0.0177154541015625|unsuper_loss: 0.0
-average reward score: 3.87109375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.55%) |Training time=0.48s (21.93%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1624|ppo_ep: 1|act_loss: 0.005886077880859375|cri_loss: 0.003627777099609375|unsuper_loss: 0.0
-average reward score: 3.359375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.09%) |Training time=0.49s (22.37%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1625|ppo_ep: 1|act_loss: 0.0280609130859375|cri_loss: 0.01690673828125|unsuper_loss: 0.0
-average reward score: 2.912109375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.24%) |Training time=0.46s (21.22%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1626|ppo_ep: 1|act_loss: -0.050811767578125|cri_loss: -0.021881103515625|unsuper_loss: 0.0
-average reward score: 4.94921875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.40%) |Training time=0.48s (22.06%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1627|ppo_ep: 1|act_loss: -0.05517578125|cri_loss: -0.0225067138671875|unsuper_loss: 0.0
-average reward score: 4.9921875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.55%) |Training time=0.48s (21.94%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1628|ppo_ep: 1|act_loss: 0.07183837890625|cri_loss: 0.039794921875|unsuper_loss: 0.0
-average reward score: 4.18359375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.55%) |Training time=0.48s (21.92%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.41
-[2023-04-14 09:47:43,177] [INFO] [logging.py:96:log_dist] [Rank 0] step=1630, skipped=22, lr=[8.859401555176615e-06, 8.859401555176615e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:47:43,482] [INFO] [timer.py:199:stop] epoch=0/micro_step=1630/global_step=1630, RunningAvgSamplesPerSec=108.43823317583377, CurrSamplesPerSec=53.26256723411719, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:47:43,575] [INFO] [logging.py:96:log_dist] [Rank 0] step=1630, skipped=24, lr=[4.591418755458887e-06, 4.591418755458887e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1629|ppo_ep: 1|act_loss: 0.09765625|cri_loss: 0.051177978515625|unsuper_loss: 0.0
-average reward score: 3.75390625
--------------------------------------------------------------------------------------
-|E2E latency=2.45s |Gather latency=0.00s (0.00%) |Generate time=1.59s (64.92%) |Training time=0.76s (31.10%) |Others=0.10 (3.99%)|CurSamplesPerSec=13.04 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1630|ppo_ep: 1|act_loss: 0.0201416015625|cri_loss: 0.0125274658203125|unsuper_loss: 0.0
-average reward score: 4.66796875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.45%) |Training time=0.48s (22.00%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1631|ppo_ep: 1|act_loss: 0.018310546875|cri_loss: 0.0108489990234375|unsuper_loss: 0.0
-average reward score: 4.23046875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.24%) |Training time=0.48s (22.19%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1632|ppo_ep: 1|act_loss: 0.0361328125|cri_loss: 0.019378662109375|unsuper_loss: 0.0
-average reward score: 4.4921875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.13%) |Training time=0.48s (22.33%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1633|ppo_ep: 1|act_loss: -0.019989013671875|cri_loss: -0.00919342041015625|unsuper_loss: 0.0
-average reward score: 4.1875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.25%) |Training time=0.48s (22.23%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1634|ppo_ep: 1|act_loss: 0.031402587890625|cri_loss: 0.01763916015625|unsuper_loss: 0.0
-average reward score: 4.09375
--------------------------------------------------------------------------------------
-|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.64s (71.03%) |Training time=0.57s (24.67%) |Others=0.10 (4.30%)|CurSamplesPerSec=13.83 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1635|ppo_ep: 1|act_loss: 0.04095458984375|cri_loss: 0.0223541259765625|unsuper_loss: 0.0
-average reward score: 3.7890625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.86%) |Training time=0.49s (22.63%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1636|ppo_ep: 1|act_loss: -0.003795623779296875|cri_loss: -0.0005779266357421875|unsuper_loss: 0.0
-average reward score: 3.61328125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.04%) |Training time=0.49s (22.44%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1637|ppo_ep: 1|act_loss: -0.004100799560546875|cri_loss: -0.0005321502685546875|unsuper_loss: 0.0
-average reward score: 4.2109375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.02%) |Training time=0.49s (22.47%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1638|ppo_ep: 1|act_loss: 0.043060302734375|cri_loss: 0.02545166015625|unsuper_loss: 0.0
-average reward score: 3.30859375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.82%) |Training time=0.49s (22.68%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.41
-[2023-04-14 09:48:05,298] [INFO] [logging.py:96:log_dist] [Rank 0] step=1640, skipped=22, lr=[8.849182485300792e-06, 8.849182485300792e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:48:05,316] [INFO] [timer.py:199:stop] epoch=0/micro_step=1640/global_step=1640, RunningAvgSamplesPerSec=108.37462570038971, CurrSamplesPerSec=107.1831391727385, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:48:05,409] [INFO] [logging.py:96:log_dist] [Rank 0] step=1640, skipped=24, lr=[4.586130091182985e-06, 4.586130091182985e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1639|ppo_ep: 1|act_loss: 0.005695343017578125|cri_loss: 0.00555419921875|unsuper_loss: 0.0
-average reward score: 4.1953125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.90%) |Training time=0.46s (21.53%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1640|ppo_ep: 1|act_loss: -0.004543304443359375|cri_loss: -0.0017652511596679688|unsuper_loss: 0.0
-average reward score: 4.12890625
--------------------------------------------------------------------------------------
-|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.59s (70.44%) |Training time=0.47s (20.68%) |Others=0.20 (8.88%)|CurSamplesPerSec=14.19 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1641|ppo_ep: 1|act_loss: 0.014434814453125|cri_loss: 0.00841522216796875|unsuper_loss: 0.0
-average reward score: 3.341796875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.97%) |Training time=0.49s (22.52%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1642|ppo_ep: 1|act_loss: -0.00848388671875|cri_loss: -0.003963470458984375|unsuper_loss: 0.0
-average reward score: 5.80078125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.21%) |Training time=0.48s (22.28%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1643|ppo_ep: 1|act_loss: 0.031829833984375|cri_loss: 0.016815185546875|unsuper_loss: 0.0
-average reward score: 5.17578125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.15%) |Training time=0.48s (22.31%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1644|ppo_ep: 1|act_loss: 0.0106048583984375|cri_loss: 0.00621795654296875|unsuper_loss: 0.0
-average reward score: 4.61328125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.23%) |Training time=0.48s (22.24%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1645|ppo_ep: 1|act_loss: 0.038177490234375|cri_loss: 0.0225982666015625|unsuper_loss: 0.0
-average reward score: 3.908203125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.64%) |Training time=0.47s (21.81%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1646|ppo_ep: 1|act_loss: -0.0369873046875|cri_loss: -0.017547607421875|unsuper_loss: 0.0
-average reward score: 4.453125
--------------------------------------------------------------------------------------
-|E2E latency=2.30s |Gather latency=0.00s (0.00%) |Generate time=1.71s (74.11%) |Training time=0.49s (21.49%) |Others=0.10 (4.41%)|CurSamplesPerSec=13.89 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1647|ppo_ep: 1|act_loss: -0.051971435546875|cri_loss: -0.0243682861328125|unsuper_loss: 0.0
-average reward score: 3.453125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.06%) |Training time=0.49s (22.41%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1648|ppo_ep: 1|act_loss: 0.01430511474609375|cri_loss: 0.00861358642578125|unsuper_loss: 0.0
-average reward score: 4.2734375
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.17%) |Training time=0.49s (22.36%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.41
-[2023-04-14 09:48:27,226] [INFO] [logging.py:96:log_dist] [Rank 0] step=1650, skipped=22, lr=[8.838903767332725e-06, 8.838903767332725e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:48:27,244] [INFO] [timer.py:199:stop] epoch=0/micro_step=1650/global_step=1650, RunningAvgSamplesPerSec=108.32006295141781, CurrSamplesPerSec=100.62015510849723, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:48:27,337] [INFO] [logging.py:96:log_dist] [Rank 0] step=1650, skipped=24, lr=[4.580810505426617e-06, 4.580810505426617e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1649|ppo_ep: 1|act_loss: 0.0341796875|cri_loss: 0.018310546875|unsuper_loss: 0.0
-average reward score: 3.806640625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.26%) |Training time=0.48s (22.22%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1650|ppo_ep: 1|act_loss: 0.0545654296875|cri_loss: 0.029083251953125|unsuper_loss: 0.0
-average reward score: 4.59375
--------------------------------------------------------------------------------------
-|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.75s (74.12%) |Training time=0.51s (21.64%) |Others=0.10 (4.24%)|CurSamplesPerSec=13.55 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1651|ppo_ep: 1|act_loss: 0.0146026611328125|cri_loss: 0.0078277587890625|unsuper_loss: 0.0
-average reward score: 4.9375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.64%) |Training time=0.47s (21.82%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1652|ppo_ep: 1|act_loss: -0.00402069091796875|cri_loss: -0.0014247894287109375|unsuper_loss: 0.0
-average reward score: 4.61328125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.87%) |Training time=0.49s (22.63%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1653|ppo_ep: 1|act_loss: 0.00786590576171875|cri_loss: 0.00921630859375|unsuper_loss: 0.0
-average reward score: 3.876953125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.14%) |Training time=0.48s (22.31%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1654|ppo_ep: 1|act_loss: -0.023162841796875|cri_loss: -0.0103912353515625|unsuper_loss: 0.0
-average reward score: 4.1875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.44%) |Training time=0.47s (22.00%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1655|ppo_ep: 1|act_loss: -0.0096435546875|cri_loss: -0.004184722900390625|unsuper_loss: 0.0
-average reward score: 4.55859375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.16%) |Training time=0.49s (22.25%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1656|ppo_ep: 1|act_loss: 0.018157958984375|cri_loss: 0.011199951171875|unsuper_loss: 0.0
-average reward score: 5.671875
--------------------------------------------------------------------------------------
-|E2E latency=2.46s |Gather latency=0.00s (0.00%) |Generate time=1.59s (64.54%) |Training time=0.45s (18.46%) |Others=0.42 (17.01%)|CurSamplesPerSec=13.00 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1657|ppo_ep: 1|act_loss: 0.024444580078125|cri_loss: 0.01318359375|unsuper_loss: 0.0
-average reward score: 4.3515625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.73%) |Training time=0.46s (21.69%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1658|ppo_ep: 1|act_loss: 0.024383544921875|cri_loss: 0.0128021240234375|unsuper_loss: 0.0
-average reward score: 3.88671875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.81%) |Training time=0.46s (21.62%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.41
-[2023-04-14 09:48:49,312] [INFO] [logging.py:96:log_dist] [Rank 0] step=1660, skipped=22, lr=[8.82856555362781e-06, 8.82856555362781e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:48:49,331] [INFO] [timer.py:199:stop] epoch=0/micro_step=1660/global_step=1660, RunningAvgSamplesPerSec=108.27997592914936, CurrSamplesPerSec=107.1406158574308, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:48:49,423] [INFO] [logging.py:96:log_dist] [Rank 0] step=1660, skipped=24, lr=[4.575460077038877e-06, 4.575460077038877e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1659|ppo_ep: 1|act_loss: 0.0134429931640625|cri_loss: 0.0074615478515625|unsuper_loss: 0.0
-average reward score: 3.8671875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.86%) |Training time=0.46s (21.57%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1660|ppo_ep: 1|act_loss: -0.0192413330078125|cri_loss: -0.009063720703125|unsuper_loss: 0.0
-average reward score: 4.8671875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.71%) |Training time=0.47s (21.71%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1661|ppo_ep: 1|act_loss: -0.00200653076171875|cri_loss: -0.00023174285888671875|unsuper_loss: 0.0
-average reward score: 4.94140625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.91%) |Training time=0.46s (21.51%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1662|ppo_ep: 1|act_loss: -0.0095977783203125|cri_loss: -0.003932952880859375|unsuper_loss: 0.0
-average reward score: 4.2421875
--------------------------------------------------------------------------------------
-|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.58s (69.08%) |Training time=0.46s (20.33%) |Others=0.24 (10.59%)|CurSamplesPerSec=13.99 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1663|ppo_ep: 1|act_loss: 0.002063751220703125|cri_loss: 0.0014085769653320312|unsuper_loss: 0.0
-average reward score: 5.265625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.59%) |Training time=0.47s (21.82%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1664|ppo_ep: 1|act_loss: 0.04315185546875|cri_loss: 0.0236358642578125|unsuper_loss: 0.0
-average reward score: 3.734375
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.57s (71.26%) |Training time=0.48s (21.65%) |Others=0.16 (7.09%)|CurSamplesPerSec=14.49 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1665|ppo_ep: 1|act_loss: -0.00046062469482421875|cri_loss: 0.0005645751953125|unsuper_loss: 0.0
-average reward score: 4.5078125
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.12%) |Training time=0.47s (21.71%) |Others=0.11 (5.17%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1666|ppo_ep: 1|act_loss: 0.0020160675048828125|cri_loss: 0.0041656494140625|unsuper_loss: 0.0
-average reward score: 3.298828125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.68%) |Training time=0.47s (21.76%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1667|ppo_ep: 1|act_loss: -0.0254974365234375|cri_loss: -0.01202392578125|unsuper_loss: 0.0
-average reward score: 5.234375
--------------------------------------------------------------------------------------
-|E2E latency=2.46s |Gather latency=0.00s (0.00%) |Generate time=1.58s (64.14%) |Training time=0.47s (19.15%) |Others=0.41 (16.71%)|CurSamplesPerSec=13.01 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1668|ppo_ep: 1|act_loss: 0.020050048828125|cri_loss: 0.010650634765625|unsuper_loss: 0.0
-average reward score: 3.779296875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.48%) |Training time=0.47s (21.95%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.41
-[2023-04-14 09:49:11,326] [INFO] [logging.py:96:log_dist] [Rank 0] step=1670, skipped=22, lr=[8.818167997423314e-06, 8.818167997423314e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:49:11,344] [INFO] [timer.py:199:stop] epoch=0/micro_step=1670/global_step=1670, RunningAvgSamplesPerSec=108.25653389955217, CurrSamplesPerSec=106.11741265840399, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:49:11,437] [INFO] [logging.py:96:log_dist] [Rank 0] step=1670, skipped=24, lr=[4.5700788853260205e-06, 4.5700788853260205e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1669|ppo_ep: 1|act_loss: -0.01548004150390625|cri_loss: -0.00714111328125|unsuper_loss: 0.0
-average reward score: 4.375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.82%) |Training time=0.46s (21.61%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1670|ppo_ep: 1|act_loss: 0.02532958984375|cri_loss: 0.014007568359375|unsuper_loss: 0.0
-average reward score: 4.796875
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.14%) |Training time=0.49s (22.23%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1671|ppo_ep: 1|act_loss: -0.00852203369140625|cri_loss: -0.00392913818359375|unsuper_loss: 0.0
-average reward score: 5.30078125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.21%) |Training time=0.48s (22.27%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1672|ppo_ep: 1|act_loss: 0.0172882080078125|cri_loss: 0.0097808837890625|unsuper_loss: 0.0
-average reward score: 4.484375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.97%) |Training time=0.49s (22.54%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1673|ppo_ep: 1|act_loss: 0.00482177734375|cri_loss: 0.003299713134765625|unsuper_loss: 0.0
-average reward score: 4.30078125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.85%) |Training time=0.49s (22.64%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1674|ppo_ep: 1|act_loss: 0.015869140625|cri_loss: 0.00927734375|unsuper_loss: 0.0
-average reward score: 4.89453125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.95%) |Training time=0.49s (22.55%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1675|ppo_ep: 1|act_loss: 0.0238037109375|cri_loss: 0.01232147216796875|unsuper_loss: 0.0
-average reward score: 4.6171875
--------------------------------------------------------------------------------------
-|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.64s (73.03%) |Training time=0.50s (22.54%) |Others=0.10 (4.44%)|CurSamplesPerSec=14.28 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1676|ppo_ep: 1|act_loss: 0.049346923828125|cri_loss: 0.0260009765625|unsuper_loss: 0.0
-average reward score: 3.904296875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.80%) |Training time=0.49s (22.66%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1677|ppo_ep: 1|act_loss: 0.0116119384765625|cri_loss: 0.006591796875|unsuper_loss: 0.0
-average reward score: 4.5390625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.73%) |Training time=0.49s (22.76%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1678|ppo_ep: 1|act_loss: 0.05316162109375|cri_loss: 0.027618408203125|unsuper_loss: 0.0
-average reward score: 3.74609375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.91%) |Training time=0.49s (22.54%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.42
-[2023-04-14 09:49:33,142] [INFO] [logging.py:96:log_dist] [Rank 0] step=1680, skipped=22, lr=[8.807711252836105e-06, 8.807711252836105e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:49:33,906] [INFO] [timer.py:199:stop] epoch=0/micro_step=1680/global_step=1680, RunningAvgSamplesPerSec=108.02099512384166, CurrSamplesPerSec=29.835284744846422, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:49:34,000] [INFO] [logging.py:96:log_dist] [Rank 0] step=1680, skipped=24, lr=[4.564667010050288e-06, 4.564667010050288e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1679|ppo_ep: 1|act_loss: -0.0125732421875|cri_loss: -0.0058746337890625|unsuper_loss: 0.0
-average reward score: 5.4296875
--------------------------------------------------------------------------------------
-|E2E latency=2.91s |Gather latency=0.00s (0.00%) |Generate time=1.58s (54.17%) |Training time=1.24s (42.44%) |Others=0.10 (3.39%)|CurSamplesPerSec=10.98 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1680|ppo_ep: 1|act_loss: -0.021759033203125|cri_loss: -0.01035308837890625|unsuper_loss: 0.0
-average reward score: 4.453125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.60%) |Training time=0.45s (20.84%) |Others=0.14 (6.57%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1681|ppo_ep: 1|act_loss: -0.0322265625|cri_loss: -0.0152130126953125|unsuper_loss: 0.0
-average reward score: 3.94921875
--------------------------------------------------------------------------------------
-|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.70s (74.95%) |Training time=0.47s (20.70%) |Others=0.10 (4.35%)|CurSamplesPerSec=14.12 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1682|ppo_ep: 1|act_loss: -0.0018930435180664062|cri_loss: -3.147125244140625e-05|unsuper_loss: 0.0
-average reward score: 4.015625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.49%) |Training time=0.47s (21.94%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1683|ppo_ep: 1|act_loss: 0.02203369140625|cri_loss: 0.0120391845703125|unsuper_loss: 0.0
-average reward score: 3.998046875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.15%) |Training time=0.48s (22.30%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1684|ppo_ep: 1|act_loss: 0.03509521484375|cri_loss: 0.01904296875|unsuper_loss: 0.0
-average reward score: 3.82421875
--------------------------------------------------------------------------------------
-|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.59s (70.07%) |Training time=0.47s (20.92%) |Others=0.20 (9.01%)|CurSamplesPerSec=14.14 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1685|ppo_ep: 1|act_loss: -0.00547027587890625|cri_loss: -0.0011768341064453125|unsuper_loss: 0.0
-average reward score: 4.7265625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.36%) |Training time=0.48s (22.10%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1686|ppo_ep: 1|act_loss: -0.033355712890625|cri_loss: -0.01560211181640625|unsuper_loss: 0.0
-average reward score: 4.32421875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.76%) |Training time=0.49s (22.70%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1687|ppo_ep: 1|act_loss: -0.01319122314453125|cri_loss: -0.005558013916015625|unsuper_loss: 0.0
-average reward score: 4.6328125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.12%) |Training time=0.48s (22.34%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1688|ppo_ep: 1|act_loss: -0.003948211669921875|cri_loss: 0.0012969970703125|unsuper_loss: 0.0
-average reward score: 4.3984375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.86%) |Training time=0.49s (22.61%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.41
-[2023-04-14 09:49:55,740] [INFO] [logging.py:96:log_dist] [Rank 0] step=1690, skipped=22, lr=[8.797195474860359e-06, 8.797195474860359e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:49:55,758] [INFO] [timer.py:199:stop] epoch=0/micro_step=1690/global_step=1690, RunningAvgSamplesPerSec=107.97908023515386, CurrSamplesPerSec=98.599461374245, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:49:55,851] [INFO] [logging.py:96:log_dist] [Rank 0] step=1690, skipped=24, lr=[4.559224531428731e-06, 4.559224531428731e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1689|ppo_ep: 1|act_loss: -0.04498291015625|cri_loss: -0.0214385986328125|unsuper_loss: 0.0
-average reward score: 5.28125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.98%) |Training time=0.49s (22.50%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1690|ppo_ep: 1|act_loss: 0.044769287109375|cri_loss: 0.02410888671875|unsuper_loss: 0.0
-average reward score: 4.01171875
--------------------------------------------------------------------------------------
-|E2E latency=3.03s |Gather latency=0.00s (0.00%) |Generate time=1.58s (52.18%) |Training time=0.49s (16.08%) |Others=0.96 (31.73%)|CurSamplesPerSec=10.56 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1691|ppo_ep: 1|act_loss: 0.019256591796875|cri_loss: 0.0107574462890625|unsuper_loss: 0.0
-average reward score: 4.0625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.84%) |Training time=0.49s (22.61%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1692|ppo_ep: 1|act_loss: -0.0185089111328125|cri_loss: -0.007755279541015625|unsuper_loss: 0.0
-average reward score: 4.2109375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.04%) |Training time=0.49s (22.42%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1693|ppo_ep: 1|act_loss: -0.00452423095703125|cri_loss: -0.0012645721435546875|unsuper_loss: 0.0
-average reward score: 3.41015625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.41%) |Training time=0.48s (22.06%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1694|ppo_ep: 1|act_loss: -0.01233673095703125|cri_loss: -0.00475311279296875|unsuper_loss: 0.0
-average reward score: 3.5078125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.07%) |Training time=0.48s (22.40%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1695|ppo_ep: 1|act_loss: 0.01306915283203125|cri_loss: 0.00788116455078125|unsuper_loss: 0.0
-average reward score: 4.4140625
--------------------------------------------------------------------------------------
-|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.07%) |Training time=0.48s (20.66%) |Others=0.10 (4.27%)|CurSamplesPerSec=13.72 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1696|ppo_ep: 1|act_loss: 0.01459503173828125|cri_loss: 0.00774383544921875|unsuper_loss: 0.0
-average reward score: 6.890625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.86%) |Training time=0.48s (22.24%) |Others=0.11 (4.90%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1697|ppo_ep: 1|act_loss: -0.010650634765625|cri_loss: -0.00344085693359375|unsuper_loss: 0.0
-average reward score: 4.671875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.50%) |Training time=0.48s (22.00%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1698|ppo_ep: 1|act_loss: 0.015228271484375|cri_loss: 0.00823211669921875|unsuper_loss: 0.0
-average reward score: 4.71484375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.07%) |Training time=0.48s (22.39%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.41
-[2023-04-14 09:50:18,441] [INFO] [logging.py:96:log_dist] [Rank 0] step=1700, skipped=22, lr=[8.786620819365276e-06, 8.786620819365276e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:50:18,460] [INFO] [timer.py:199:stop] epoch=0/micro_step=1700/global_step=1700, RunningAvgSamplesPerSec=107.92857209171187, CurrSamplesPerSec=103.74092618671828, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:50:18,554] [INFO] [logging.py:96:log_dist] [Rank 0] step=1700, skipped=24, lr=[4.553751530132009e-06, 4.553751530132009e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1699|ppo_ep: 1|act_loss: -0.0390625|cri_loss: -0.0186309814453125|unsuper_loss: 0.0
-average reward score: 5.40625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.67%) |Training time=0.47s (21.65%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1700|ppo_ep: 1|act_loss: 0.01053619384765625|cri_loss: 0.005527496337890625|unsuper_loss: 0.0
-average reward score: 4.95703125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.33%) |Training time=0.48s (22.14%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1701|ppo_ep: 1|act_loss: 0.0002722740173339844|cri_loss: 0.0005764961242675781|unsuper_loss: 0.0
-average reward score: 4.328125
--------------------------------------------------------------------------------------
-|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.59s (69.91%) |Training time=0.49s (21.56%) |Others=0.19 (8.53%)|CurSamplesPerSec=14.10 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1702|ppo_ep: 1|act_loss: -0.002727508544921875|cri_loss: -0.0006961822509765625|unsuper_loss: 0.0
-average reward score: 4.84375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.21%) |Training time=0.48s (22.23%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1703|ppo_ep: 1|act_loss: -0.0056304931640625|cri_loss: -0.0018415451049804688|unsuper_loss: 0.0
-average reward score: 3.80078125
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.03%) |Training time=0.48s (21.91%) |Others=0.11 (5.06%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1704|ppo_ep: 1|act_loss: 0.0157623291015625|cri_loss: 0.00850677490234375|unsuper_loss: 0.0
-average reward score: 5.21484375
--------------------------------------------------------------------------------------
-|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.39%) |Training time=0.48s (21.28%) |Others=0.10 (4.32%)|CurSamplesPerSec=14.11 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1705|ppo_ep: 1|act_loss: 0.0118255615234375|cri_loss: 0.006561279296875|unsuper_loss: 0.0
-average reward score: 4.46484375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.19%) |Training time=0.48s (22.27%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1706|ppo_ep: 1|act_loss: -0.0498046875|cri_loss: -0.0225372314453125|unsuper_loss: 0.0
-average reward score: 4.34375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.24%) |Training time=0.48s (22.24%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1707|ppo_ep: 1|act_loss: -0.0137176513671875|cri_loss: -0.00522613525390625|unsuper_loss: 0.0
-average reward score: 4.85546875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.43%) |Training time=0.48s (22.04%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1708|ppo_ep: 1|act_loss: -0.01203155517578125|cri_loss: -0.00525665283203125|unsuper_loss: 0.0
-average reward score: 3.890625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.85%) |Training time=0.49s (22.62%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.41
-[2023-04-14 09:50:40,357] [INFO] [logging.py:96:log_dist] [Rank 0] step=1710, skipped=22, lr=[8.77598744309276e-06, 8.77598744309276e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:50:40,375] [INFO] [timer.py:199:stop] epoch=0/micro_step=1710/global_step=1710, RunningAvgSamplesPerSec=107.88094685691051, CurrSamplesPerSec=101.33945819997479, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:50:40,517] [INFO] [logging.py:96:log_dist] [Rank 0] step=1710, skipped=24, lr=[4.548248087283204e-06, 4.548248087283204e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1709|ppo_ep: 1|act_loss: -0.0095977783203125|cri_loss: -0.0038585662841796875|unsuper_loss: 0.0
-average reward score: 4.04296875
--------------------------------------------------------------------------------------
-|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.59s (71.82%) |Training time=0.48s (21.60%) |Others=0.15 (6.58%)|CurSamplesPerSec=14.44 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1710|ppo_ep: 1|act_loss: -0.0081634521484375|cri_loss: -0.003696441650390625|unsuper_loss: 0.0
-average reward score: 3.7890625
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.41%) |Training time=0.49s (22.04%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.48 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1711|ppo_ep: 1|act_loss: 0.00742340087890625|cri_loss: 0.0043792724609375|unsuper_loss: 0.0
-average reward score: 4.5625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.93%) |Training time=0.49s (22.55%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1712|ppo_ep: 1|act_loss: 0.0270843505859375|cri_loss: 0.01422882080078125|unsuper_loss: 0.0
-average reward score: 3.84375
--------------------------------------------------------------------------------------
-|E2E latency=2.07s |Gather latency=0.00s (0.00%) |Generate time=1.59s (76.61%) |Training time=0.39s (18.64%) |Others=0.10 (4.75%)|CurSamplesPerSec=15.43 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1713|ppo_ep: 1|act_loss: -0.00589752197265625|cri_loss: -0.002044677734375|unsuper_loss: 0.0
-average reward score: 4.82421875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.71%) |Training time=0.47s (21.71%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1714|ppo_ep: 1|act_loss: 0.00591278076171875|cri_loss: 0.003887176513671875|unsuper_loss: 0.0
-average reward score: 4.26171875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.66%) |Training time=0.47s (21.77%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1715|ppo_ep: 1|act_loss: -0.02520751953125|cri_loss: -0.0118408203125|unsuper_loss: 0.0
-average reward score: 5.046875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.77%) |Training time=0.47s (21.67%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1716|ppo_ep: 1|act_loss: -0.03692626953125|cri_loss: -0.01666259765625|unsuper_loss: 0.0
-average reward score: 3.564453125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.04%) |Training time=0.46s (21.37%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1717|ppo_ep: 1|act_loss: -0.049041748046875|cri_loss: -0.02178955078125|unsuper_loss: 0.0
-average reward score: 3.392578125
--------------------------------------------------------------------------------------
-|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.15%) |Training time=0.45s (21.24%) |Others=0.10 (4.61%)|CurSamplesPerSec=15.00 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1718|ppo_ep: 1|act_loss: -0.0164794921875|cri_loss: -0.00760650634765625|unsuper_loss: 0.0
-average reward score: 4.078125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.03%) |Training time=0.46s (21.38%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.42
-[2023-04-14 09:51:02,716] [INFO] [logging.py:96:log_dist] [Rank 0] step=1720, skipped=22, lr=[8.765295503655101e-06, 8.765295503655101e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:51:02,734] [INFO] [timer.py:199:stop] epoch=0/micro_step=1720/global_step=1720, RunningAvgSamplesPerSec=107.88049260354408, CurrSamplesPerSec=107.629738450978, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:51:02,827] [INFO] [logging.py:96:log_dist] [Rank 0] step=1720, skipped=24, lr=[4.542714284456616e-06, 4.542714284456616e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1719|ppo_ep: 1|act_loss: 0.03533935546875|cri_loss: 0.0183563232421875|unsuper_loss: 0.0
-average reward score: 5.19140625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.95%) |Training time=0.46s (21.48%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1720|ppo_ep: 1|act_loss: 0.064453125|cri_loss: 0.03436279296875|unsuper_loss: 0.0
-average reward score: 3.904296875
--------------------------------------------------------------------------------------
-|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.06%) |Training time=0.45s (21.31%) |Others=0.10 (4.63%)|CurSamplesPerSec=15.00 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1721|ppo_ep: 1|act_loss: 0.016632080078125|cri_loss: 0.011627197265625|unsuper_loss: 0.0
-average reward score: 4.26171875
--------------------------------------------------------------------------------------
-|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.13%) |Training time=0.45s (21.26%) |Others=0.10 (4.60%)|CurSamplesPerSec=15.00 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1722|ppo_ep: 1|act_loss: 0.05401611328125|cri_loss: 0.0280609130859375|unsuper_loss: 0.0
-average reward score: 5.17578125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.85%) |Training time=0.46s (21.56%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1723|ppo_ep: 1|act_loss: 0.00507354736328125|cri_loss: 0.0029201507568359375|unsuper_loss: 0.0
-average reward score: 4.26953125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.72%) |Training time=0.46s (21.68%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1724|ppo_ep: 1|act_loss: 0.01146697998046875|cri_loss: 0.006473541259765625|unsuper_loss: 0.0
-average reward score: 4.16015625
--------------------------------------------------------------------------------------
-|E2E latency=2.30s |Gather latency=0.00s (0.00%) |Generate time=1.74s (75.64%) |Training time=0.46s (20.09%) |Others=0.10 (4.27%)|CurSamplesPerSec=13.91 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1725|ppo_ep: 1|act_loss: -0.07354736328125|cri_loss: -0.032379150390625|unsuper_loss: 0.0
-average reward score: 3.578125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.22%) |Training time=0.48s (22.22%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1726|ppo_ep: 1|act_loss: -0.0413818359375|cri_loss: -0.0194549560546875|unsuper_loss: 0.0
-average reward score: 3.080078125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.69%) |Training time=0.46s (21.71%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1727|ppo_ep: 1|act_loss: 0.0097808837890625|cri_loss: 0.007587432861328125|unsuper_loss: 0.0
-average reward score: 5.30859375
--------------------------------------------------------------------------------------
-|E2E latency=2.91s |Gather latency=0.00s (0.00%) |Generate time=1.58s (54.26%) |Training time=0.46s (15.74%) |Others=0.87 (30.00%)|CurSamplesPerSec=11.02 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1728|ppo_ep: 1|act_loss: -0.0242767333984375|cri_loss: -0.010833740234375|unsuper_loss: 0.0
-average reward score: 4.1875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.70%) |Training time=0.46s (21.69%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.42
-[2023-04-14 09:51:25,354] [INFO] [logging.py:96:log_dist] [Rank 0] step=1730, skipped=22, lr=[8.754545159532632e-06, 8.754545159532632e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:51:25,370] [INFO] [timer.py:199:stop] epoch=0/micro_step=1730/global_step=1730, RunningAvgSamplesPerSec=107.87975778832639, CurrSamplesPerSec=117.8119436052013, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:51:25,466] [INFO] [logging.py:96:log_dist] [Rank 0] step=1730, skipped=24, lr=[4.537150203676553e-06, 4.537150203676553e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1729|ppo_ep: 1|act_loss: 0.035400390625|cri_loss: 0.01837158203125|unsuper_loss: 0.0
-average reward score: 4.88671875
--------------------------------------------------------------------------------------
-|E2E latency=2.46s |Gather latency=0.00s (0.00%) |Generate time=1.87s (75.81%) |Training time=0.49s (20.01%) |Others=0.10 (4.18%)|CurSamplesPerSec=12.98 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1730|ppo_ep: 1|act_loss: 0.0250091552734375|cri_loss: 0.0130767822265625|unsuper_loss: 0.0
-average reward score: 4.09765625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.73%) |Training time=0.47s (21.68%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1731|ppo_ep: 1|act_loss: 0.03399658203125|cri_loss: 0.0174713134765625|unsuper_loss: 0.0
-average reward score: 4.6171875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.89%) |Training time=0.49s (22.41%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1732|ppo_ep: 1|act_loss: 0.00628662109375|cri_loss: 0.004150390625|unsuper_loss: 0.0
-average reward score: 4.796875
--------------------------------------------------------------------------------------
-|E2E latency=2.75s |Gather latency=0.00s (0.00%) |Generate time=1.59s (58.04%) |Training time=0.51s (18.54%) |Others=0.64 (23.42%)|CurSamplesPerSec=11.65 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1733|ppo_ep: 1|act_loss: 0.03253173828125|cri_loss: 0.016815185546875|unsuper_loss: 0.0
-average reward score: 5.34765625
--------------------------------------------------------------------------------------
-|E2E latency=2.30s |Gather latency=0.00s (0.00%) |Generate time=1.68s (73.06%) |Training time=0.52s (22.67%) |Others=0.10 (4.28%)|CurSamplesPerSec=13.90 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1734|ppo_ep: 1|act_loss: 0.0027866363525390625|cri_loss: 0.0016613006591796875|unsuper_loss: 0.0
-average reward score: 4.13671875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.30%) |Training time=0.48s (22.17%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1735|ppo_ep: 1|act_loss: -0.01568603515625|cri_loss: -0.007293701171875|unsuper_loss: 0.0
-average reward score: 3.78125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.80%) |Training time=0.49s (22.67%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1736|ppo_ep: 1|act_loss: -0.0134735107421875|cri_loss: -0.00504302978515625|unsuper_loss: 0.0
-average reward score: 3.9375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.96%) |Training time=0.49s (22.50%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1737|ppo_ep: 1|act_loss: -0.029541015625|cri_loss: -0.01422882080078125|unsuper_loss: 0.0
-average reward score: 4.79296875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.16%) |Training time=0.48s (22.30%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1738|ppo_ep: 1|act_loss: -0.004878997802734375|cri_loss: -0.0018644332885742188|unsuper_loss: 0.0
-average reward score: 3.884765625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.08%) |Training time=0.48s (22.37%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41
-[2023-04-14 09:51:47,899] [INFO] [logging.py:96:log_dist] [Rank 0] step=1740, skipped=22, lr=[8.743736570071387e-06, 8.743736570071387e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:51:47,917] [INFO] [timer.py:199:stop] epoch=0/micro_step=1740/global_step=1740, RunningAvgSamplesPerSec=107.81197459690361, CurrSamplesPerSec=93.59621982583081, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:51:48,010] [INFO] [logging.py:96:log_dist] [Rank 0] step=1740, skipped=24, lr=[4.531555927416115e-06, 4.531555927416115e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1739|ppo_ep: 1|act_loss: 0.01534271240234375|cri_loss: 0.00815582275390625|unsuper_loss: 0.0
-average reward score: 4.015625
--------------------------------------------------------------------------------------
-|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.75s (74.32%) |Training time=0.50s (21.48%) |Others=0.10 (4.19%)|CurSamplesPerSec=13.63 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1740|ppo_ep: 1|act_loss: 0.041717529296875|cri_loss: 0.021728515625|unsuper_loss: 0.0
-average reward score: 4.62890625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.59%) |Training time=0.50s (22.90%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1741|ppo_ep: 1|act_loss: 0.017333984375|cri_loss: 0.0090484619140625|unsuper_loss: 0.0
-average reward score: 3.990234375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.78%) |Training time=0.49s (22.70%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1742|ppo_ep: 1|act_loss: -0.0078582763671875|cri_loss: -0.003448486328125|unsuper_loss: 0.0
-average reward score: 4.50390625
--------------------------------------------------------------------------------------
-|E2E latency=2.46s |Gather latency=0.00s (0.00%) |Generate time=1.58s (64.12%) |Training time=0.49s (19.89%) |Others=0.39 (15.99%)|CurSamplesPerSec=13.02 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1743|ppo_ep: 1|act_loss: 0.00637054443359375|cri_loss: 0.0039215087890625|unsuper_loss: 0.0
-average reward score: 4.7421875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.00%) |Training time=0.49s (22.46%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1744|ppo_ep: 1|act_loss: -0.00989532470703125|cri_loss: -0.00441741943359375|unsuper_loss: 0.0
-average reward score: 3.30859375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.98%) |Training time=0.49s (22.48%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1745|ppo_ep: 1|act_loss: -0.0804443359375|cri_loss: -0.03790283203125|unsuper_loss: 0.0
-average reward score: 3.40234375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.18%) |Training time=0.46s (21.26%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1746|ppo_ep: 1|act_loss: -0.03265380859375|cri_loss: -0.0139923095703125|unsuper_loss: 0.0
-average reward score: 4.05078125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.41%) |Training time=0.45s (21.02%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1747|ppo_ep: 1|act_loss: -0.0177001953125|cri_loss: -0.0078887939453125|unsuper_loss: 0.0
-average reward score: 4.625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.25%) |Training time=0.46s (21.18%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1748|ppo_ep: 1|act_loss: -0.0044708251953125|cri_loss: -0.00197601318359375|unsuper_loss: 0.0
-average reward score: 5.6015625
--------------------------------------------------------------------------------------
-|E2E latency=2.45s |Gather latency=0.00s (0.00%) |Generate time=1.59s (65.05%) |Training time=0.46s (18.74%) |Others=0.40 (16.21%)|CurSamplesPerSec=13.05 |AvgSamplesPerSec=14.41
-[2023-04-14 09:52:10,107] [INFO] [logging.py:96:log_dist] [Rank 0] step=1750, skipped=22, lr=[8.732869895480736e-06, 8.732869895480736e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:52:10,126] [INFO] [timer.py:199:stop] epoch=0/micro_step=1750/global_step=1750, RunningAvgSamplesPerSec=107.78236145408847, CurrSamplesPerSec=109.62930097272944, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:52:10,218] [INFO] [logging.py:96:log_dist] [Rank 0] step=1750, skipped=24, lr=[4.525931538595969e-06, 4.525931538595969e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1749|ppo_ep: 1|act_loss: 0.007076263427734375|cri_loss: 0.00386810302734375|unsuper_loss: 0.0
-average reward score: 3.205078125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.32%) |Training time=0.45s (21.13%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1750|ppo_ep: 1|act_loss: 0.0384521484375|cri_loss: 0.02001953125|unsuper_loss: 0.0
-average reward score: 4.59765625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.31%) |Training time=0.45s (21.12%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1751|ppo_ep: 1|act_loss: 0.07427978515625|cri_loss: 0.040679931640625|unsuper_loss: 0.0
-average reward score: 4.08984375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.02%) |Training time=0.46s (21.42%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1752|ppo_ep: 1|act_loss: 0.0146026611328125|cri_loss: 0.0081329345703125|unsuper_loss: 0.0
-average reward score: 4.93359375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.16%) |Training time=0.46s (21.27%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1753|ppo_ep: 1|act_loss: -0.022735595703125|cri_loss: -0.01081085205078125|unsuper_loss: 0.0
-average reward score: 4.875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.02%) |Training time=0.46s (21.41%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1754|ppo_ep: 1|act_loss: -0.03564453125|cri_loss: -0.01702880859375|unsuper_loss: 0.0
-average reward score: 5.19921875
--------------------------------------------------------------------------------------
-|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.88%) |Training time=0.46s (19.87%) |Others=0.10 (4.24%)|CurSamplesPerSec=13.81 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1755|ppo_ep: 1|act_loss: -0.0101776123046875|cri_loss: -0.004772186279296875|unsuper_loss: 0.0
-average reward score: 4.0
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.97%) |Training time=0.46s (21.46%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1756|ppo_ep: 1|act_loss: -0.01068878173828125|cri_loss: -0.00504302978515625|unsuper_loss: 0.0
-average reward score: 4.4296875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.05%) |Training time=0.46s (21.40%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1757|ppo_ep: 1|act_loss: 0.00408935546875|cri_loss: 0.003238677978515625|unsuper_loss: 0.0
-average reward score: 4.30859375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.21%) |Training time=0.46s (21.22%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1758|ppo_ep: 1|act_loss: 0.0113983154296875|cri_loss: 0.006969451904296875|unsuper_loss: 0.0
-average reward score: 3.5625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.79%) |Training time=0.47s (21.65%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41
-[2023-04-14 09:52:31,837] [INFO] [logging.py:96:log_dist] [Rank 0] step=1760, skipped=22, lr=[8.72194529683101e-06, 8.72194529683101e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:52:31,856] [INFO] [timer.py:199:stop] epoch=0/micro_step=1760/global_step=1760, RunningAvgSamplesPerSec=107.78163376115772, CurrSamplesPerSec=105.12303566505426, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:52:31,949] [INFO] [logging.py:96:log_dist] [Rank 0] step=1760, skipped=24, lr=[4.5202771205831286e-06, 4.5202771205831286e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1759|ppo_ep: 1|act_loss: 0.01348876953125|cri_loss: 0.007396697998046875|unsuper_loss: 0.0
-average reward score: 3.6328125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.94%) |Training time=0.47s (21.51%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1760|ppo_ep: 1|act_loss: 0.11712646484375|cri_loss: 0.06634521484375|unsuper_loss: 0.0
-average reward score: 4.421875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.05%) |Training time=0.46s (21.38%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1761|ppo_ep: 1|act_loss: -0.01331329345703125|cri_loss: -0.00519561767578125|unsuper_loss: 0.0
-average reward score: 5.4609375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.42%) |Training time=0.46s (21.06%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1762|ppo_ep: 1|act_loss: 0.0191802978515625|cri_loss: 0.00983428955078125|unsuper_loss: 0.0
-average reward score: 4.02734375
--------------------------------------------------------------------------------------
-|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.70s (75.41%) |Training time=0.46s (20.24%) |Others=0.10 (4.36%)|CurSamplesPerSec=14.19 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1763|ppo_ep: 1|act_loss: -0.0606689453125|cri_loss: -0.0264739990234375|unsuper_loss: 0.0
-average reward score: 3.58203125
--------------------------------------------------------------------------------------
-|E2E latency=2.74s |Gather latency=0.00s (0.00%) |Generate time=1.60s (58.39%) |Training time=0.46s (16.65%) |Others=0.68 (24.95%)|CurSamplesPerSec=11.68 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1764|ppo_ep: 1|act_loss: 0.034393310546875|cri_loss: 0.01776123046875|unsuper_loss: 0.0
-average reward score: 4.23046875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.15%) |Training time=0.46s (21.27%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1765|ppo_ep: 1|act_loss: -0.0011138916015625|cri_loss: -0.00027751922607421875|unsuper_loss: 0.0
-average reward score: 4.03125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.11%) |Training time=0.46s (21.33%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1766|ppo_ep: 1|act_loss: -0.0205230712890625|cri_loss: -0.0092010498046875|unsuper_loss: 0.0
-average reward score: 4.3046875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.27%) |Training time=0.46s (21.15%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1767|ppo_ep: 1|act_loss: -0.01461029052734375|cri_loss: -0.00675201416015625|unsuper_loss: 0.0
-average reward score: 3.173828125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.22%) |Training time=0.46s (21.18%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1768|ppo_ep: 1|act_loss: -0.00934600830078125|cri_loss: -0.004116058349609375|unsuper_loss: 0.0
-average reward score: 3.951171875
--------------------------------------------------------------------------------------
-|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.77s (75.92%) |Training time=0.46s (19.85%) |Others=0.10 (4.23%)|CurSamplesPerSec=13.75 |AvgSamplesPerSec=14.41
-[2023-04-14 09:52:54,244] [INFO] [logging.py:96:log_dist] [Rank 0] step=1770, skipped=22, lr=[8.710962936051119e-06, 8.710962936051119e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:52:54,263] [INFO] [timer.py:199:stop] epoch=0/micro_step=1770/global_step=1770, RunningAvgSamplesPerSec=107.78735625931982, CurrSamplesPerSec=109.66692295813189, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:52:54,355] [INFO] [logging.py:96:log_dist] [Rank 0] step=1770, skipped=24, lr=[4.514592757189707e-06, 4.514592757189707e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1769|ppo_ep: 1|act_loss: -0.000946044921875|cri_loss: 0.00171661376953125|unsuper_loss: 0.0
-average reward score: 4.08984375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.31%) |Training time=0.45s (21.14%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1770|ppo_ep: 1|act_loss: -0.017913818359375|cri_loss: -0.0079498291015625|unsuper_loss: 0.0
-average reward score: 4.16796875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.19%) |Training time=0.46s (21.25%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1771|ppo_ep: 1|act_loss: 0.0276641845703125|cri_loss: 0.0149993896484375|unsuper_loss: 0.0
-average reward score: 4.296875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.97%) |Training time=0.46s (21.46%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1772|ppo_ep: 1|act_loss: 0.02630615234375|cri_loss: 0.014068603515625|unsuper_loss: 0.0
-average reward score: 4.80078125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.10%) |Training time=0.46s (21.34%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1773|ppo_ep: 1|act_loss: 0.0014629364013671875|cri_loss: 0.0013713836669921875|unsuper_loss: 0.0
-average reward score: 5.171875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.88%) |Training time=0.47s (21.56%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1774|ppo_ep: 1|act_loss: -0.009490966796875|cri_loss: -0.004062652587890625|unsuper_loss: 0.0
-average reward score: 4.27734375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.27%) |Training time=0.46s (21.17%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1775|ppo_ep: 1|act_loss: -0.06005859375|cri_loss: -0.0272369384765625|unsuper_loss: 0.0
-average reward score: 3.927734375
--------------------------------------------------------------------------------------
-|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.71%) |Training time=0.44s (20.68%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1776|ppo_ep: 1|act_loss: -0.0263824462890625|cri_loss: -0.0122528076171875|unsuper_loss: 0.0
-average reward score: 4.84375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.64%) |Training time=0.44s (20.76%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1777|ppo_ep: 1|act_loss: 0.0029964447021484375|cri_loss: 0.0020046234130859375|unsuper_loss: 0.0
-average reward score: 4.97265625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.51%) |Training time=0.45s (20.83%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1778|ppo_ep: 1|act_loss: 0.005260467529296875|cri_loss: 0.003143310546875|unsuper_loss: 0.0
-average reward score: 4.4765625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.32%) |Training time=0.45s (21.07%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.42
-[2023-04-14 09:53:15,699] [INFO] [logging.py:96:log_dist] [Rank 0] step=1780, skipped=22, lr=[8.699922975926139e-06, 8.699922975926139e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:53:15,718] [INFO] [timer.py:199:stop] epoch=0/micro_step=1780/global_step=1780, RunningAvgSamplesPerSec=107.80113421242028, CurrSamplesPerSec=113.88078375605919, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:53:15,810] [INFO] [logging.py:96:log_dist] [Rank 0] step=1780, skipped=24, lr=[4.508878532671684e-06, 4.508878532671684e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1779|ppo_ep: 1|act_loss: 0.04669189453125|cri_loss: 0.02532958984375|unsuper_loss: 0.0
-average reward score: 4.890625
--------------------------------------------------------------------------------------
-|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.64%) |Training time=0.44s (20.78%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1780|ppo_ep: 1|act_loss: 0.0433349609375|cri_loss: 0.0230712890625|unsuper_loss: 0.0
-average reward score: 4.015625
--------------------------------------------------------------------------------------
-|E2E latency=2.47s |Gather latency=0.00s (0.00%) |Generate time=1.59s (64.40%) |Training time=0.46s (18.43%) |Others=0.42 (17.17%)|CurSamplesPerSec=12.95 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1781|ppo_ep: 1|act_loss: 0.03173828125|cri_loss: 0.018829345703125|unsuper_loss: 0.0
-average reward score: 4.25
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.41%) |Training time=0.45s (21.02%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1782|ppo_ep: 1|act_loss: 0.00551605224609375|cri_loss: 0.002964019775390625|unsuper_loss: 0.0
-average reward score: 4.20703125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.67%) |Training time=0.44s (20.74%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1783|ppo_ep: 1|act_loss: 0.0203857421875|cri_loss: 0.01043701171875|unsuper_loss: 0.0
-average reward score: 4.46484375
--------------------------------------------------------------------------------------
-|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.59s (71.85%) |Training time=0.46s (20.52%) |Others=0.17 (7.63%)|CurSamplesPerSec=14.42 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1784|ppo_ep: 1|act_loss: -0.0440673828125|cri_loss: -0.020599365234375|unsuper_loss: 0.0
-average reward score: 3.3828125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.97%) |Training time=0.44s (20.32%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1785|ppo_ep: 1|act_loss: -0.0390625|cri_loss: -0.0182037353515625|unsuper_loss: 0.0
-average reward score: 4.4453125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.70%) |Training time=0.44s (20.71%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1786|ppo_ep: 1|act_loss: -0.032318115234375|cri_loss: -0.01308441162109375|unsuper_loss: 0.0
-average reward score: 4.69921875
--------------------------------------------------------------------------------------
-|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.60s (75.06%) |Training time=0.43s (20.33%) |Others=0.10 (4.61%)|CurSamplesPerSec=15.00 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1787|ppo_ep: 1|act_loss: 0.0012836456298828125|cri_loss: 0.0012454986572265625|unsuper_loss: 0.0
-average reward score: 4.52734375
--------------------------------------------------------------------------------------
-|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.93%) |Training time=0.44s (20.46%) |Others=0.10 (4.61%)|CurSamplesPerSec=15.01 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1788|ppo_ep: 1|act_loss: -0.00051116943359375|cri_loss: 0.0003037452697753906|unsuper_loss: 0.0
-average reward score: 4.265625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (75.02%) |Training time=0.44s (20.38%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.42
-[2023-04-14 09:53:37,537] [INFO] [logging.py:96:log_dist] [Rank 0] step=1790, skipped=22, lr=[8.688825580094914e-06, 8.688825580094914e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:53:37,556] [INFO] [timer.py:199:stop] epoch=0/micro_step=1790/global_step=1790, RunningAvgSamplesPerSec=107.830801602799, CurrSamplesPerSec=110.82939976714037, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:53:37,649] [INFO] [logging.py:96:log_dist] [Rank 0] step=1790, skipped=24, lr=[4.503134531727652e-06, 4.503134531727652e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1789|ppo_ep: 1|act_loss: 0.0289306640625|cri_loss: 0.01531219482421875|unsuper_loss: 0.0
-average reward score: 4.75
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.40%) |Training time=0.45s (21.00%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1790|ppo_ep: 1|act_loss: 0.0858154296875|cri_loss: 0.04718017578125|unsuper_loss: 0.0
-average reward score: 4.03125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.19%) |Training time=0.46s (21.26%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1791|ppo_ep: 1|act_loss: 0.1064453125|cri_loss: 0.05609130859375|unsuper_loss: 0.0
-average reward score: 4.26953125
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.51%) |Training time=0.49s (22.04%) |Others=0.10 (4.45%)|CurSamplesPerSec=14.48 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1792|ppo_ep: 1|act_loss: 0.0220184326171875|cri_loss: 0.01183319091796875|unsuper_loss: 0.0
-average reward score: 4.40625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.27%) |Training time=0.46s (21.16%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1793|ppo_ep: 1|act_loss: -0.00402069091796875|cri_loss: -0.001773834228515625|unsuper_loss: 0.0
-average reward score: 4.29296875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.01%) |Training time=0.46s (21.42%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1794|ppo_ep: 1|act_loss: -0.00018310546875|cri_loss: 0.0029506683349609375|unsuper_loss: 0.0
-average reward score: 3.982421875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.82%) |Training time=0.47s (21.61%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1795|ppo_ep: 1|act_loss: -0.01824951171875|cri_loss: -0.00859832763671875|unsuper_loss: 0.0
-average reward score: 4.01953125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.94%) |Training time=0.46s (21.49%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1796|ppo_ep: 1|act_loss: 0.038330078125|cri_loss: 0.0207977294921875|unsuper_loss: 0.0
-average reward score: 4.4296875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.82%) |Training time=0.47s (21.62%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1797|ppo_ep: 1|act_loss: -0.05828857421875|cri_loss: -0.0277099609375|unsuper_loss: 0.0
-average reward score: 5.10546875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.77%) |Training time=0.47s (21.68%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1798|ppo_ep: 1|act_loss: -0.0136566162109375|cri_loss: -0.006031036376953125|unsuper_loss: 0.0
-average reward score: 3.41015625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.78%) |Training time=0.47s (21.66%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.42
-[2023-04-14 09:53:59,319] [INFO] [logging.py:96:log_dist] [Rank 0] step=1800, skipped=22, lr=[8.677670913047617e-06, 8.677670913047617e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:53:59,587] [INFO] [timer.py:199:stop] epoch=0/micro_step=1800/global_step=1800, RunningAvgSamplesPerSec=107.77048253976814, CurrSamplesPerSec=57.6945053886944, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:53:59,680] [INFO] [logging.py:96:log_dist] [Rank 0] step=1800, skipped=24, lr=[4.49736083949756e-06, 4.49736083949756e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1799|ppo_ep: 1|act_loss: -0.002838134765625|cri_loss: -0.000732421875|unsuper_loss: 0.0
-average reward score: 4.3203125
--------------------------------------------------------------------------------------
-|E2E latency=2.57s |Gather latency=0.00s (0.00%) |Generate time=1.76s (68.28%) |Training time=0.72s (27.90%) |Others=0.10 (3.82%)|CurSamplesPerSec=12.45 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1800|ppo_ep: 1|act_loss: -0.004962921142578125|cri_loss: -0.002277374267578125|unsuper_loss: 0.0
-average reward score: 5.18359375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.38%) |Training time=0.48s (22.07%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1801|ppo_ep: 1|act_loss: 0.00911712646484375|cri_loss: 0.00502777099609375|unsuper_loss: 0.0
-average reward score: 4.32421875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.90%) |Training time=0.46s (21.54%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1802|ppo_ep: 1|act_loss: 0.0194091796875|cri_loss: 0.0101318359375|unsuper_loss: 0.0
-average reward score: 5.296875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.77%) |Training time=0.47s (21.67%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1803|ppo_ep: 1|act_loss: -0.004100799560546875|cri_loss: -0.0012693405151367188|unsuper_loss: 0.0
-average reward score: 5.61328125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.75%) |Training time=0.47s (21.69%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1804|ppo_ep: 1|act_loss: 0.005481719970703125|cri_loss: 0.00354766845703125|unsuper_loss: 0.0
-average reward score: 4.5625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.30%) |Training time=0.46s (21.12%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1805|ppo_ep: 1|act_loss: 0.009002685546875|cri_loss: 0.0046844482421875|unsuper_loss: 0.0
-average reward score: 4.77734375
--------------------------------------------------------------------------------------
-|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.60s (67.59%) |Training time=0.46s (19.33%) |Others=0.31 (13.08%)|CurSamplesPerSec=13.55 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1806|ppo_ep: 1|act_loss: -0.016754150390625|cri_loss: -0.00707244873046875|unsuper_loss: 0.0
-average reward score: 4.13671875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.89%) |Training time=0.46s (21.54%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1807|ppo_ep: 1|act_loss: -0.021514892578125|cri_loss: -0.01013946533203125|unsuper_loss: 0.0
-average reward score: 3.890625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.71%) |Training time=0.47s (21.74%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1808|ppo_ep: 1|act_loss: 0.022613525390625|cri_loss: 0.01165008544921875|unsuper_loss: 0.0
-average reward score: 4.69140625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.88%) |Training time=0.46s (21.56%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.42
-[2023-04-14 09:54:21,316] [INFO] [logging.py:96:log_dist] [Rank 0] step=1810, skipped=22, lr=[8.66645914012333e-06, 8.66645914012333e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:54:21,334] [INFO] [timer.py:199:stop] epoch=0/micro_step=1810/global_step=1810, RunningAvgSamplesPerSec=107.75986855875877, CurrSamplesPerSec=105.9602488394859, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:54:21,427] [INFO] [logging.py:96:log_dist] [Rank 0] step=1810, skipped=24, lr=[4.491557541561456e-06, 4.491557541561456e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1809|ppo_ep: 1|act_loss: 0.03271484375|cri_loss: 0.0170440673828125|unsuper_loss: 0.0
-average reward score: 4.0234375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.80%) |Training time=0.46s (21.65%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1810|ppo_ep: 1|act_loss: -0.0049285888671875|cri_loss: -0.002193450927734375|unsuper_loss: 0.0
-average reward score: 4.4375
--------------------------------------------------------------------------------------
-|E2E latency=2.88s |Gather latency=0.00s (0.00%) |Generate time=1.59s (55.11%) |Training time=0.47s (16.32%) |Others=0.82 (28.58%)|CurSamplesPerSec=11.10 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1811|ppo_ep: 1|act_loss: -0.0049896240234375|cri_loss: -0.002170562744140625|unsuper_loss: 0.0
-average reward score: 5.1484375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.05%) |Training time=0.46s (21.39%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1812|ppo_ep: 1|act_loss: -0.0087127685546875|cri_loss: -0.0037517547607421875|unsuper_loss: 0.0
-average reward score: 4.2421875
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.58s (71.81%) |Training time=0.47s (21.34%) |Others=0.15 (6.85%)|CurSamplesPerSec=14.51 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1813|ppo_ep: 1|act_loss: 0.01837158203125|cri_loss: 0.00933837890625|unsuper_loss: 0.0
-average reward score: 5.1875
--------------------------------------------------------------------------------------
-|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.82%) |Training time=0.47s (20.84%) |Others=0.10 (4.34%)|CurSamplesPerSec=14.14 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1814|ppo_ep: 1|act_loss: -0.0235595703125|cri_loss: -0.01139068603515625|unsuper_loss: 0.0
-average reward score: 4.48828125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.05%) |Training time=0.46s (21.39%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1815|ppo_ep: 1|act_loss: -0.002483367919921875|cri_loss: -0.0010623931884765625|unsuper_loss: 0.0
-average reward score: 3.763671875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.95%) |Training time=0.46s (21.48%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1816|ppo_ep: 1|act_loss: 0.027923583984375|cri_loss: 0.01465606689453125|unsuper_loss: 0.0
-average reward score: 4.19140625
--------------------------------------------------------------------------------------
-|E2E latency=3.37s |Gather latency=0.00s (0.00%) |Generate time=1.59s (47.29%) |Training time=0.45s (13.42%) |Others=1.32 (39.28%)|CurSamplesPerSec=9.49 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1817|ppo_ep: 1|act_loss: 0.03106689453125|cri_loss: 0.0168914794921875|unsuper_loss: 0.0
-average reward score: 3.7109375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.04%) |Training time=0.46s (21.38%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1818|ppo_ep: 1|act_loss: -0.027679443359375|cri_loss: -0.01276397705078125|unsuper_loss: 0.0
-average reward score: 4.33984375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.17%) |Training time=0.45s (21.22%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.42
-[2023-04-14 09:54:44,979] [INFO] [logging.py:96:log_dist] [Rank 0] step=1820, skipped=22, lr=[8.65519042750757e-06, 8.65519042750757e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:54:44,998] [INFO] [timer.py:199:stop] epoch=0/micro_step=1820/global_step=1820, RunningAvgSamplesPerSec=107.75628443014152, CurrSamplesPerSec=109.50247245462815, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:54:45,090] [INFO] [logging.py:96:log_dist] [Rank 0] step=1820, skipped=24, lr=[4.485724723938215e-06, 4.485724723938215e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1819|ppo_ep: 1|act_loss: 0.0186767578125|cri_loss: 0.0098114013671875|unsuper_loss: 0.0
-average reward score: 5.24609375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.65%) |Training time=0.45s (20.86%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1820|ppo_ep: 1|act_loss: 0.01434326171875|cri_loss: 0.007396697998046875|unsuper_loss: 0.0
-average reward score: 4.30078125
--------------------------------------------------------------------------------------
-|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.68s (74.57%) |Training time=0.47s (21.07%) |Others=0.10 (4.36%)|CurSamplesPerSec=14.22 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1821|ppo_ep: 1|act_loss: 0.00457763671875|cri_loss: 0.0029048919677734375|unsuper_loss: 0.0
-average reward score: 4.6875
--------------------------------------------------------------------------------------
-|E2E latency=2.78s |Gather latency=0.00s (0.00%) |Generate time=1.58s (56.71%) |Training time=0.47s (17.01%) |Others=0.73 (26.28%)|CurSamplesPerSec=11.49 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1822|ppo_ep: 1|act_loss: -0.027496337890625|cri_loss: -0.012420654296875|unsuper_loss: 0.0
-average reward score: 4.42578125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.26%) |Training time=0.48s (22.18%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1823|ppo_ep: 1|act_loss: -0.0218505859375|cri_loss: -0.0094757080078125|unsuper_loss: 0.0
-average reward score: 4.5390625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.48%) |Training time=0.47s (21.95%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1824|ppo_ep: 1|act_loss: -0.0355224609375|cri_loss: -0.015899658203125|unsuper_loss: 0.0
-average reward score: 4.5234375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.19%) |Training time=0.48s (22.25%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1825|ppo_ep: 1|act_loss: -0.0250396728515625|cri_loss: -0.01177978515625|unsuper_loss: 0.0
-average reward score: 4.6953125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.57%) |Training time=0.47s (21.84%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1826|ppo_ep: 1|act_loss: -0.0089111328125|cri_loss: -0.0037994384765625|unsuper_loss: 0.0
-average reward score: 4.3671875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.38%) |Training time=0.48s (22.07%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1827|ppo_ep: 1|act_loss: -0.00405120849609375|cri_loss: -0.000476837158203125|unsuper_loss: 0.0
-average reward score: 5.2734375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.33%) |Training time=0.47s (22.10%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1828|ppo_ep: 1|act_loss: 0.00782012939453125|cri_loss: 0.0044708251953125|unsuper_loss: 0.0
-average reward score: 3.943359375
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.57s (71.99%) |Training time=0.51s (23.51%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.41
-[2023-04-14 09:55:07,257] [INFO] [logging.py:96:log_dist] [Rank 0] step=1830, skipped=22, lr=[8.643864942229842e-06, 8.643864942229842e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:55:07,276] [INFO] [timer.py:199:stop] epoch=0/micro_step=1830/global_step=1830, RunningAvgSamplesPerSec=107.72570916256083, CurrSamplesPerSec=101.29953304215451, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:55:07,369] [INFO] [logging.py:96:log_dist] [Rank 0] step=1830, skipped=24, lr=[4.479862473084266e-06, 4.479862473084266e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1829|ppo_ep: 1|act_loss: 0.04193115234375|cri_loss: 0.0218658447265625|unsuper_loss: 0.0
-average reward score: 5.3828125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.27%) |Training time=0.48s (22.18%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.41
-epoch: 0|step: 1830|ppo_ep: 1|act_loss: -0.0260772705078125|cri_loss: -0.01226806640625|unsuper_loss: 0.0
-average reward score: 4.5859375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.31%) |Training time=0.48s (22.12%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.42
-[2023-04-14 09:55:11,655] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
-epoch: 0|step: 1831|ppo_ep: 1|act_loss: -0.002475738525390625|cri_loss: 0.0001316070556640625|unsuper_loss: 0.0
-average reward score: 4.53125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.90%) |Training time=0.47s (22.01%) |Others=0.09 (4.10%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.42
-[2023-04-14 09:55:13,796] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
-epoch: 0|step: 1832|ppo_ep: 1|act_loss: -0.0205841064453125|cri_loss: -0.007701873779296875|unsuper_loss: 0.0
-average reward score: 4.4921875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.71%) |Training time=0.48s (22.19%) |Others=0.09 (4.09%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1833|ppo_ep: 1|act_loss: -0.0280303955078125|cri_loss: -0.0123748779296875|unsuper_loss: 0.0
-average reward score: 3.974609375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.79%) |Training time=0.47s (21.64%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1834|ppo_ep: 1|act_loss: 0.022491455078125|cri_loss: 0.01218414306640625|unsuper_loss: 0.0
-average reward score: 4.30859375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.07%) |Training time=0.49s (22.39%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1835|ppo_ep: 1|act_loss: 0.027008056640625|cri_loss: 0.014068603515625|unsuper_loss: 0.0
-average reward score: 3.689453125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.00%) |Training time=0.46s (21.40%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1836|ppo_ep: 1|act_loss: 0.0245513916015625|cri_loss: 0.0125579833984375|unsuper_loss: 0.0
-average reward score: 4.1015625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.48%) |Training time=0.47s (21.93%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1837|ppo_ep: 1|act_loss: 0.0110015869140625|cri_loss: 0.00743865966796875|unsuper_loss: 0.0
-average reward score: 4.21875
--------------------------------------------------------------------------------------
-|E2E latency=2.12s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.87%) |Training time=0.39s (18.40%) |Others=0.10 (4.73%)|CurSamplesPerSec=15.12 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1838|ppo_ep: 1|act_loss: 0.00502777099609375|cri_loss: 0.0027179718017578125|unsuper_loss: 0.0
-average reward score: 4.6328125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.67s (76.86%) |Training time=0.40s (18.48%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.42
-[2023-04-14 09:55:28,958] [INFO] [logging.py:96:log_dist] [Rank 0] step=1840, skipped=22, lr=[8.632482852161159e-06, 8.632482852161159e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:55:28,976] [INFO] [timer.py:199:stop] epoch=0/micro_step=1840/global_step=1840, RunningAvgSamplesPerSec=107.73700504107984, CurrSamplesPerSec=111.84251454093962, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:55:29,068] [INFO] [logging.py:96:log_dist] [Rank 0] step=1840, skipped=26, lr=[4.475151538852264e-06, 4.475151538852264e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1839|ppo_ep: 1|act_loss: -0.0207672119140625|cri_loss: -0.00972747802734375|unsuper_loss: 0.0
-average reward score: 4.3359375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.57%) |Training time=0.45s (20.86%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1840|ppo_ep: 1|act_loss: -0.021087646484375|cri_loss: -0.009552001953125|unsuper_loss: 0.0
-average reward score: 3.8671875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.38%) |Training time=0.45s (21.04%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1841|ppo_ep: 1|act_loss: -0.023468017578125|cri_loss: -0.010345458984375|unsuper_loss: 0.0
-average reward score: 3.470703125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.92%) |Training time=0.44s (20.50%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1842|ppo_ep: 1|act_loss: -0.05645751953125|cri_loss: -0.026336669921875|unsuper_loss: 0.0
-average reward score: 4.5703125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.65%) |Training time=0.45s (20.72%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1843|ppo_ep: 1|act_loss: 0.034271240234375|cri_loss: 0.0200042724609375|unsuper_loss: 0.0
-average reward score: 3.44140625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.52%) |Training time=0.43s (19.89%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1844|ppo_ep: 1|act_loss: -0.0029296875|cri_loss: -0.0012416839599609375|unsuper_loss: 0.0
-average reward score: 4.8671875
--------------------------------------------------------------------------------------
-|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.79s (76.96%) |Training time=0.44s (18.77%) |Others=0.10 (4.27%)|CurSamplesPerSec=13.79 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1845|ppo_ep: 1|act_loss: 0.023406982421875|cri_loss: 0.01213836669921875|unsuper_loss: 0.0
-average reward score: 4.25390625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.21%) |Training time=0.46s (21.22%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1846|ppo_ep: 1|act_loss: -0.0084228515625|cri_loss: -0.002788543701171875|unsuper_loss: 0.0
-average reward score: 4.7890625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.14%) |Training time=0.46s (21.31%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1847|ppo_ep: 1|act_loss: -0.06622314453125|cri_loss: -0.0291748046875|unsuper_loss: 0.0
-average reward score: 4.046875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.35%) |Training time=0.45s (21.07%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1848|ppo_ep: 1|act_loss: 0.03302001953125|cri_loss: 0.017364501953125|unsuper_loss: 0.0
-average reward score: 3.33984375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.18%) |Training time=0.46s (21.30%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.42
-[2023-04-14 09:55:50,807] [INFO] [logging.py:96:log_dist] [Rank 0] step=1850, skipped=22, lr=[8.621044326011558e-06, 8.621044326011558e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:55:50,825] [INFO] [timer.py:199:stop] epoch=0/micro_step=1850/global_step=1850, RunningAvgSamplesPerSec=107.7581995961723, CurrSamplesPerSec=109.30806060828172, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:55:50,918] [INFO] [logging.py:96:log_dist] [Rank 0] step=1850, skipped=26, lr=[4.469236527444994e-06, 4.469236527444994e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1849|ppo_ep: 1|act_loss: -0.00634765625|cri_loss: 0.00135040283203125|unsuper_loss: 0.0
-average reward score: 3.236328125
--------------------------------------------------------------------------------------
-|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.73s (75.76%) |Training time=0.46s (19.95%) |Others=0.10 (4.29%)|CurSamplesPerSec=14.01 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1850|ppo_ep: 1|act_loss: 0.01006317138671875|cri_loss: 0.010528564453125|unsuper_loss: 0.0
-average reward score: 3.015625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.78%) |Training time=0.47s (21.64%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1851|ppo_ep: 1|act_loss: 0.032867431640625|cri_loss: 0.0192413330078125|unsuper_loss: 0.0
-average reward score: 3.171875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.28%) |Training time=0.45s (21.15%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1852|ppo_ep: 1|act_loss: 0.02581787109375|cri_loss: 0.0165252685546875|unsuper_loss: 0.0
-average reward score: 3.73046875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.50%) |Training time=0.45s (20.93%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1853|ppo_ep: 1|act_loss: -0.04046630859375|cri_loss: -0.01678466796875|unsuper_loss: 0.0
-average reward score: 3.263671875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.07%) |Training time=0.46s (21.36%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1854|ppo_ep: 1|act_loss: -0.1556396484375|cri_loss: -0.061798095703125|unsuper_loss: 0.0
-average reward score: 3.73046875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.19%) |Training time=0.46s (21.26%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1855|ppo_ep: 1|act_loss: 0.0204010009765625|cri_loss: 0.01122283935546875|unsuper_loss: 0.0
-average reward score: 3.962890625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.13%) |Training time=0.46s (21.30%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1856|ppo_ep: 1|act_loss: 0.038818359375|cri_loss: 0.0240478515625|unsuper_loss: 0.0
-average reward score: 2.8671875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.04%) |Training time=0.46s (21.40%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1857|ppo_ep: 1|act_loss: -0.041259765625|cri_loss: -0.0198516845703125|unsuper_loss: 0.0
-average reward score: 4.76953125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.93%) |Training time=0.46s (21.49%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1858|ppo_ep: 1|act_loss: 0.1640625|cri_loss: 0.1064453125|unsuper_loss: 0.0
-average reward score: 3.146484375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.88%) |Training time=0.46s (21.56%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.42
-[2023-04-14 09:56:12,515] [INFO] [logging.py:96:log_dist] [Rank 0] step=1860, skipped=22, lr=[8.609549533327585e-06, 8.609549533327585e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:56:12,534] [INFO] [timer.py:199:stop] epoch=0/micro_step=1860/global_step=1860, RunningAvgSamplesPerSec=107.75624465773492, CurrSamplesPerSec=104.92407924083149, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:56:12,628] [INFO] [logging.py:96:log_dist] [Rank 0] step=1860, skipped=26, lr=[4.463292327201862e-06, 4.463292327201862e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1859|ppo_ep: 1|act_loss: -0.017120361328125|cri_loss: -0.00815582275390625|unsuper_loss: 0.0
-average reward score: 3.7578125
--------------------------------------------------------------------------------------
-|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.57%) |Training time=0.47s (20.21%) |Others=0.10 (4.22%)|CurSamplesPerSec=13.77 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1860|ppo_ep: 1|act_loss: 0.0186614990234375|cri_loss: 0.00991058349609375|unsuper_loss: 0.0
-average reward score: 3.82421875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.64%) |Training time=0.47s (21.74%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1861|ppo_ep: 1|act_loss: 0.031982421875|cri_loss: 0.0178070068359375|unsuper_loss: 0.0
-average reward score: 4.62109375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.83%) |Training time=0.46s (21.60%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1862|ppo_ep: 1|act_loss: 0.03680419921875|cri_loss: 0.020660400390625|unsuper_loss: 0.0
-average reward score: 4.64453125
--------------------------------------------------------------------------------------
-|E2E latency=2.58s |Gather latency=0.00s (0.00%) |Generate time=1.58s (61.54%) |Training time=0.47s (18.13%) |Others=0.52 (20.32%)|CurSamplesPerSec=12.43 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1863|ppo_ep: 1|act_loss: 0.053924560546875|cri_loss: 0.027862548828125|unsuper_loss: 0.0
-average reward score: 5.0390625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.88%) |Training time=0.47s (21.56%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.42
-[2023-04-14 09:56:23,836] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, reducing to 16384
-epoch: 0|step: 1864|ppo_ep: 1|act_loss: 0.0142364501953125|cri_loss: 0.0084381103515625|unsuper_loss: 0.0
-average reward score: 4.68359375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.09%) |Training time=0.47s (21.75%) |Others=0.09 (4.16%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1865|ppo_ep: 1|act_loss: 0.005218505859375|cri_loss: 0.003391265869140625|unsuper_loss: 0.0
-average reward score: 4.0546875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.07%) |Training time=0.47s (21.80%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1866|ppo_ep: 1|act_loss: 0.020599365234375|cri_loss: 0.01151275634765625|unsuper_loss: 0.0
-average reward score: 4.07421875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.67%) |Training time=0.47s (21.80%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1867|ppo_ep: 1|act_loss: -0.00788116455078125|cri_loss: -0.00058746337890625|unsuper_loss: 0.0
-average reward score: 4.55078125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.98%) |Training time=0.46s (21.45%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1868|ppo_ep: 1|act_loss: 0.010833740234375|cri_loss: 0.00745391845703125|unsuper_loss: 0.0
-average reward score: 4.60546875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.98%) |Training time=0.46s (21.45%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.42
-[2023-04-14 09:56:34,515] [INFO] [logging.py:96:log_dist] [Rank 0] step=1870, skipped=22, lr=[8.597998644489801e-06, 8.597998644489801e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:56:34,533] [INFO] [timer.py:199:stop] epoch=0/micro_step=1870/global_step=1870, RunningAvgSamplesPerSec=107.7423744536757, CurrSamplesPerSec=106.6015344781027, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:56:34,626] [INFO] [logging.py:96:log_dist] [Rank 0] step=1870, skipped=27, lr=[4.45791766334022e-06, 4.45791766334022e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1869|ppo_ep: 1|act_loss: -0.01727294921875|cri_loss: -0.00601959228515625|unsuper_loss: 0.0
-average reward score: 4.2421875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.01%) |Training time=0.46s (21.46%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1870|ppo_ep: 1|act_loss: -0.072021484375|cri_loss: -0.029998779296875|unsuper_loss: 0.0
-average reward score: 5.0234375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.26%) |Training time=0.46s (21.18%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1871|ppo_ep: 1|act_loss: 0.0263671875|cri_loss: 0.014129638671875|unsuper_loss: 0.0
-average reward score: 4.63671875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.78%) |Training time=0.47s (21.68%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1872|ppo_ep: 1|act_loss: -0.03436279296875|cri_loss: -0.015869140625|unsuper_loss: 0.0
-average reward score: 4.3671875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.72%) |Training time=0.47s (21.74%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1873|ppo_ep: 1|act_loss: 0.00041961669921875|cri_loss: 0.001628875732421875|unsuper_loss: 0.0
-average reward score: 4.16796875
--------------------------------------------------------------------------------------
-|E2E latency=2.49s |Gather latency=0.00s (0.00%) |Generate time=1.76s (70.73%) |Training time=0.47s (19.01%) |Others=0.26 (10.27%)|CurSamplesPerSec=12.86 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1874|ppo_ep: 1|act_loss: -0.018707275390625|cri_loss: -0.00836181640625|unsuper_loss: 0.0
-average reward score: 5.48046875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.45%) |Training time=0.47s (21.99%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1875|ppo_ep: 1|act_loss: 0.03057861328125|cri_loss: 0.0168304443359375|unsuper_loss: 0.0
-average reward score: 4.69140625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.97%) |Training time=0.46s (21.45%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1876|ppo_ep: 1|act_loss: 0.0498046875|cri_loss: 0.0269012451171875|unsuper_loss: 0.0
-average reward score: 4.11328125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.74%) |Training time=0.47s (21.68%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1877|ppo_ep: 1|act_loss: 0.09320068359375|cri_loss: 0.048553466796875|unsuper_loss: 0.0
-average reward score: 4.85546875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.87%) |Training time=0.46s (21.56%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1878|ppo_ep: 1|act_loss: -0.0460205078125|cri_loss: -0.02252197265625|unsuper_loss: 0.0
-average reward score: 4.9921875
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.47%) |Training time=0.48s (22.04%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.42
-[2023-04-14 09:56:56,563] [INFO] [logging.py:96:log_dist] [Rank 0] step=1880, skipped=22, lr=[8.58639183071024e-06, 8.58639183071024e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:56:56,581] [INFO] [timer.py:199:stop] epoch=0/micro_step=1880/global_step=1880, RunningAvgSamplesPerSec=107.72234280157105, CurrSamplesPerSec=100.53431047965461, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:56:56,674] [INFO] [logging.py:96:log_dist] [Rank 0] step=1880, skipped=27, lr=[4.451918247401336e-06, 4.451918247401336e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1879|ppo_ep: 1|act_loss: -0.04168701171875|cri_loss: -0.0201873779296875|unsuper_loss: 0.0
-average reward score: 4.00390625
--------------------------------------------------------------------------------------
-|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.70s (74.57%) |Training time=0.48s (21.13%) |Others=0.10 (4.30%)|CurSamplesPerSec=14.06 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1880|ppo_ep: 1|act_loss: -0.01303863525390625|cri_loss: -0.0061492919921875|unsuper_loss: 0.0
-average reward score: 4.73828125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.60%) |Training time=0.47s (21.85%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1881|ppo_ep: 1|act_loss: 0.073974609375|cri_loss: 0.039947509765625|unsuper_loss: 0.0
-average reward score: 4.140625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.46%) |Training time=0.47s (21.98%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1882|ppo_ep: 1|act_loss: 0.02935791015625|cri_loss: 0.019622802734375|unsuper_loss: 0.0
-average reward score: 4.78125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.92%) |Training time=0.46s (21.50%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1883|ppo_ep: 1|act_loss: 0.012115478515625|cri_loss: 0.00738525390625|unsuper_loss: 0.0
-average reward score: 4.953125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.53%) |Training time=0.47s (21.91%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1884|ppo_ep: 1|act_loss: 0.0086669921875|cri_loss: 0.00626373291015625|unsuper_loss: 0.0
-average reward score: 4.87109375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.56%) |Training time=0.47s (21.88%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1885|ppo_ep: 1|act_loss: -0.06195068359375|cri_loss: -0.027618408203125|unsuper_loss: 0.0
-average reward score: 4.78515625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.53%) |Training time=0.47s (21.91%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1886|ppo_ep: 1|act_loss: -0.0176544189453125|cri_loss: -0.006420135498046875|unsuper_loss: 0.0
-average reward score: 4.3359375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.70%) |Training time=0.47s (21.72%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1887|ppo_ep: 1|act_loss: -0.0256195068359375|cri_loss: -0.010986328125|unsuper_loss: 0.0
-average reward score: 4.16796875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.55%) |Training time=0.47s (21.89%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1888|ppo_ep: 1|act_loss: 0.05511474609375|cri_loss: 0.02960205078125|unsuper_loss: 0.0
-average reward score: 4.63671875
--------------------------------------------------------------------------------------
-|E2E latency=2.82s |Gather latency=0.00s (0.00%) |Generate time=1.76s (62.23%) |Training time=0.47s (16.67%) |Others=0.60 (21.09%)|CurSamplesPerSec=11.34 |AvgSamplesPerSec=14.42
-[2023-04-14 09:57:18,761] [INFO] [logging.py:96:log_dist] [Rank 0] step=1890, skipped=22, lr=[8.574729264029886e-06, 8.574729264029886e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:57:18,779] [INFO] [timer.py:199:stop] epoch=0/micro_step=1890/global_step=1890, RunningAvgSamplesPerSec=107.70136193271539, CurrSamplesPerSec=103.31760270160629, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:57:18,872] [INFO] [logging.py:96:log_dist] [Rank 0] step=1890, skipped=27, lr=[4.44588989932528e-06, 4.44588989932528e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1889|ppo_ep: 1|act_loss: 0.056121826171875|cri_loss: 0.02935791015625|unsuper_loss: 0.0
-average reward score: 4.796875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.42%) |Training time=0.47s (22.00%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1890|ppo_ep: 1|act_loss: -0.0290985107421875|cri_loss: -0.0125274658203125|unsuper_loss: 0.0
-average reward score: 4.3515625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.13%) |Training time=0.48s (22.31%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1891|ppo_ep: 1|act_loss: 0.17236328125|cri_loss: 0.0926513671875|unsuper_loss: 0.0
-average reward score: 4.29296875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.25%) |Training time=0.48s (22.18%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1892|ppo_ep: 1|act_loss: 0.011810302734375|cri_loss: 0.00740814208984375|unsuper_loss: 0.0
-average reward score: 4.796875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.65%) |Training time=0.47s (21.77%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1893|ppo_ep: 1|act_loss: 0.0084075927734375|cri_loss: 0.00481414794921875|unsuper_loss: 0.0
-average reward score: 4.796875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.18%) |Training time=0.49s (22.25%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1894|ppo_ep: 1|act_loss: 0.087890625|cri_loss: 0.047149658203125|unsuper_loss: 0.0
-average reward score: 4.5859375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.37%) |Training time=0.48s (22.07%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1895|ppo_ep: 1|act_loss: -0.0259246826171875|cri_loss: -0.012298583984375|unsuper_loss: 0.0
-average reward score: 5.16015625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.15%) |Training time=0.48s (22.32%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1896|ppo_ep: 1|act_loss: -0.013397216796875|cri_loss: -0.00591278076171875|unsuper_loss: 0.0
-average reward score: 5.09765625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.78%) |Training time=0.49s (22.68%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1897|ppo_ep: 1|act_loss: -0.08074951171875|cri_loss: -0.038360595703125|unsuper_loss: 0.0
-average reward score: 5.0703125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.75%) |Training time=0.49s (22.68%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1898|ppo_ep: 1|act_loss: -0.0638427734375|cri_loss: -0.029876708984375|unsuper_loss: 0.0
-average reward score: 5.47265625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.53%) |Training time=0.50s (22.97%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.42
-[2023-04-14 09:57:40,428] [INFO] [logging.py:96:log_dist] [Rank 0] step=1900, skipped=22, lr=[8.563011117316109e-06, 8.563011117316109e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:57:40,446] [INFO] [timer.py:199:stop] epoch=0/micro_step=1900/global_step=1900, RunningAvgSamplesPerSec=107.65234667115939, CurrSamplesPerSec=96.84231002618438, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:57:40,796] [INFO] [logging.py:96:log_dist] [Rank 0] step=1900, skipped=27, lr=[4.439832708466712e-06, 4.439832708466712e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1899|ppo_ep: 1|act_loss: -0.0599365234375|cri_loss: -0.029296875|unsuper_loss: 0.0
-average reward score: 4.40234375
--------------------------------------------------------------------------------------
-|E2E latency=2.43s |Gather latency=0.00s (0.00%) |Generate time=1.58s (65.12%) |Training time=0.75s (30.80%) |Others=0.10 (4.09%)|CurSamplesPerSec=13.16 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1900|ppo_ep: 1|act_loss: 0.00838470458984375|cri_loss: 0.00495147705078125|unsuper_loss: 0.0
-average reward score: 5.15625
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.84%) |Training time=0.50s (22.66%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1901|ppo_ep: 1|act_loss: 0.01554107666015625|cri_loss: 0.00830841064453125|unsuper_loss: 0.0
-average reward score: 4.25
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.47%) |Training time=0.46s (20.99%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1902|ppo_ep: 1|act_loss: 0.082763671875|cri_loss: 0.04510498046875|unsuper_loss: 0.0
-average reward score: 4.671875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.23%) |Training time=0.46s (21.23%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1903|ppo_ep: 1|act_loss: 0.08160400390625|cri_loss: 0.04412841796875|unsuper_loss: 0.0
-average reward score: 4.390625
--------------------------------------------------------------------------------------
-|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.78s (75.03%) |Training time=0.49s (20.81%) |Others=0.10 (4.16%)|CurSamplesPerSec=13.48 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1904|ppo_ep: 1|act_loss: 0.004364013671875|cri_loss: 0.0028858184814453125|unsuper_loss: 0.0
-average reward score: 4.6640625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.16%) |Training time=0.46s (21.26%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1905|ppo_ep: 1|act_loss: 0.008758544921875|cri_loss: 0.005126953125|unsuper_loss: 0.0
-average reward score: 4.859375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.83%) |Training time=0.45s (20.64%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1906|ppo_ep: 1|act_loss: -0.04571533203125|cri_loss: -0.0203399658203125|unsuper_loss: 0.0
-average reward score: 4.5390625
--------------------------------------------------------------------------------------
-|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.63s (70.66%) |Training time=0.45s (19.45%) |Others=0.23 (9.89%)|CurSamplesPerSec=13.85 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1907|ppo_ep: 1|act_loss: -0.0204315185546875|cri_loss: -0.00977325439453125|unsuper_loss: 0.0
-average reward score: 4.7734375
--------------------------------------------------------------------------------------
-|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.66s (74.35%) |Training time=0.47s (21.17%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.35 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1908|ppo_ep: 1|act_loss: -0.0234527587890625|cri_loss: -0.011016845703125|unsuper_loss: 0.0
-average reward score: 4.91015625
--------------------------------------------------------------------------------------
-|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.73s (75.05%) |Training time=0.48s (20.64%) |Others=0.10 (4.32%)|CurSamplesPerSec=13.88 |AvgSamplesPerSec=14.42
-[2023-04-14 09:58:02,983] [INFO] [logging.py:96:log_dist] [Rank 0] step=1910, skipped=22, lr=[8.551237564260112e-06, 8.551237564260112e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:58:03,001] [INFO] [timer.py:199:stop] epoch=0/micro_step=1910/global_step=1910, RunningAvgSamplesPerSec=107.64097658565429, CurrSamplesPerSec=115.11625720882522, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:58:03,094] [INFO] [logging.py:96:log_dist] [Rank 0] step=1910, skipped=27, lr=[4.433746764607812e-06, 4.433746764607812e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1909|ppo_ep: 1|act_loss: -0.018829345703125|cri_loss: -0.00876617431640625|unsuper_loss: 0.0
-average reward score: 4.64453125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.14%) |Training time=0.44s (20.34%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1910|ppo_ep: 1|act_loss: -0.02069091796875|cri_loss: -0.00942230224609375|unsuper_loss: 0.0
-average reward score: 4.26953125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.48%) |Training time=0.48s (22.00%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1911|ppo_ep: 1|act_loss: 0.02587890625|cri_loss: 0.0135345458984375|unsuper_loss: 0.0
-average reward score: 4.5625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.89%) |Training time=0.47s (21.57%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1912|ppo_ep: 1|act_loss: 0.01739501953125|cri_loss: 0.0093841552734375|unsuper_loss: 0.0
-average reward score: 4.296875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.27%) |Training time=0.48s (22.20%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1913|ppo_ep: 1|act_loss: 0.05316162109375|cri_loss: 0.0273284912109375|unsuper_loss: 0.0
-average reward score: 3.9609375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.60%) |Training time=0.47s (21.87%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1914|ppo_ep: 1|act_loss: 0.0217742919921875|cri_loss: 0.01145172119140625|unsuper_loss: 0.0
-average reward score: 4.5546875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.77%) |Training time=0.47s (21.72%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1915|ppo_ep: 1|act_loss: -0.00406646728515625|cri_loss: -0.0015735626220703125|unsuper_loss: 0.0
-average reward score: 4.796875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.95%) |Training time=0.47s (21.52%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1916|ppo_ep: 1|act_loss: 0.0229339599609375|cri_loss: 0.01293182373046875|unsuper_loss: 0.0
-average reward score: 4.30859375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.59%) |Training time=0.47s (21.86%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1917|ppo_ep: 1|act_loss: -0.03863525390625|cri_loss: -0.01763916015625|unsuper_loss: 0.0
-average reward score: 4.296875
--------------------------------------------------------------------------------------
-|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.75s (74.17%) |Training time=0.51s (21.66%) |Others=0.10 (4.17%)|CurSamplesPerSec=13.53 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1918|ppo_ep: 1|act_loss: 0.020843505859375|cri_loss: 0.0109405517578125|unsuper_loss: 0.0
-average reward score: 4.45703125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.28%) |Training time=0.48s (22.18%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.42
-[2023-04-14 09:58:24,899] [INFO] [logging.py:96:log_dist] [Rank 0] step=1920, skipped=22, lr=[8.539408779374354e-06, 8.539408779374354e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:58:24,917] [INFO] [timer.py:199:stop] epoch=0/micro_step=1920/global_step=1920, RunningAvgSamplesPerSec=107.6049387338173, CurrSamplesPerSec=101.45643165445873, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:58:25,010] [INFO] [logging.py:96:log_dist] [Rank 0] step=1920, skipped=27, lr=[4.427632157956951e-06, 4.427632157956951e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1919|ppo_ep: 1|act_loss: -0.022369384765625|cri_loss: -0.01059722900390625|unsuper_loss: 0.0
-average reward score: 4.63671875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.38%) |Training time=0.48s (22.10%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1920|ppo_ep: 1|act_loss: -0.0789794921875|cri_loss: -0.0374755859375|unsuper_loss: 0.0
-average reward score: 4.06640625
--------------------------------------------------------------------------------------
-|E2E latency=2.51s |Gather latency=0.00s (0.00%) |Generate time=1.61s (64.09%) |Training time=0.49s (19.47%) |Others=0.41 (16.44%)|CurSamplesPerSec=12.77 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1921|ppo_ep: 1|act_loss: -0.027984619140625|cri_loss: -0.0128326416015625|unsuper_loss: 0.0
-average reward score: 4.7109375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.45%) |Training time=0.48s (22.00%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1922|ppo_ep: 1|act_loss: 0.013946533203125|cri_loss: 0.0087738037109375|unsuper_loss: 0.0
-average reward score: 4.6796875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.44%) |Training time=0.48s (22.03%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1923|ppo_ep: 1|act_loss: 0.020965576171875|cri_loss: 0.011932373046875|unsuper_loss: 0.0
-average reward score: 4.68359375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.60%) |Training time=0.48s (21.88%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1924|ppo_ep: 1|act_loss: 0.0313720703125|cri_loss: 0.021392822265625|unsuper_loss: 0.0
-average reward score: 4.8984375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.12%) |Training time=0.46s (21.33%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1925|ppo_ep: 1|act_loss: 0.02484130859375|cri_loss: 0.01285552978515625|unsuper_loss: 0.0
-average reward score: 4.859375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.51%) |Training time=0.47s (21.92%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1926|ppo_ep: 1|act_loss: 0.09423828125|cri_loss: 0.052276611328125|unsuper_loss: 0.0
-average reward score: 4.55859375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.68%) |Training time=0.47s (21.76%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1927|ppo_ep: 1|act_loss: -0.0212249755859375|cri_loss: -0.0092926025390625|unsuper_loss: 0.0
-average reward score: 4.4140625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.42%) |Training time=0.48s (22.03%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1928|ppo_ep: 1|act_loss: -0.0222015380859375|cri_loss: -0.01053619384765625|unsuper_loss: 0.0
-average reward score: 4.4765625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.73%) |Training time=0.47s (21.70%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.42
-[2023-04-14 09:58:46,890] [INFO] [logging.py:96:log_dist] [Rank 0] step=1930, skipped=22, lr=[8.527524937989964e-06, 8.527524937989964e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:58:46,909] [INFO] [timer.py:199:stop] epoch=0/micro_step=1930/global_step=1930, RunningAvgSamplesPerSec=107.57664485953372, CurrSamplesPerSec=101.2196243603908, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:58:47,001] [INFO] [logging.py:96:log_dist] [Rank 0] step=1930, skipped=27, lr=[4.421488979147349e-06, 4.421488979147349e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1929|ppo_ep: 1|act_loss: -0.042266845703125|cri_loss: -0.0194244384765625|unsuper_loss: 0.0
-average reward score: 4.67578125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.32%) |Training time=0.48s (22.14%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1930|ppo_ep: 1|act_loss: -0.01319122314453125|cri_loss: -0.00580596923828125|unsuper_loss: 0.0
-average reward score: 4.4921875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.81%) |Training time=0.47s (21.64%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1931|ppo_ep: 1|act_loss: -0.0440673828125|cri_loss: -0.019989013671875|unsuper_loss: 0.0
-average reward score: 4.90234375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.57%) |Training time=0.47s (21.88%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1932|ppo_ep: 1|act_loss: -0.04925537109375|cri_loss: -0.0204315185546875|unsuper_loss: 0.0
-average reward score: 4.1171875
--------------------------------------------------------------------------------------
-|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.38%) |Training time=0.47s (20.38%) |Others=0.10 (4.24%)|CurSamplesPerSec=13.78 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1933|ppo_ep: 1|act_loss: 0.0242156982421875|cri_loss: 0.01245880126953125|unsuper_loss: 0.0
-average reward score: 4.515625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.34%) |Training time=0.48s (22.10%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1934|ppo_ep: 1|act_loss: 0.057586669921875|cri_loss: 0.030181884765625|unsuper_loss: 0.0
-average reward score: 4.71484375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.40%) |Training time=0.48s (22.04%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1935|ppo_ep: 1|act_loss: 0.0548095703125|cri_loss: 0.0284881591796875|unsuper_loss: 0.0
-average reward score: 4.77734375
--------------------------------------------------------------------------------------
-|E2E latency=2.07s |Gather latency=0.00s (0.00%) |Generate time=1.58s (76.53%) |Training time=0.39s (18.69%) |Others=0.10 (4.77%)|CurSamplesPerSec=15.49 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1936|ppo_ep: 1|act_loss: 0.04010009765625|cri_loss: 0.02294921875|unsuper_loss: 0.0
-average reward score: 4.83203125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.56%) |Training time=0.48s (21.92%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1937|ppo_ep: 1|act_loss: 0.0172882080078125|cri_loss: 0.009521484375|unsuper_loss: 0.0
-average reward score: 4.47265625
--------------------------------------------------------------------------------------
-|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.67%) |Training time=0.47s (20.98%) |Others=0.10 (4.35%)|CurSamplesPerSec=14.16 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1938|ppo_ep: 1|act_loss: -0.036041259765625|cri_loss: -0.0171966552734375|unsuper_loss: 0.0
-average reward score: 4.4453125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.06%) |Training time=0.46s (21.35%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.42
-[2023-04-14 09:59:08,767] [INFO] [logging.py:96:log_dist] [Rank 0] step=1940, skipped=22, lr=[8.515586216254137e-06, 8.515586216254137e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:59:08,785] [INFO] [timer.py:199:stop] epoch=0/micro_step=1940/global_step=1940, RunningAvgSamplesPerSec=107.57414139164379, CurrSamplesPerSec=109.40669624006952, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:59:08,878] [INFO] [logging.py:96:log_dist] [Rank 0] step=1940, skipped=27, lr=[4.4153173192357375e-06, 4.4153173192357375e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1939|ppo_ep: 1|act_loss: -0.020263671875|cri_loss: -0.0081634521484375|unsuper_loss: 0.0
-average reward score: 4.7109375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.36%) |Training time=0.46s (21.09%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1940|ppo_ep: 1|act_loss: 0.0053558349609375|cri_loss: 0.003795623779296875|unsuper_loss: 0.0
-average reward score: 4.44140625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.89%) |Training time=0.46s (21.55%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.43
-epoch: 0|step: 1941|ppo_ep: 1|act_loss: -0.0266265869140625|cri_loss: -0.0120849609375|unsuper_loss: 0.0
-average reward score: 4.31640625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.59%) |Training time=0.47s (21.85%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.43
-epoch: 0|step: 1942|ppo_ep: 1|act_loss: 0.00569915771484375|cri_loss: 0.003177642822265625|unsuper_loss: 0.0
-average reward score: 4.33984375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.50%) |Training time=0.47s (21.95%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.43
-epoch: 0|step: 1943|ppo_ep: 1|act_loss: 0.00244903564453125|cri_loss: 0.0018787384033203125|unsuper_loss: 0.0
-average reward score: 4.98828125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.35%) |Training time=0.48s (22.08%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.43
-epoch: 0|step: 1944|ppo_ep: 1|act_loss: 0.02032470703125|cri_loss: 0.01052093505859375|unsuper_loss: 0.0
-average reward score: 4.91015625
--------------------------------------------------------------------------------------
-|E2E latency=2.46s |Gather latency=0.00s (0.00%) |Generate time=1.59s (64.58%) |Training time=0.48s (19.36%) |Others=0.39 (16.06%)|CurSamplesPerSec=13.03 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1945|ppo_ep: 1|act_loss: 0.0038471221923828125|cri_loss: 0.0023193359375|unsuper_loss: 0.0
-average reward score: 4.13671875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.22%) |Training time=0.48s (22.21%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.43
-epoch: 0|step: 1946|ppo_ep: 1|act_loss: -0.00514984130859375|cri_loss: -0.002071380615234375|unsuper_loss: 0.0
-average reward score: 4.83984375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.42%) |Training time=0.47s (22.02%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.43
-epoch: 0|step: 1947|ppo_ep: 1|act_loss: -0.0303497314453125|cri_loss: -0.0145416259765625|unsuper_loss: 0.0
-average reward score: 4.73828125
--------------------------------------------------------------------------------------
-|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.12%) |Training time=0.48s (20.65%) |Others=0.10 (4.23%)|CurSamplesPerSec=13.74 |AvgSamplesPerSec=14.43
-epoch: 0|step: 1948|ppo_ep: 1|act_loss: -0.0226287841796875|cri_loss: -0.0107574462890625|unsuper_loss: 0.0
-average reward score: 4.5859375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.19%) |Training time=0.48s (22.23%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.43
-[2023-04-14 09:59:30,807] [INFO] [logging.py:96:log_dist] [Rank 0] step=1950, skipped=22, lr=[8.50359279112753e-06, 8.50359279112753e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:59:30,826] [INFO] [timer.py:199:stop] epoch=0/micro_step=1950/global_step=1950, RunningAvgSamplesPerSec=107.54580213604925, CurrSamplesPerSec=102.16569360734277, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:59:30,918] [INFO] [logging.py:96:log_dist] [Rank 0] step=1950, skipped=27, lr=[4.4091172697010025e-06, 4.4091172697010025e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1949|ppo_ep: 1|act_loss: -0.0047607421875|cri_loss: -0.001953125|unsuper_loss: 0.0
-average reward score: 4.4140625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.40%) |Training time=0.48s (22.07%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.43
-epoch: 0|step: 1950|ppo_ep: 1|act_loss: 0.0032978057861328125|cri_loss: 0.002613067626953125|unsuper_loss: 0.0
-average reward score: 4.453125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.15%) |Training time=0.48s (22.29%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.43
-epoch: 0|step: 1951|ppo_ep: 1|act_loss: -0.009979248046875|cri_loss: -0.00467681884765625|unsuper_loss: 0.0
-average reward score: 5.2734375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.30%) |Training time=0.48s (22.15%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.43
-epoch: 0|step: 1952|ppo_ep: 1|act_loss: 0.033477783203125|cri_loss: 0.017303466796875|unsuper_loss: 0.0
-average reward score: 5.07421875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.40%) |Training time=0.48s (22.04%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.43
-epoch: 0|step: 1953|ppo_ep: 1|act_loss: -0.0017824172973632812|cri_loss: -0.000713348388671875|unsuper_loss: 0.0
-average reward score: 4.69921875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.58%) |Training time=0.47s (21.85%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.43
-epoch: 0|step: 1954|ppo_ep: 1|act_loss: 0.042510986328125|cri_loss: 0.022857666015625|unsuper_loss: 0.0
-average reward score: 4.5078125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.50%) |Training time=0.48s (21.96%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.43
-epoch: 0|step: 1955|ppo_ep: 1|act_loss: 0.01194000244140625|cri_loss: 0.0062408447265625|unsuper_loss: 0.0
-average reward score: 4.44140625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.03%) |Training time=0.48s (22.42%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.43
-epoch: 0|step: 1956|ppo_ep: 1|act_loss: -0.025299072265625|cri_loss: -0.0088348388671875|unsuper_loss: 0.0
-average reward score: 3.568359375
--------------------------------------------------------------------------------------
-|E2E latency=2.89s |Gather latency=0.00s (0.00%) |Generate time=1.58s (54.65%) |Training time=0.48s (16.54%) |Others=0.83 (28.80%)|CurSamplesPerSec=11.08 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1957|ppo_ep: 1|act_loss: -0.0439453125|cri_loss: -0.02105712890625|unsuper_loss: 0.0
-average reward score: 4.96484375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.57s (72.86%) |Training time=0.49s (22.59%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1958|ppo_ep: 1|act_loss: -0.02569580078125|cri_loss: -0.0119781494140625|unsuper_loss: 0.0
-average reward score: 4.5
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.57s (72.85%) |Training time=0.49s (22.60%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.42
-[2023-04-14 09:59:53,126] [INFO] [logging.py:96:log_dist] [Rank 0] step=1960, skipped=22, lr=[8.491544840381637e-06, 8.491544840381637e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 09:59:53,144] [INFO] [timer.py:199:stop] epoch=0/micro_step=1960/global_step=1960, RunningAvgSamplesPerSec=107.5075454772204, CurrSamplesPerSec=98.59728841847918, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 09:59:53,237] [INFO] [logging.py:96:log_dist] [Rank 0] step=1960, skipped=27, lr=[4.4028889224428365e-06, 4.4028889224428365e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1959|ppo_ep: 1|act_loss: -0.0106353759765625|cri_loss: -0.004428863525390625|unsuper_loss: 0.0
-average reward score: 5.08203125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.57s (72.79%) |Training time=0.49s (22.67%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1960|ppo_ep: 1|act_loss: -0.01139068603515625|cri_loss: -0.005153656005859375|unsuper_loss: 0.0
-average reward score: 4.3515625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.57s (72.68%) |Training time=0.49s (22.77%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.43
-epoch: 0|step: 1961|ppo_ep: 1|act_loss: 0.00475311279296875|cri_loss: 0.0025424957275390625|unsuper_loss: 0.0
-average reward score: 4.65625
--------------------------------------------------------------------------------------
-|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.74s (74.80%) |Training time=0.49s (20.97%) |Others=0.10 (4.24%)|CurSamplesPerSec=13.79 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1962|ppo_ep: 1|act_loss: 0.01065826416015625|cri_loss: 0.007080078125|unsuper_loss: 0.0
-average reward score: 4.80078125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.96%) |Training time=0.49s (22.50%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.43
-epoch: 0|step: 1963|ppo_ep: 1|act_loss: -0.0230560302734375|cri_loss: -0.0100250244140625|unsuper_loss: 0.0
-average reward score: 4.3203125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.57s (72.91%) |Training time=0.49s (22.54%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.43
-epoch: 0|step: 1964|ppo_ep: 1|act_loss: -0.007442474365234375|cri_loss: -0.0033111572265625|unsuper_loss: 0.0
-average reward score: 4.89453125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.57s (72.69%) |Training time=0.49s (22.46%) |Others=0.10 (4.85%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.43
-epoch: 0|step: 1965|ppo_ep: 1|act_loss: -0.0241851806640625|cri_loss: -0.0106658935546875|unsuper_loss: 0.0
-average reward score: 4.6171875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.12%) |Training time=0.49s (22.36%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.43
-epoch: 0|step: 1966|ppo_ep: 1|act_loss: 0.024444580078125|cri_loss: 0.0130615234375|unsuper_loss: 0.0
-average reward score: 4.6796875
--------------------------------------------------------------------------------------
-|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.67s (74.12%) |Training time=0.49s (21.54%) |Others=0.10 (4.35%)|CurSamplesPerSec=14.18 |AvgSamplesPerSec=14.43
-epoch: 0|step: 1967|ppo_ep: 1|act_loss: 0.037841796875|cri_loss: 0.02105712890625|unsuper_loss: 0.0
-average reward score: 4.70703125
--------------------------------------------------------------------------------------
-|E2E latency=2.87s |Gather latency=0.00s (0.00%) |Generate time=1.57s (54.79%) |Training time=0.49s (17.24%) |Others=0.80 (27.96%)|CurSamplesPerSec=11.16 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1968|ppo_ep: 1|act_loss: 0.021453857421875|cri_loss: 0.01126861572265625|unsuper_loss: 0.0
-average reward score: 4.43359375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.02%) |Training time=0.47s (21.43%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.42
-[2023-04-14 10:00:15,737] [INFO] [logging.py:96:log_dist] [Rank 0] step=1970, skipped=22, lr=[8.479442542596152e-06, 8.479442542596152e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:00:15,755] [INFO] [timer.py:199:stop] epoch=0/micro_step=1970/global_step=1970, RunningAvgSamplesPerSec=107.4582436204539, CurrSamplesPerSec=94.59606963662851, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:00:15,847] [INFO] [logging.py:96:log_dist] [Rank 0] step=1970, skipped=27, lr=[4.39663236978037e-06, 4.39663236978037e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1969|ppo_ep: 1|act_loss: 0.04022216796875|cri_loss: 0.022247314453125|unsuper_loss: 0.0
-average reward score: 4.65234375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.56%) |Training time=0.50s (22.96%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1970|ppo_ep: 1|act_loss: 0.002838134765625|cri_loss: 0.002742767333984375|unsuper_loss: 0.0
-average reward score: 4.7578125
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.26%) |Training time=0.51s (23.23%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1971|ppo_ep: 1|act_loss: 0.0199432373046875|cri_loss: 0.0103759765625|unsuper_loss: 0.0
-average reward score: 4.5
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.24%) |Training time=0.48s (22.22%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1972|ppo_ep: 1|act_loss: 0.04071044921875|cri_loss: 0.0208740234375|unsuper_loss: 0.0
-average reward score: 4.66015625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.94%) |Training time=0.49s (22.53%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1973|ppo_ep: 1|act_loss: -0.02496337890625|cri_loss: -0.011505126953125|unsuper_loss: 0.0
-average reward score: 4.25
--------------------------------------------------------------------------------------
-|E2E latency=2.51s |Gather latency=0.00s (0.00%) |Generate time=1.58s (62.97%) |Training time=0.49s (19.54%) |Others=0.44 (17.49%)|CurSamplesPerSec=12.73 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1974|ppo_ep: 1|act_loss: -0.0089111328125|cri_loss: -0.00240325927734375|unsuper_loss: 0.0
-average reward score: 4.4765625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.12%) |Training time=0.48s (22.33%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1975|ppo_ep: 1|act_loss: 0.035888671875|cri_loss: 0.019439697265625|unsuper_loss: 0.0
-average reward score: 4.53125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.63%) |Training time=0.50s (22.83%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1976|ppo_ep: 1|act_loss: -0.0299835205078125|cri_loss: -0.0144195556640625|unsuper_loss: 0.0
-average reward score: 4.828125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.02%) |Training time=0.48s (22.41%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1977|ppo_ep: 1|act_loss: 0.12469482421875|cri_loss: 0.06622314453125|unsuper_loss: 0.0
-average reward score: 4.9140625
--------------------------------------------------------------------------------------
-|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.75s (74.34%) |Training time=0.51s (21.49%) |Others=0.10 (4.17%)|CurSamplesPerSec=13.59 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1978|ppo_ep: 1|act_loss: 0.00582122802734375|cri_loss: 0.00420379638671875|unsuper_loss: 0.0
-average reward score: 4.8125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.79%) |Training time=0.49s (22.67%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.42
-[2023-04-14 10:00:37,960] [INFO] [logging.py:96:log_dist] [Rank 0] step=1980, skipped=22, lr=[8.467286077156324e-06, 8.467286077156324e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:00:37,978] [INFO] [timer.py:199:stop] epoch=0/micro_step=1980/global_step=1980, RunningAvgSamplesPerSec=107.40380143627976, CurrSamplesPerSec=102.09924531086331, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:00:38,071] [INFO] [logging.py:96:log_dist] [Rank 0] step=1980, skipped=27, lr=[4.3903477044508066e-06, 4.3903477044508066e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1979|ppo_ep: 1|act_loss: -0.00296783447265625|cri_loss: -0.0011501312255859375|unsuper_loss: 0.0
-average reward score: 4.5234375
--------------------------------------------------------------------------------------
-|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.59s (69.84%) |Training time=0.48s (20.97%) |Others=0.21 (9.19%)|CurSamplesPerSec=14.10 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1980|ppo_ep: 1|act_loss: -0.04632568359375|cri_loss: -0.021453857421875|unsuper_loss: 0.0
-average reward score: 4.54296875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.38%) |Training time=0.48s (22.06%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1981|ppo_ep: 1|act_loss: 0.01451873779296875|cri_loss: 0.0079803466796875|unsuper_loss: 0.0
-average reward score: 4.2109375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.35%) |Training time=0.48s (22.09%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1982|ppo_ep: 1|act_loss: -0.0458984375|cri_loss: -0.02252197265625|unsuper_loss: 0.0
-average reward score: 4.80859375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.05%) |Training time=0.48s (22.41%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1983|ppo_ep: 1|act_loss: 0.006771087646484375|cri_loss: 0.0038604736328125|unsuper_loss: 0.0
-average reward score: 5.26953125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.57%) |Training time=0.48s (21.90%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1984|ppo_ep: 1|act_loss: 0.028411865234375|cri_loss: 0.0149078369140625|unsuper_loss: 0.0
-average reward score: 4.8515625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.30%) |Training time=0.48s (22.15%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1985|ppo_ep: 1|act_loss: -0.0034332275390625|cri_loss: -0.0011224746704101562|unsuper_loss: 0.0
-average reward score: 4.21875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.05%) |Training time=0.48s (22.41%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1986|ppo_ep: 1|act_loss: 0.025604248046875|cri_loss: 0.01398468017578125|unsuper_loss: 0.0
-average reward score: 4.76171875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.25%) |Training time=0.48s (22.18%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1987|ppo_ep: 1|act_loss: 0.01654052734375|cri_loss: 0.00942230224609375|unsuper_loss: 0.0
-average reward score: 4.671875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.24%) |Training time=0.48s (22.20%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1988|ppo_ep: 1|act_loss: -0.04742431640625|cri_loss: -0.0215301513671875|unsuper_loss: 0.0
-average reward score: 5.08984375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.08%) |Training time=0.48s (22.36%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.42
-[2023-04-14 10:00:59,653] [INFO] [logging.py:96:log_dist] [Rank 0] step=1990, skipped=22, lr=[8.455075624250293e-06, 8.455075624250293e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:00:59,671] [INFO] [timer.py:199:stop] epoch=0/micro_step=1990/global_step=1990, RunningAvgSamplesPerSec=107.3690434626594, CurrSamplesPerSec=99.88764347649415, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:00:59,764] [INFO] [logging.py:96:log_dist] [Rank 0] step=1990, skipped=27, lr=[4.3840350196080485e-06, 4.3840350196080485e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1989|ppo_ep: 1|act_loss: 0.0305023193359375|cri_loss: 0.017486572265625|unsuper_loss: 0.0
-average reward score: 4.38671875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.05%) |Training time=0.48s (22.40%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.43
-epoch: 0|step: 1990|ppo_ep: 1|act_loss: 0.03271484375|cri_loss: 0.017822265625|unsuper_loss: 0.0
-average reward score: 4.95703125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.20%) |Training time=0.48s (22.24%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.43
-epoch: 0|step: 1991|ppo_ep: 1|act_loss: 0.031494140625|cri_loss: 0.0171051025390625|unsuper_loss: 0.0
-average reward score: 4.25390625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (72.78%) |Training time=0.49s (22.66%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.43
-epoch: 0|step: 1992|ppo_ep: 1|act_loss: -0.0189361572265625|cri_loss: -0.00396728515625|unsuper_loss: 0.0
-average reward score: 4.7265625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.57s (72.89%) |Training time=0.49s (22.55%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.43
-epoch: 0|step: 1993|ppo_ep: 1|act_loss: -0.026123046875|cri_loss: -0.011444091796875|unsuper_loss: 0.0
-average reward score: 4.34765625
--------------------------------------------------------------------------------------
-|E2E latency=2.57s |Gather latency=0.00s (0.00%) |Generate time=1.74s (67.83%) |Training time=0.49s (19.00%) |Others=0.34 (13.16%)|CurSamplesPerSec=12.47 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1994|ppo_ep: 1|act_loss: 0.00157928466796875|cri_loss: 0.0015621185302734375|unsuper_loss: 0.0
-average reward score: 4.7890625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.02%) |Training time=0.49s (22.46%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1995|ppo_ep: 1|act_loss: 0.04425048828125|cri_loss: 0.022979736328125|unsuper_loss: 0.0
-average reward score: 4.734375
--------------------------------------------------------------------------------------
-|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.68s (74.22%) |Training time=0.48s (21.39%) |Others=0.10 (4.39%)|CurSamplesPerSec=14.12 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1996|ppo_ep: 1|act_loss: 0.025238037109375|cri_loss: 0.01305389404296875|unsuper_loss: 0.0
-average reward score: 4.8046875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.04%) |Training time=0.48s (22.40%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1997|ppo_ep: 1|act_loss: 0.02642822265625|cri_loss: 0.01448822021484375|unsuper_loss: 0.0
-average reward score: 4.53125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.11%) |Training time=0.48s (22.33%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.42
-epoch: 0|step: 1998|ppo_ep: 1|act_loss: 0.01488494873046875|cri_loss: 0.0079498291015625|unsuper_loss: 0.0
-average reward score: 4.7734375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.91%) |Training time=0.47s (21.54%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.43
-[2023-04-14 10:01:21,774] [INFO] [logging.py:96:log_dist] [Rank 0] step=2000, skipped=22, lr=[8.442811364866433e-06, 8.442811364866433e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:01:21,793] [INFO] [timer.py:199:stop] epoch=0/micro_step=2000/global_step=2000, RunningAvgSamplesPerSec=107.33059698092995, CurrSamplesPerSec=98.97223170601313, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:01:21,885] [INFO] [logging.py:96:log_dist] [Rank 0] step=2000, skipped=27, lr=[4.3776944088213124e-06, 4.3776944088213124e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 1999|ppo_ep: 1|act_loss: -0.0130462646484375|cri_loss: -0.005878448486328125|unsuper_loss: 0.0
-average reward score: 4.984375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.02%) |Training time=0.49s (22.45%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2000|ppo_ep: 1|act_loss: 0.0110015869140625|cri_loss: 0.00705718994140625|unsuper_loss: 0.0
-average reward score: 4.8046875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.08%) |Training time=0.48s (22.37%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2001|ppo_ep: 1|act_loss: -0.0264434814453125|cri_loss: -0.012451171875|unsuper_loss: 0.0
-average reward score: 4.8671875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.26%) |Training time=0.48s (22.19%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2002|ppo_ep: 1|act_loss: -0.00392913818359375|cri_loss: 7.2479248046875e-05|unsuper_loss: 0.0
-average reward score: 5.265625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.20%) |Training time=0.49s (22.28%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2003|ppo_ep: 1|act_loss: -0.0116119384765625|cri_loss: -0.00412750244140625|unsuper_loss: 0.0
-average reward score: 4.71875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.08%) |Training time=0.48s (22.37%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2004|ppo_ep: 1|act_loss: 0.0240478515625|cri_loss: 0.01318359375|unsuper_loss: 0.0
-average reward score: 5.05078125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.20%) |Training time=0.48s (22.24%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2005|ppo_ep: 1|act_loss: -0.0154266357421875|cri_loss: -0.005596160888671875|unsuper_loss: 0.0
-average reward score: 5.5
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.57s (72.97%) |Training time=0.48s (22.49%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2006|ppo_ep: 1|act_loss: -0.040557861328125|cri_loss: -0.0189056396484375|unsuper_loss: 0.0
-average reward score: 5.0
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.57s (72.83%) |Training time=0.49s (22.62%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.43
-[2023-04-14 10:01:39,335] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, but hysteresis is 2. Reducing hysteresis to 1
-epoch: 0|step: 2007|ppo_ep: 1|act_loss: -0.044219970703125|cri_loss: -0.0193023681640625|unsuper_loss: 0.0
-average reward score: 5.51171875
--------------------------------------------------------------------------------------
-|E2E latency=2.43s |Gather latency=0.00s (0.00%) |Generate time=1.63s (67.23%) |Training time=0.59s (24.41%) |Others=0.20 (8.35%)|CurSamplesPerSec=13.17 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2008|ppo_ep: 1|act_loss: -0.019622802734375|cri_loss: -0.0090789794921875|unsuper_loss: 0.0
-average reward score: 5.20703125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.03%) |Training time=0.48s (22.41%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.43
-[2023-04-14 10:01:43,655] [INFO] [logging.py:96:log_dist] [Rank 0] step=2010, skipped=22, lr=[8.43049348079065e-06, 8.43049348079065e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:01:43,673] [INFO] [timer.py:199:stop] epoch=0/micro_step=2010/global_step=2010, RunningAvgSamplesPerSec=107.27663857336522, CurrSamplesPerSec=97.40335191638643, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:01:43,766] [INFO] [logging.py:96:log_dist] [Rank 0] step=2010, skipped=28, lr=[4.37196406009969e-06, 4.37196406009969e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2009|ppo_ep: 1|act_loss: -0.005809783935546875|cri_loss: -0.000865936279296875|unsuper_loss: 0.0
-average reward score: 5.2890625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.57s (72.76%) |Training time=0.49s (22.70%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2010|ppo_ep: 1|act_loss: -0.00772857666015625|cri_loss: -0.00311279296875|unsuper_loss: 0.0
-average reward score: 5.61328125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.20%) |Training time=0.48s (22.23%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2011|ppo_ep: 1|act_loss: -0.00501251220703125|cri_loss: -0.0017919540405273438|unsuper_loss: 0.0
-average reward score: 5.13671875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.57s (72.82%) |Training time=0.49s (22.63%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2012|ppo_ep: 1|act_loss: 0.0738525390625|cri_loss: 0.038116455078125|unsuper_loss: 0.0
-average reward score: 5.47265625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.04%) |Training time=0.48s (22.40%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2013|ppo_ep: 1|act_loss: 0.0192413330078125|cri_loss: 0.0116424560546875|unsuper_loss: 0.0
-average reward score: 4.9765625
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.33%) |Training time=0.51s (23.15%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2014|ppo_ep: 1|act_loss: 0.0540771484375|cri_loss: 0.029754638671875|unsuper_loss: 0.0
-average reward score: 5.171875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.57s (72.17%) |Training time=0.51s (23.33%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2015|ppo_ep: 1|act_loss: -0.0133514404296875|cri_loss: -0.00567626953125|unsuper_loss: 0.0
-average reward score: 5.25
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.29%) |Training time=0.45s (20.97%) |Others=0.10 (4.74%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2016|ppo_ep: 1|act_loss: 0.014251708984375|cri_loss: 0.00818634033203125|unsuper_loss: 0.0
-average reward score: 5.46875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.65s (76.17%) |Training time=0.42s (19.22%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2017|ppo_ep: 1|act_loss: -0.0249481201171875|cri_loss: -0.0024261474609375|unsuper_loss: 0.0
-average reward score: 5.4453125
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.59%) |Training time=0.46s (20.89%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2018|ppo_ep: 1|act_loss: 0.0255126953125|cri_loss: 0.013580322265625|unsuper_loss: 0.0
-average reward score: 5.70703125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.35%) |Training time=0.46s (21.09%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.43
-[2023-04-14 10:02:05,348] [INFO] [logging.py:96:log_dist] [Rank 0] step=2020, skipped=22, lr=[8.418122154603703e-06, 8.418122154603703e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:02:05,367] [INFO] [timer.py:199:stop] epoch=0/micro_step=2020/global_step=2020, RunningAvgSamplesPerSec=107.26067687682459, CurrSamplesPerSec=111.90816404803911, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:02:05,459] [INFO] [logging.py:96:log_dist] [Rank 0] step=2020, skipped=28, lr=[4.36557064928396e-06, 4.36557064928396e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2019|ppo_ep: 1|act_loss: 0.04864501953125|cri_loss: 0.0251312255859375|unsuper_loss: 0.0
-average reward score: 5.21875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.70%) |Training time=0.45s (20.75%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2020|ppo_ep: 1|act_loss: -0.04766845703125|cri_loss: -0.0232086181640625|unsuper_loss: 0.0
-average reward score: 5.12890625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.18%) |Training time=0.46s (21.27%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2021|ppo_ep: 1|act_loss: 0.019683837890625|cri_loss: 0.011077880859375|unsuper_loss: 0.0
-average reward score: 4.96484375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.69%) |Training time=0.45s (20.62%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2022|ppo_ep: 1|act_loss: 0.02667236328125|cri_loss: 0.018280029296875|unsuper_loss: 0.0
-average reward score: 5.0859375
--------------------------------------------------------------------------------------
-|E2E latency=2.46s |Gather latency=0.00s (0.00%) |Generate time=1.78s (72.36%) |Training time=0.46s (18.90%) |Others=0.21 (8.74%)|CurSamplesPerSec=13.01 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2023|ppo_ep: 1|act_loss: -0.0203704833984375|cri_loss: -0.00766754150390625|unsuper_loss: 0.0
-average reward score: 4.828125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.67%) |Training time=0.47s (21.77%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2024|ppo_ep: 1|act_loss: -0.0394287109375|cri_loss: -0.018646240234375|unsuper_loss: 0.0
-average reward score: 5.32421875
--------------------------------------------------------------------------------------
-|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.62s (72.14%) |Training time=0.53s (23.49%) |Others=0.10 (4.37%)|CurSamplesPerSec=14.25 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2025|ppo_ep: 1|act_loss: 0.00598907470703125|cri_loss: 0.00673675537109375|unsuper_loss: 0.0
-average reward score: 5.546875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.05%) |Training time=0.46s (21.39%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2026|ppo_ep: 1|act_loss: -0.01861572265625|cri_loss: -0.0078582763671875|unsuper_loss: 0.0
-average reward score: 5.20703125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.08%) |Training time=0.46s (21.32%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2027|ppo_ep: 1|act_loss: 0.064453125|cri_loss: 0.033477783203125|unsuper_loss: 0.0
-average reward score: 5.52734375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.28%) |Training time=0.45s (21.10%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2028|ppo_ep: 1|act_loss: 0.01007080078125|cri_loss: 0.00605010986328125|unsuper_loss: 0.0
-average reward score: 5.69921875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.27%) |Training time=0.48s (22.21%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.43
-[2023-04-14 10:02:27,376] [INFO] [logging.py:96:log_dist] [Rank 0] step=2030, skipped=22, lr=[8.405697569678487e-06, 8.405697569678487e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:02:27,394] [INFO] [timer.py:199:stop] epoch=0/micro_step=2030/global_step=2030, RunningAvgSamplesPerSec=107.2414902830951, CurrSamplesPerSec=93.43053466253548, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:02:27,487] [INFO] [logging.py:96:log_dist] [Rank 0] step=2030, skipped=28, lr=[4.3591495862107625e-06, 4.3591495862107625e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2029|ppo_ep: 1|act_loss: 0.07958984375|cri_loss: 0.04119873046875|unsuper_loss: 0.0
-average reward score: 4.9921875
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.56%) |Training time=0.51s (22.99%) |Others=0.10 (4.45%)|CurSamplesPerSec=14.56 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2030|ppo_ep: 1|act_loss: 0.0701904296875|cri_loss: 0.036529541015625|unsuper_loss: 0.0
-average reward score: 4.5390625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.47%) |Training time=0.48s (22.00%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2031|ppo_ep: 1|act_loss: 0.03338623046875|cri_loss: 0.017608642578125|unsuper_loss: 0.0
-average reward score: 5.14453125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.50%) |Training time=0.47s (21.79%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2032|ppo_ep: 1|act_loss: -0.0008573532104492188|cri_loss: -0.00025463104248046875|unsuper_loss: 0.0
-average reward score: 4.62890625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.43%) |Training time=0.48s (22.03%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2033|ppo_ep: 1|act_loss: -0.0310211181640625|cri_loss: -0.0144195556640625|unsuper_loss: 0.0
-average reward score: 5.296875
--------------------------------------------------------------------------------------
-|E2E latency=2.08s |Gather latency=0.00s (0.00%) |Generate time=1.59s (76.65%) |Training time=0.39s (18.61%) |Others=0.10 (4.75%)|CurSamplesPerSec=15.42 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2034|ppo_ep: 1|act_loss: -0.03662109375|cri_loss: -0.0159759521484375|unsuper_loss: 0.0
-average reward score: 5.23828125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.28%) |Training time=0.48s (22.17%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2035|ppo_ep: 1|act_loss: -0.04473876953125|cri_loss: -0.021026611328125|unsuper_loss: 0.0
-average reward score: 5.50390625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.04%) |Training time=0.49s (22.42%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2036|ppo_ep: 1|act_loss: -0.072265625|cri_loss: -0.034637451171875|unsuper_loss: 0.0
-average reward score: 4.8984375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.90%) |Training time=0.49s (22.58%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2037|ppo_ep: 1|act_loss: 0.021484375|cri_loss: 0.01324462890625|unsuper_loss: 0.0
-average reward score: 5.16796875
--------------------------------------------------------------------------------------
-|E2E latency=2.94s |Gather latency=0.00s (0.00%) |Generate time=1.58s (53.92%) |Training time=0.59s (20.15%) |Others=0.76 (25.93%)|CurSamplesPerSec=10.90 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2038|ppo_ep: 1|act_loss: 0.164794921875|cri_loss: 0.093994140625|unsuper_loss: 0.0
-average reward score: 4.7421875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.92%) |Training time=0.49s (22.54%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.43
-[2023-04-14 10:02:50,207] [INFO] [logging.py:96:log_dist] [Rank 0] step=2040, skipped=22, lr=[8.393219910177327e-06, 8.393219910177327e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:02:50,225] [INFO] [timer.py:199:stop] epoch=0/micro_step=2040/global_step=2040, RunningAvgSamplesPerSec=107.20764921501167, CurrSamplesPerSec=99.54234960114154, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:02:50,317] [INFO] [logging.py:96:log_dist] [Rank 0] step=2040, skipped=28, lr=[4.352700966055743e-06, 4.352700966055743e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2039|ppo_ep: 1|act_loss: -0.006927490234375|cri_loss: -0.003032684326171875|unsuper_loss: 0.0
-average reward score: 5.71875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.12%) |Training time=0.48s (22.36%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2040|ppo_ep: 1|act_loss: 0.061065673828125|cri_loss: 0.032318115234375|unsuper_loss: 0.0
-average reward score: 5.36328125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.03%) |Training time=0.49s (22.43%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2041|ppo_ep: 1|act_loss: 0.09600830078125|cri_loss: 0.05084228515625|unsuper_loss: 0.0
-average reward score: 5.33984375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.07%) |Training time=0.49s (22.41%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2042|ppo_ep: 1|act_loss: 0.067626953125|cri_loss: 0.037261962890625|unsuper_loss: 0.0
-average reward score: 5.03515625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.85%) |Training time=0.49s (22.64%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2043|ppo_ep: 1|act_loss: 0.01378631591796875|cri_loss: 0.0082244873046875|unsuper_loss: 0.0
-average reward score: 5.65625
--------------------------------------------------------------------------------------
-|E2E latency=2.73s |Gather latency=0.00s (0.00%) |Generate time=1.60s (58.73%) |Training time=0.49s (17.81%) |Others=0.64 (23.46%)|CurSamplesPerSec=11.73 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2044|ppo_ep: 1|act_loss: -0.0100250244140625|cri_loss: -0.004634857177734375|unsuper_loss: 0.0
-average reward score: 5.12109375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.37%) |Training time=0.47s (22.02%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2045|ppo_ep: 1|act_loss: -0.02764892578125|cri_loss: -0.0125579833984375|unsuper_loss: 0.0
-average reward score: 5.59765625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.45%) |Training time=0.50s (23.05%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2046|ppo_ep: 1|act_loss: -0.019775390625|cri_loss: -0.0078277587890625|unsuper_loss: 0.0
-average reward score: 5.625
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.60%) |Training time=0.50s (22.80%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2047|ppo_ep: 1|act_loss: -0.0533447265625|cri_loss: -0.0249481201171875|unsuper_loss: 0.0
-average reward score: 4.87109375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.77%) |Training time=0.50s (22.74%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2048|ppo_ep: 1|act_loss: -0.0435791015625|cri_loss: -0.020751953125|unsuper_loss: 0.0
-average reward score: 5.45703125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.92%) |Training time=0.49s (22.57%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.43
-[2023-04-14 10:03:12,500] [INFO] [logging.py:96:log_dist] [Rank 0] step=2050, skipped=22, lr=[8.380689361049238e-06, 8.380689361049238e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:03:12,724] [INFO] [timer.py:199:stop] epoch=0/micro_step=2050/global_step=2050, RunningAvgSamplesPerSec=107.1224647769258, CurrSamplesPerSec=61.993959389620294, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:03:12,816] [INFO] [logging.py:96:log_dist] [Rank 0] step=2050, skipped=28, lr=[4.34622488440301e-06, 4.34622488440301e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2049|ppo_ep: 1|act_loss: -0.03973388671875|cri_loss: -0.0189056396484375|unsuper_loss: 0.0
-average reward score: 5.4140625
--------------------------------------------------------------------------------------
-|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.59s (67.24%) |Training time=0.68s (28.64%) |Others=0.10 (4.13%)|CurSamplesPerSec=13.50 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2050|ppo_ep: 1|act_loss: 0.01849365234375|cri_loss: 0.0101165771484375|unsuper_loss: 0.0
-average reward score: 5.21875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.36%) |Training time=0.48s (22.10%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2051|ppo_ep: 1|act_loss: 0.038970947265625|cri_loss: 0.020233154296875|unsuper_loss: 0.0
-average reward score: 5.55859375
--------------------------------------------------------------------------------------
-|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.75s (74.81%) |Training time=0.49s (21.00%) |Others=0.10 (4.19%)|CurSamplesPerSec=13.65 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2052|ppo_ep: 1|act_loss: 0.0194244384765625|cri_loss: 0.010223388671875|unsuper_loss: 0.0
-average reward score: 4.875
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.52%) |Training time=0.48s (21.99%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2053|ppo_ep: 1|act_loss: 0.0357666015625|cri_loss: 0.0200347900390625|unsuper_loss: 0.0
-average reward score: 5.5078125
--------------------------------------------------------------------------------------
-|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.60%) |Training time=0.48s (21.05%) |Others=0.10 (4.35%)|CurSamplesPerSec=14.17 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2054|ppo_ep: 1|act_loss: 0.01934814453125|cri_loss: 0.01020050048828125|unsuper_loss: 0.0
-average reward score: 5.4140625
--------------------------------------------------------------------------------------
-|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.58s (67.31%) |Training time=0.46s (19.47%) |Others=0.31 (13.22%)|CurSamplesPerSec=13.60 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2055|ppo_ep: 1|act_loss: -0.04913330078125|cri_loss: -0.020721435546875|unsuper_loss: 0.0
-average reward score: 5.26953125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.95%) |Training time=0.46s (21.47%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2056|ppo_ep: 1|act_loss: -0.0174407958984375|cri_loss: -0.0081787109375|unsuper_loss: 0.0
-average reward score: 5.45703125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.87%) |Training time=0.47s (21.58%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2057|ppo_ep: 1|act_loss: -0.0110015869140625|cri_loss: -0.004688262939453125|unsuper_loss: 0.0
-average reward score: 5.203125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.62%) |Training time=0.47s (21.80%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2058|ppo_ep: 1|act_loss: -0.01165771484375|cri_loss: -0.00550079345703125|unsuper_loss: 0.0
-average reward score: 5.21875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.93%) |Training time=0.46s (21.52%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.43
-[2023-04-14 10:03:34,775] [INFO] [logging.py:96:log_dist] [Rank 0] step=2060, skipped=22, lr=[8.368106108027184e-06, 8.368106108027184e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:03:34,794] [INFO] [timer.py:199:stop] epoch=0/micro_step=2060/global_step=2060, RunningAvgSamplesPerSec=107.10558702661527, CurrSamplesPerSec=106.73938194768076, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:03:34,886] [INFO] [logging.py:96:log_dist] [Rank 0] step=2060, skipped=28, lr=[4.339721437243713e-06, 4.339721437243713e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2059|ppo_ep: 1|act_loss: 0.025543212890625|cri_loss: 0.01482391357421875|unsuper_loss: 0.0
-average reward score: 5.56640625
--------------------------------------------------------------------------------------
-|E2E latency=2.70s |Gather latency=0.00s (0.00%) |Generate time=1.58s (58.77%) |Training time=0.46s (17.16%) |Others=0.65 (24.07%)|CurSamplesPerSec=11.87 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2060|ppo_ep: 1|act_loss: -0.03955078125|cri_loss: -0.0190887451171875|unsuper_loss: 0.0
-average reward score: 4.8671875
--------------------------------------------------------------------------------------
-|E2E latency=2.06s |Gather latency=0.00s (0.00%) |Generate time=1.58s (76.50%) |Training time=0.39s (18.73%) |Others=0.10 (4.77%)|CurSamplesPerSec=15.50 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2061|ppo_ep: 1|act_loss: 0.002399444580078125|cri_loss: 0.0017910003662109375|unsuper_loss: 0.0
-average reward score: 5.2578125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.02%) |Training time=0.46s (21.41%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2062|ppo_ep: 1|act_loss: -0.0010166168212890625|cri_loss: -0.0002918243408203125|unsuper_loss: 0.0
-average reward score: 5.12109375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.74%) |Training time=0.47s (21.69%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2063|ppo_ep: 1|act_loss: -0.055419921875|cri_loss: -0.0272369384765625|unsuper_loss: 0.0
-average reward score: 5.3046875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.76%) |Training time=0.47s (21.69%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2064|ppo_ep: 1|act_loss: 0.029693603515625|cri_loss: 0.0154266357421875|unsuper_loss: 0.0
-average reward score: 5.48828125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.69%) |Training time=0.47s (21.75%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2065|ppo_ep: 1|act_loss: 0.0760498046875|cri_loss: 0.03997802734375|unsuper_loss: 0.0
-average reward score: 4.6796875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.55%) |Training time=0.47s (21.89%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2066|ppo_ep: 1|act_loss: 0.00406646728515625|cri_loss: 0.00244140625|unsuper_loss: 0.0
-average reward score: 5.515625
--------------------------------------------------------------------------------------
-|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.59%) |Training time=0.47s (20.17%) |Others=0.10 (4.24%)|CurSamplesPerSec=13.82 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2067|ppo_ep: 1|act_loss: 0.0028285980224609375|cri_loss: 0.00255584716796875|unsuper_loss: 0.0
-average reward score: 5.421875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.51%) |Training time=0.47s (21.94%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2068|ppo_ep: 1|act_loss: -0.01499176025390625|cri_loss: -0.0057525634765625|unsuper_loss: 0.0
-average reward score: 5.29296875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.95%) |Training time=0.46s (21.48%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.43
-[2023-04-14 10:03:56,902] [INFO] [logging.py:96:log_dist] [Rank 0] step=2070, skipped=22, lr=[8.35547033762533e-06, 8.35547033762533e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:03:57,081] [INFO] [timer.py:199:stop] epoch=0/micro_step=2070/global_step=2070, RunningAvgSamplesPerSec=107.08235179869443, CurrSamplesPerSec=68.86106831377825, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:03:57,174] [INFO] [logging.py:96:log_dist] [Rank 0] step=2070, skipped=28, lr=[4.333190720974631e-06, 4.333190720974631e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2069|ppo_ep: 1|act_loss: -0.04583740234375|cri_loss: -0.02099609375|unsuper_loss: 0.0
-average reward score: 5.14453125
--------------------------------------------------------------------------------------
-|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.58s (68.57%) |Training time=0.63s (27.18%) |Others=0.10 (4.25%)|CurSamplesPerSec=13.87 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2070|ppo_ep: 1|act_loss: -0.01213836669921875|cri_loss: -0.00543975830078125|unsuper_loss: 0.0
-average reward score: 5.23828125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.49%) |Training time=0.47s (21.95%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2071|ppo_ep: 1|act_loss: 0.02960205078125|cri_loss: 0.0157470703125|unsuper_loss: 0.0
-average reward score: 4.85546875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.27%) |Training time=0.45s (21.16%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2072|ppo_ep: 1|act_loss: -0.00476837158203125|cri_loss: -0.0019512176513671875|unsuper_loss: 0.0
-average reward score: 5.2734375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.39%) |Training time=0.48s (21.88%) |Others=0.10 (4.73%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2073|ppo_ep: 1|act_loss: 0.07574462890625|cri_loss: 0.039306640625|unsuper_loss: 0.0
-average reward score: 5.42578125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.35%) |Training time=0.48s (22.15%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2074|ppo_ep: 1|act_loss: 0.0472412109375|cri_loss: 0.0240936279296875|unsuper_loss: 0.0
-average reward score: 5.2890625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.34%) |Training time=0.48s (22.13%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2075|ppo_ep: 1|act_loss: 0.05096435546875|cri_loss: 0.026123046875|unsuper_loss: 0.0
-average reward score: 4.92578125
--------------------------------------------------------------------------------------
-|E2E latency=2.48s |Gather latency=0.00s (0.00%) |Generate time=1.59s (63.90%) |Training time=0.48s (19.21%) |Others=0.42 (16.89%)|CurSamplesPerSec=12.90 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2076|ppo_ep: 1|act_loss: 0.023681640625|cri_loss: 0.0122833251953125|unsuper_loss: 0.0
-average reward score: 4.8828125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.34%) |Training time=0.47s (21.99%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2077|ppo_ep: 1|act_loss: 0.01788330078125|cri_loss: 0.0093536376953125|unsuper_loss: 0.0
-average reward score: 5.49609375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.36%) |Training time=0.48s (22.11%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2078|ppo_ep: 1|act_loss: 0.04364013671875|cri_loss: 0.02362060546875|unsuper_loss: 0.0
-average reward score: 4.7578125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.32%) |Training time=0.48s (22.13%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.43
-[2023-04-14 10:04:19,005] [INFO] [logging.py:96:log_dist] [Rank 0] step=2080, skipped=22, lr=[8.342782237136277e-06, 8.342782237136277e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:04:19,024] [INFO] [timer.py:199:stop] epoch=0/micro_step=2080/global_step=2080, RunningAvgSamplesPerSec=107.05842882438776, CurrSamplesPerSec=101.66045801757994, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:04:19,116] [INFO] [logging.py:96:log_dist] [Rank 0] step=2080, skipped=28, lr=[4.326632832396733e-06, 4.326632832396733e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2079|ppo_ep: 1|act_loss: -0.033355712890625|cri_loss: -0.0159149169921875|unsuper_loss: 0.0
-average reward score: 5.23046875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.33%) |Training time=0.48s (22.14%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2080|ppo_ep: 1|act_loss: -2.86102294921875e-05|cri_loss: 0.0017223358154296875|unsuper_loss: 0.0
-average reward score: 5.50390625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.51%) |Training time=0.47s (21.94%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2081|ppo_ep: 1|act_loss: -0.0098876953125|cri_loss: -0.00450897216796875|unsuper_loss: 0.0
-average reward score: 5.62109375
--------------------------------------------------------------------------------------
-|E2E latency=2.85s |Gather latency=0.00s (0.00%) |Generate time=1.76s (61.87%) |Training time=0.49s (17.14%) |Others=0.60 (20.98%)|CurSamplesPerSec=11.23 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2082|ppo_ep: 1|act_loss: -0.01531982421875|cri_loss: -0.007282257080078125|unsuper_loss: 0.0
-average reward score: 5.2421875
--------------------------------------------------------------------------------------
-|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.68s (75.09%) |Training time=0.46s (20.52%) |Others=0.10 (4.38%)|CurSamplesPerSec=14.28 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2083|ppo_ep: 1|act_loss: -0.012115478515625|cri_loss: -0.005657196044921875|unsuper_loss: 0.0
-average reward score: 5.5234375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.53%) |Training time=0.45s (20.90%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2084|ppo_ep: 1|act_loss: 0.03741455078125|cri_loss: 0.020172119140625|unsuper_loss: 0.0
-average reward score: 5.7890625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.88%) |Training time=0.46s (21.52%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2085|ppo_ep: 1|act_loss: 0.0025177001953125|cri_loss: 0.0016345977783203125|unsuper_loss: 0.0
-average reward score: 5.4609375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.60%) |Training time=0.47s (21.83%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2086|ppo_ep: 1|act_loss: 0.094970703125|cri_loss: 0.0496826171875|unsuper_loss: 0.0
-average reward score: 5.27734375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.93%) |Training time=0.46s (21.46%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2087|ppo_ep: 1|act_loss: -0.020263671875|cri_loss: -0.00921630859375|unsuper_loss: 0.0
-average reward score: 5.11328125
--------------------------------------------------------------------------------------
-|E2E latency=2.47s |Gather latency=0.00s (0.00%) |Generate time=1.60s (64.66%) |Training time=0.46s (18.46%) |Others=0.42 (16.88%)|CurSamplesPerSec=12.95 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2088|ppo_ep: 1|act_loss: 0.0782470703125|cri_loss: 0.0416259765625|unsuper_loss: 0.0
-average reward score: 5.51171875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.67%) |Training time=0.47s (21.75%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.43
-[2023-04-14 10:04:41,595] [INFO] [logging.py:96:log_dist] [Rank 0] step=2090, skipped=22, lr=[8.330041994628283e-06, 8.330041994628283e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:04:41,613] [INFO] [timer.py:199:stop] epoch=0/micro_step=2090/global_step=2090, RunningAvgSamplesPerSec=107.05361134522774, CurrSamplesPerSec=108.12874805543001, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:04:41,706] [INFO] [logging.py:96:log_dist] [Rank 0] step=2090, skipped=28, lr=[4.320047868713747e-06, 4.320047868713747e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2089|ppo_ep: 1|act_loss: -0.01372528076171875|cri_loss: -0.006328582763671875|unsuper_loss: 0.0
-average reward score: 5.33203125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.99%) |Training time=0.46s (21.44%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2090|ppo_ep: 1|act_loss: 0.005096435546875|cri_loss: 0.00392913818359375|unsuper_loss: 0.0
-average reward score: 5.734375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.91%) |Training time=0.46s (21.50%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2091|ppo_ep: 1|act_loss: -0.005229949951171875|cri_loss: -0.0017862319946289062|unsuper_loss: 0.0
-average reward score: 5.2421875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.74%) |Training time=0.46s (21.67%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2092|ppo_ep: 1|act_loss: 0.0266876220703125|cri_loss: 0.0213470458984375|unsuper_loss: 0.0
-average reward score: 5.12890625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.77%) |Training time=0.46s (21.66%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2093|ppo_ep: 1|act_loss: -0.0106201171875|cri_loss: -0.0045623779296875|unsuper_loss: 0.0
-average reward score: 5.12890625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.85%) |Training time=0.46s (21.55%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2094|ppo_ep: 1|act_loss: 0.040252685546875|cri_loss: 0.025543212890625|unsuper_loss: 0.0
-average reward score: 4.7734375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.12%) |Training time=0.46s (21.29%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2095|ppo_ep: 1|act_loss: -0.0213623046875|cri_loss: -0.00997161865234375|unsuper_loss: 0.0
-average reward score: 4.7734375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.84%) |Training time=0.46s (21.56%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2096|ppo_ep: 1|act_loss: -0.0013408660888671875|cri_loss: -0.0003294944763183594|unsuper_loss: 0.0
-average reward score: 5.49609375
--------------------------------------------------------------------------------------
-|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.76s (76.15%) |Training time=0.45s (19.60%) |Others=0.10 (4.25%)|CurSamplesPerSec=13.85 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2097|ppo_ep: 1|act_loss: 0.02960205078125|cri_loss: 0.0153045654296875|unsuper_loss: 0.0
-average reward score: 5.26171875
--------------------------------------------------------------------------------------
-|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.99%) |Training time=0.46s (21.41%) |Others=0.10 (4.60%)|CurSamplesPerSec=15.00 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2098|ppo_ep: 1|act_loss: 0.08538818359375|cri_loss: 0.045074462890625|unsuper_loss: 0.0
-average reward score: 5.39453125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.85%) |Training time=0.46s (21.55%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.43
-[2023-04-14 10:05:03,168] [INFO] [logging.py:96:log_dist] [Rank 0] step=2100, skipped=22, lr=[8.317249798942473e-06, 8.317249798942473e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:05:03,186] [INFO] [timer.py:199:stop] epoch=0/micro_step=2100/global_step=2100, RunningAvgSamplesPerSec=107.05548599476938, CurrSamplesPerSec=106.59958715986158, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:05:03,279] [INFO] [logging.py:96:log_dist] [Rank 0] step=2100, skipped=28, lr=[4.313435927530719e-06, 4.313435927530719e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2099|ppo_ep: 1|act_loss: 0.0094146728515625|cri_loss: 0.005313873291015625|unsuper_loss: 0.0
-average reward score: 5.88671875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.81%) |Training time=0.46s (21.62%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2100|ppo_ep: 1|act_loss: -0.013214111328125|cri_loss: -0.004150390625|unsuper_loss: 0.0
-average reward score: 4.9921875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.82%) |Training time=0.46s (21.54%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2101|ppo_ep: 1|act_loss: -0.037139892578125|cri_loss: -0.0179595947265625|unsuper_loss: 0.0
-average reward score: 5.01171875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.03%) |Training time=0.46s (21.37%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2102|ppo_ep: 1|act_loss: -0.02099609375|cri_loss: -0.0099945068359375|unsuper_loss: 0.0
-average reward score: 5.3828125
--------------------------------------------------------------------------------------
-|E2E latency=2.46s |Gather latency=0.00s (0.00%) |Generate time=1.61s (65.65%) |Training time=0.44s (17.92%) |Others=0.40 (16.43%)|CurSamplesPerSec=13.02 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2103|ppo_ep: 1|act_loss: 0.01552581787109375|cri_loss: 0.0088958740234375|unsuper_loss: 0.0
-average reward score: 5.41796875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.26%) |Training time=0.45s (21.10%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2104|ppo_ep: 1|act_loss: -0.002620697021484375|cri_loss: -0.0007905960083007812|unsuper_loss: 0.0
-average reward score: 5.578125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.32%) |Training time=0.45s (21.08%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2105|ppo_ep: 1|act_loss: 0.0304412841796875|cri_loss: 0.0157318115234375|unsuper_loss: 0.0
-average reward score: 5.078125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.51%) |Training time=0.45s (20.89%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2106|ppo_ep: 1|act_loss: 0.0146331787109375|cri_loss: 0.00815582275390625|unsuper_loss: 0.0
-average reward score: 4.8046875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.53%) |Training time=0.45s (20.91%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2107|ppo_ep: 1|act_loss: -0.0046539306640625|cri_loss: -0.001953125|unsuper_loss: 0.0
-average reward score: 5.796875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.32%) |Training time=0.45s (21.08%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.43
-[2023-04-14 10:05:22,846] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
-epoch: 0|step: 2108|ppo_ep: 1|act_loss: -0.006683349609375|cri_loss: -0.002918243408203125|unsuper_loss: 0.0
-average reward score: 5.3046875
--------------------------------------------------------------------------------------
-|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.79%) |Training time=0.45s (21.08%) |Others=0.09 (4.12%)|CurSamplesPerSec=15.04 |AvgSamplesPerSec=14.43
-[2023-04-14 10:05:24,880] [INFO] [logging.py:96:log_dist] [Rank 0] step=2110, skipped=22, lr=[8.30440583969005e-06, 8.30440583969005e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:05:24,898] [INFO] [timer.py:199:stop] epoch=0/micro_step=2110/global_step=2110, RunningAvgSamplesPerSec=107.07153675733814, CurrSamplesPerSec=106.8084394578795, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:05:24,982] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
-[2023-04-14 10:05:24,982] [INFO] [logging.py:96:log_dist] [Rank 0] step=2110, skipped=30, lr=[4.308127016630176e-06, 4.308127016630176e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2109|ppo_ep: 1|act_loss: -0.0302276611328125|cri_loss: -0.01470184326171875|unsuper_loss: 0.0
-average reward score: 5.65234375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.26%) |Training time=0.46s (21.64%) |Others=0.09 (4.10%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2110|ppo_ep: 1|act_loss: 0.011016845703125|cri_loss: 0.00604248046875|unsuper_loss: 0.0
-average reward score: 5.359375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.40%) |Training time=0.45s (21.02%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2111|ppo_ep: 1|act_loss: 0.0041656494140625|cri_loss: 0.0021915435791015625|unsuper_loss: 0.0
-average reward score: 4.875
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.95%) |Training time=0.45s (20.54%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2112|ppo_ep: 1|act_loss: 0.0007038116455078125|cri_loss: 0.0009160041809082031|unsuper_loss: 0.0
-average reward score: 5.4921875
--------------------------------------------------------------------------------------
-|E2E latency=2.38s |Gather latency=0.00s (0.00%) |Generate time=1.82s (76.67%) |Training time=0.46s (19.18%) |Others=0.10 (4.14%)|CurSamplesPerSec=13.46 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2113|ppo_ep: 1|act_loss: 0.035888671875|cri_loss: 0.0190582275390625|unsuper_loss: 0.0
-average reward score: 5.078125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.17%) |Training time=0.45s (21.24%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2114|ppo_ep: 1|act_loss: -0.0216522216796875|cri_loss: -0.01023101806640625|unsuper_loss: 0.0
-average reward score: 5.3984375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.34%) |Training time=0.45s (21.08%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2115|ppo_ep: 1|act_loss: -0.014190673828125|cri_loss: -0.006740570068359375|unsuper_loss: 0.0
-average reward score: 4.8515625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.33%) |Training time=0.45s (21.07%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2116|ppo_ep: 1|act_loss: -0.03753662109375|cri_loss: -0.016510009765625|unsuper_loss: 0.0
-average reward score: 5.20703125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.00%) |Training time=0.46s (21.43%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2117|ppo_ep: 1|act_loss: -0.02288818359375|cri_loss: -0.01116943359375|unsuper_loss: 0.0
-average reward score: 5.53125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.56%) |Training time=0.45s (20.86%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2118|ppo_ep: 1|act_loss: 0.041748046875|cri_loss: 0.0218658447265625|unsuper_loss: 0.0
-average reward score: 5.29296875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.26%) |Training time=0.45s (21.16%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.43
-[2023-04-14 10:05:46,571] [INFO] [logging.py:96:log_dist] [Rank 0] step=2120, skipped=22, lr=[8.291510307249474e-06, 8.291510307249474e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:05:46,589] [INFO] [timer.py:199:stop] epoch=0/micro_step=2120/global_step=2120, RunningAvgSamplesPerSec=107.08617854175999, CurrSamplesPerSec=108.78975468800026, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:05:46,682] [INFO] [logging.py:96:log_dist] [Rank 0] step=2120, skipped=30, lr=[4.301466763187256e-06, 4.301466763187256e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2119|ppo_ep: 1|act_loss: -0.0065765380859375|cri_loss: -0.00258636474609375|unsuper_loss: 0.0
-average reward score: 5.22265625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.09%) |Training time=0.46s (21.34%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2120|ppo_ep: 1|act_loss: 0.0123443603515625|cri_loss: 0.007183074951171875|unsuper_loss: 0.0
-average reward score: 4.796875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.23%) |Training time=0.45s (21.18%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2121|ppo_ep: 1|act_loss: -0.019927978515625|cri_loss: -0.0091552734375|unsuper_loss: 0.0
-average reward score: 5.26171875
--------------------------------------------------------------------------------------
-|E2E latency=2.95s |Gather latency=0.00s (0.00%) |Generate time=1.58s (53.69%) |Training time=0.45s (15.36%) |Others=0.91 (30.95%)|CurSamplesPerSec=10.85 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2122|ppo_ep: 1|act_loss: -0.0125274658203125|cri_loss: -0.005584716796875|unsuper_loss: 0.0
-average reward score: 5.23046875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.77%) |Training time=0.46s (21.65%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2123|ppo_ep: 1|act_loss: 0.03314208984375|cri_loss: 0.0187530517578125|unsuper_loss: 0.0
-average reward score: 4.93359375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.22%) |Training time=0.45s (21.19%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.43
-[2023-04-14 10:05:58,065] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
-epoch: 0|step: 2124|ppo_ep: 1|act_loss: 0.0307159423828125|cri_loss: 0.0158843994140625|unsuper_loss: 0.0
-average reward score: 5.52734375
--------------------------------------------------------------------------------------
-|E2E latency=2.11s |Gather latency=0.00s (0.00%) |Generate time=1.59s (75.23%) |Training time=0.42s (20.12%) |Others=0.10 (4.65%)|CurSamplesPerSec=15.18 |AvgSamplesPerSec=14.43
-[2023-04-14 10:06:00,172] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
-epoch: 0|step: 2125|ppo_ep: 1|act_loss: 0.00861358642578125|cri_loss: 0.004924774169921875|unsuper_loss: 0.0
-average reward score: 5.04296875
--------------------------------------------------------------------------------------
-|E2E latency=2.11s |Gather latency=0.00s (0.00%) |Generate time=1.58s (75.14%) |Training time=0.43s (20.21%) |Others=0.10 (4.65%)|CurSamplesPerSec=15.19 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2126|ppo_ep: 1|act_loss: -0.0016460418701171875|cri_loss: 2.6702880859375e-05|unsuper_loss: 0.0
-average reward score: 5.1796875
--------------------------------------------------------------------------------------
-|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.59s (71.08%) |Training time=0.48s (21.46%) |Others=0.17 (7.45%)|CurSamplesPerSec=14.34 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2127|ppo_ep: 1|act_loss: -0.0195770263671875|cri_loss: -0.0091552734375|unsuper_loss: 0.0
-average reward score: 5.24609375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.21%) |Training time=0.45s (21.19%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2128|ppo_ep: 1|act_loss: 0.0018329620361328125|cri_loss: 0.0025920867919921875|unsuper_loss: 0.0
-average reward score: 5.29296875
--------------------------------------------------------------------------------------
-|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.18%) |Training time=0.45s (21.23%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.43
-[2023-04-14 10:06:08,802] [INFO] [logging.py:96:log_dist] [Rank 0] step=2130, skipped=24, lr=[8.281156877023959e-06, 8.281156877023959e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:06:08,820] [INFO] [timer.py:199:stop] epoch=0/micro_step=2130/global_step=2130, RunningAvgSamplesPerSec=107.10202712994473, CurrSamplesPerSec=108.1727569609888, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:06:08,912] [INFO] [logging.py:96:log_dist] [Rank 0] step=2130, skipped=30, lr=[4.294779807661105e-06, 4.294779807661105e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2129|ppo_ep: 1|act_loss: 0.0150299072265625|cri_loss: 0.0085296630859375|unsuper_loss: 0.0
-average reward score: 5.29296875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.04%) |Training time=0.46s (21.40%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2130|ppo_ep: 1|act_loss: -0.00832366943359375|cri_loss: -0.0035114288330078125|unsuper_loss: 0.0
-average reward score: 5.30859375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.14%) |Training time=0.46s (21.28%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2131|ppo_ep: 1|act_loss: 0.0592041015625|cri_loss: 0.0307769775390625|unsuper_loss: 0.0
-average reward score: 5.6328125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.17%) |Training time=0.45s (21.19%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2132|ppo_ep: 1|act_loss: 0.0191650390625|cri_loss: 0.01004791259765625|unsuper_loss: 0.0
-average reward score: 4.9296875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.61%) |Training time=0.47s (21.80%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2133|ppo_ep: 1|act_loss: -0.0196533203125|cri_loss: -0.00951385498046875|unsuper_loss: 0.0
-average reward score: 5.0
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.83%) |Training time=0.47s (21.65%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2134|ppo_ep: 1|act_loss: 0.012603759765625|cri_loss: 0.007221221923828125|unsuper_loss: 0.0
-average reward score: 4.90625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.63%) |Training time=0.47s (21.81%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2135|ppo_ep: 1|act_loss: -0.01303863525390625|cri_loss: -0.0056915283203125|unsuper_loss: 0.0
-average reward score: 5.46484375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.75%) |Training time=0.47s (21.68%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2136|ppo_ep: 1|act_loss: 0.005916595458984375|cri_loss: 0.003437042236328125|unsuper_loss: 0.0
-average reward score: 5.1328125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.05%) |Training time=0.46s (21.39%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2137|ppo_ep: 1|act_loss: 0.0465087890625|cri_loss: 0.02520751953125|unsuper_loss: 0.0
-average reward score: 5.375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.71%) |Training time=0.47s (21.73%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2138|ppo_ep: 1|act_loss: 0.02044677734375|cri_loss: 0.011077880859375|unsuper_loss: 0.0
-average reward score: 5.52734375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.67%) |Training time=0.47s (21.78%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.43
-[2023-04-14 10:06:30,321] [INFO] [logging.py:96:log_dist] [Rank 0] step=2140, skipped=24, lr=[8.268168995036705e-06, 8.268168995036705e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:06:30,339] [INFO] [timer.py:199:stop] epoch=0/micro_step=2140/global_step=2140, RunningAvgSamplesPerSec=107.09897142545462, CurrSamplesPerSec=106.86669384427232, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:06:30,432] [INFO] [logging.py:96:log_dist] [Rank 0] step=2140, skipped=30, lr=[4.2880662491685345e-06, 4.2880662491685345e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2139|ppo_ep: 1|act_loss: 0.0289459228515625|cri_loss: 0.0149688720703125|unsuper_loss: 0.0
-average reward score: 5.4375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.97%) |Training time=0.46s (21.48%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2140|ppo_ep: 1|act_loss: -0.02886962890625|cri_loss: -0.01361083984375|unsuper_loss: 0.0
-average reward score: 5.46484375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.76%) |Training time=0.47s (21.74%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2141|ppo_ep: 1|act_loss: 0.0021343231201171875|cri_loss: 0.0018062591552734375|unsuper_loss: 0.0
-average reward score: 5.0546875
--------------------------------------------------------------------------------------
-|E2E latency=2.50s |Gather latency=0.00s (0.00%) |Generate time=1.86s (74.51%) |Training time=0.46s (18.56%) |Others=0.17 (6.94%)|CurSamplesPerSec=12.79 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2142|ppo_ep: 1|act_loss: 0.0106658935546875|cri_loss: 0.00615692138671875|unsuper_loss: 0.0
-average reward score: 5.7734375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.89%) |Training time=0.46s (21.53%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2143|ppo_ep: 1|act_loss: 0.044830322265625|cri_loss: 0.0255889892578125|unsuper_loss: 0.0
-average reward score: 4.66796875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.80%) |Training time=0.47s (21.64%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2144|ppo_ep: 1|act_loss: -0.0017185211181640625|cri_loss: -0.0005717277526855469|unsuper_loss: 0.0
-average reward score: 4.77734375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.67%) |Training time=0.47s (21.76%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2145|ppo_ep: 1|act_loss: -0.03558349609375|cri_loss: -0.0161895751953125|unsuper_loss: 0.0
-average reward score: 5.484375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.00%) |Training time=0.46s (21.44%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2146|ppo_ep: 1|act_loss: -0.05517578125|cri_loss: -0.0265045166015625|unsuper_loss: 0.0
-average reward score: 5.8046875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.81%) |Training time=0.47s (21.63%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2147|ppo_ep: 1|act_loss: -0.046875|cri_loss: -0.022003173828125|unsuper_loss: 0.0
-average reward score: 5.23828125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.08%) |Training time=0.46s (21.33%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2148|ppo_ep: 1|act_loss: 0.0095367431640625|cri_loss: 0.005390167236328125|unsuper_loss: 0.0
-average reward score: 4.63671875
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.96%) |Training time=0.49s (22.55%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.43
-[2023-04-14 10:06:52,265] [INFO] [logging.py:96:log_dist] [Rank 0] step=2150, skipped=24, lr=[8.255130076978754e-06, 8.255130076978754e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:06:52,283] [INFO] [timer.py:199:stop] epoch=0/micro_step=2150/global_step=2150, RunningAvgSamplesPerSec=107.08429552059788, CurrSamplesPerSec=98.68667589679414, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:06:52,375] [INFO] [logging.py:96:log_dist] [Rank 0] step=2150, skipped=30, lr=[4.281326187220675e-06, 4.281326187220675e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2149|ppo_ep: 1|act_loss: 0.029083251953125|cri_loss: 0.0157318115234375|unsuper_loss: 0.0
-average reward score: 4.90625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.02%) |Training time=0.49s (22.47%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2150|ppo_ep: 1|act_loss: 0.0665283203125|cri_loss: 0.03466796875|unsuper_loss: 0.0
-average reward score: 5.3203125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.15%) |Training time=0.48s (22.31%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2151|ppo_ep: 1|act_loss: 0.01038360595703125|cri_loss: 0.006134033203125|unsuper_loss: 0.0
-average reward score: 5.0390625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.36%) |Training time=0.48s (22.12%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2152|ppo_ep: 1|act_loss: 0.0092620849609375|cri_loss: 0.004779815673828125|unsuper_loss: 0.0
-average reward score: 5.390625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.01%) |Training time=0.49s (22.47%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2153|ppo_ep: 1|act_loss: 0.001251220703125|cri_loss: 0.0017547607421875|unsuper_loss: 0.0
-average reward score: 5.7265625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.69%) |Training time=0.50s (22.81%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2154|ppo_ep: 1|act_loss: -0.04931640625|cri_loss: -0.02398681640625|unsuper_loss: 0.0
-average reward score: 5.2578125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.88%) |Training time=0.49s (22.63%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2155|ppo_ep: 1|act_loss: 0.0128173828125|cri_loss: 0.00759124755859375|unsuper_loss: 0.0
-average reward score: 4.75
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.24%) |Training time=0.48s (22.23%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2156|ppo_ep: 1|act_loss: -0.005664825439453125|cri_loss: -0.002101898193359375|unsuper_loss: 0.0
-average reward score: 6.09375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.21%) |Training time=0.48s (22.25%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2157|ppo_ep: 1|act_loss: 0.0102081298828125|cri_loss: 0.005924224853515625|unsuper_loss: 0.0
-average reward score: 5.25
--------------------------------------------------------------------------------------
-|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.76s (74.51%) |Training time=0.50s (21.30%) |Others=0.10 (4.19%)|CurSamplesPerSec=13.58 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2158|ppo_ep: 1|act_loss: -0.004772186279296875|cri_loss: -0.000301361083984375|unsuper_loss: 0.0
-average reward score: 5.3203125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.47%) |Training time=0.47s (21.97%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.43
-[2023-04-14 10:07:14,131] [INFO] [logging.py:96:log_dist] [Rank 0] step=2160, skipped=24, lr=[8.242040316118323e-06, 8.242040316118323e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:07:14,150] [INFO] [timer.py:199:stop] epoch=0/micro_step=2160/global_step=2160, RunningAvgSamplesPerSec=107.04436231088094, CurrSamplesPerSec=103.56585864587393, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:07:14,242] [INFO] [logging.py:96:log_dist] [Rank 0] step=2160, skipped=30, lr=[4.2745597217215065e-06, 4.2745597217215065e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2159|ppo_ep: 1|act_loss: 0.0247344970703125|cri_loss: 0.01314544677734375|unsuper_loss: 0.0
-average reward score: 5.29296875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.60%) |Training time=0.47s (21.87%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2160|ppo_ep: 1|act_loss: -0.05059814453125|cri_loss: -0.0238494873046875|unsuper_loss: 0.0
-average reward score: 5.76171875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.08%) |Training time=0.47s (21.65%) |Others=0.11 (5.27%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2161|ppo_ep: 1|act_loss: -0.03314208984375|cri_loss: -0.0160369873046875|unsuper_loss: 0.0
-average reward score: 5.33203125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.90%) |Training time=0.46s (21.53%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2162|ppo_ep: 1|act_loss: -0.028076171875|cri_loss: -0.013458251953125|unsuper_loss: 0.0
-average reward score: 5.34375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.78%) |Training time=0.47s (21.65%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2163|ppo_ep: 1|act_loss: 0.003276824951171875|cri_loss: 0.0018472671508789062|unsuper_loss: 0.0
-average reward score: 5.2109375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.28%) |Training time=0.45s (21.13%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2164|ppo_ep: 1|act_loss: 0.0155181884765625|cri_loss: 0.008453369140625|unsuper_loss: 0.0
-average reward score: 5.53125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.14%) |Training time=0.46s (21.30%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2165|ppo_ep: 1|act_loss: 0.03338623046875|cri_loss: 0.017547607421875|unsuper_loss: 0.0
-average reward score: 5.4609375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.72%) |Training time=0.47s (21.75%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2166|ppo_ep: 1|act_loss: 0.0576171875|cri_loss: 0.029571533203125|unsuper_loss: 0.0
-average reward score: 5.2109375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.79%) |Training time=0.47s (21.68%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2167|ppo_ep: 1|act_loss: 0.0482177734375|cri_loss: 0.02642822265625|unsuper_loss: 0.0
-average reward score: 4.84765625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.85%) |Training time=0.47s (21.58%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2168|ppo_ep: 1|act_loss: 0.0726318359375|cri_loss: 0.03887939453125|unsuper_loss: 0.0
-average reward score: 5.125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.50%) |Training time=0.47s (21.95%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.44
-[2023-04-14 10:07:35,717] [INFO] [logging.py:96:log_dist] [Rank 0] step=2170, skipped=24, lr=[8.228899906477248e-06, 8.228899906477248e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:07:35,735] [INFO] [timer.py:199:stop] epoch=0/micro_step=2170/global_step=2170, RunningAvgSamplesPerSec=107.03586667134884, CurrSamplesPerSec=102.76559751986322, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:07:35,828] [INFO] [logging.py:96:log_dist] [Rank 0] step=2170, skipped=30, lr=[4.267766952966369e-06, 4.267766952966369e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2169|ppo_ep: 1|act_loss: -0.03533935546875|cri_loss: -0.0171356201171875|unsuper_loss: 0.0
-average reward score: 4.89453125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.59%) |Training time=0.47s (21.89%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2170|ppo_ep: 1|act_loss: -0.030029296875|cri_loss: -0.0144195556640625|unsuper_loss: 0.0
-average reward score: 5.25390625
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.63%) |Training time=0.48s (21.90%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.54 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2171|ppo_ep: 1|act_loss: -0.020263671875|cri_loss: -0.00963592529296875|unsuper_loss: 0.0
-average reward score: 5.21484375
--------------------------------------------------------------------------------------
-|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.70s (74.43%) |Training time=0.48s (21.10%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.02 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2172|ppo_ep: 1|act_loss: 0.032257080078125|cri_loss: 0.0193328857421875|unsuper_loss: 0.0
-average reward score: 4.89453125
--------------------------------------------------------------------------------------
-|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.75s (74.71%) |Training time=0.49s (21.10%) |Others=0.10 (4.19%)|CurSamplesPerSec=13.66 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2173|ppo_ep: 1|act_loss: -0.018524169921875|cri_loss: -0.00890350341796875|unsuper_loss: 0.0
-average reward score: 5.734375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.05%) |Training time=0.49s (22.33%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2174|ppo_ep: 1|act_loss: -0.00188446044921875|cri_loss: -0.0005998611450195312|unsuper_loss: 0.0
-average reward score: 5.74609375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.44%) |Training time=0.48s (22.02%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2175|ppo_ep: 1|act_loss: 0.01287841796875|cri_loss: 0.006801605224609375|unsuper_loss: 0.0
-average reward score: 5.53125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.30%) |Training time=0.48s (22.16%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2176|ppo_ep: 1|act_loss: 0.0091705322265625|cri_loss: 0.004795074462890625|unsuper_loss: 0.0
-average reward score: 5.3359375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.58%) |Training time=0.47s (21.87%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2177|ppo_ep: 1|act_loss: -0.04803466796875|cri_loss: -0.02337646484375|unsuper_loss: 0.0
-average reward score: 5.125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.63%) |Training time=0.47s (21.82%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2178|ppo_ep: 1|act_loss: 0.0144805908203125|cri_loss: 0.00882720947265625|unsuper_loss: 0.0
-average reward score: 4.93359375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.40%) |Training time=0.48s (22.05%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.44
-[2023-04-14 10:07:57,675] [INFO] [logging.py:96:log_dist] [Rank 0] step=2180, skipped=24, lr=[8.215709042828096e-06, 8.215709042828096e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:07:57,693] [INFO] [timer.py:199:stop] epoch=0/micro_step=2180/global_step=2180, RunningAvgSamplesPerSec=107.0068419629447, CurrSamplesPerSec=103.48066046173109, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:07:57,786] [INFO] [logging.py:96:log_dist] [Rank 0] step=2180, skipped=30, lr=[4.2609479816404836e-06, 4.2609479816404836e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2179|ppo_ep: 1|act_loss: 0.00109100341796875|cri_loss: 0.0007219314575195312|unsuper_loss: 0.0
-average reward score: 5.5078125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.54%) |Training time=0.47s (21.90%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2180|ppo_ep: 1|act_loss: 0.00707244873046875|cri_loss: 0.0037384033203125|unsuper_loss: 0.0
-average reward score: 5.45703125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.07%) |Training time=0.49s (22.40%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2181|ppo_ep: 1|act_loss: -0.0042572021484375|cri_loss: -0.0018062591552734375|unsuper_loss: 0.0
-average reward score: 5.18359375
--------------------------------------------------------------------------------------
-|E2E latency=2.81s |Gather latency=0.00s (0.00%) |Generate time=1.58s (56.29%) |Training time=0.48s (17.15%) |Others=0.75 (26.57%)|CurSamplesPerSec=11.38 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2182|ppo_ep: 1|act_loss: 0.054351806640625|cri_loss: 0.0278472900390625|unsuper_loss: 0.0
-average reward score: 5.1640625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.25%) |Training time=0.48s (22.21%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2183|ppo_ep: 1|act_loss: -0.01373291015625|cri_loss: -0.0059661865234375|unsuper_loss: 0.0
-average reward score: 5.2265625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.54%) |Training time=0.47s (21.92%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2184|ppo_ep: 1|act_loss: 0.0653076171875|cri_loss: 0.03363037109375|unsuper_loss: 0.0
-average reward score: 5.140625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.45%) |Training time=0.47s (22.00%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2185|ppo_ep: 1|act_loss: -0.0047760009765625|cri_loss: -0.0015497207641601562|unsuper_loss: 0.0
-average reward score: 5.1953125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.57%) |Training time=0.47s (21.86%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2186|ppo_ep: 1|act_loss: -0.00888824462890625|cri_loss: -0.00391387939453125|unsuper_loss: 0.0
-average reward score: 4.875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.57%) |Training time=0.47s (21.88%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2187|ppo_ep: 1|act_loss: -0.031890869140625|cri_loss: -0.0145111083984375|unsuper_loss: 0.0
-average reward score: 5.31640625
--------------------------------------------------------------------------------------
-|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.28%) |Training time=0.48s (20.51%) |Others=0.10 (4.21%)|CurSamplesPerSec=13.73 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2188|ppo_ep: 1|act_loss: -0.027069091796875|cri_loss: -0.01320648193359375|unsuper_loss: 0.0
-average reward score: 5.25
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.49%) |Training time=0.47s (21.96%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.44
-[2023-04-14 10:08:20,079] [INFO] [logging.py:96:log_dist] [Rank 0] step=2190, skipped=24, lr=[8.20246792069129e-06, 8.20246792069129e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:08:20,097] [INFO] [timer.py:199:stop] epoch=0/micro_step=2190/global_step=2190, RunningAvgSamplesPerSec=106.98401354109774, CurrSamplesPerSec=106.3205036803265, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:08:20,190] [INFO] [logging.py:96:log_dist] [Rank 0] step=2190, skipped=30, lr=[4.254102908817454e-06, 4.254102908817454e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2189|ppo_ep: 1|act_loss: 0.0097808837890625|cri_loss: 0.005466461181640625|unsuper_loss: 0.0
-average reward score: 5.47265625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.92%) |Training time=0.46s (21.54%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2190|ppo_ep: 1|act_loss: -0.016876220703125|cri_loss: -0.00792694091796875|unsuper_loss: 0.0
-average reward score: 5.078125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.78%) |Training time=0.47s (21.64%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2191|ppo_ep: 1|act_loss: 0.040283203125|cri_loss: 0.0205841064453125|unsuper_loss: 0.0
-average reward score: 5.4375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.76%) |Training time=0.47s (21.66%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2192|ppo_ep: 1|act_loss: 0.021575927734375|cri_loss: 0.0172271728515625|unsuper_loss: 0.0
-average reward score: 5.33203125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.49%) |Training time=0.47s (21.96%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2193|ppo_ep: 1|act_loss: 0.045806884765625|cri_loss: 0.02337646484375|unsuper_loss: 0.0
-average reward score: 5.82421875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.07%) |Training time=0.49s (22.38%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2194|ppo_ep: 1|act_loss: 0.0016117095947265625|cri_loss: 0.0012531280517578125|unsuper_loss: 0.0
-average reward score: 5.515625
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.10%) |Training time=0.51s (23.42%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.57 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2195|ppo_ep: 1|act_loss: -0.029296875|cri_loss: -0.013427734375|unsuper_loss: 0.0
-average reward score: 5.65625
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.34%) |Training time=0.51s (23.19%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2196|ppo_ep: 1|act_loss: -0.0033473968505859375|cri_loss: -0.0009822845458984375|unsuper_loss: 0.0
-average reward score: 5.0234375
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.63%) |Training time=0.50s (22.89%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2197|ppo_ep: 1|act_loss: -0.033203125|cri_loss: -0.016326904296875|unsuper_loss: 0.0
-average reward score: 5.515625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.05%) |Training time=0.46s (21.36%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2198|ppo_ep: 1|act_loss: -0.00848388671875|cri_loss: -0.003993988037109375|unsuper_loss: 0.0
-average reward score: 5.6015625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.30%) |Training time=0.48s (22.17%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.44
-[2023-04-14 10:08:41,790] [INFO] [logging.py:96:log_dist] [Rank 0] step=2200, skipped=24, lr=[8.189176736332201e-06, 8.189176736332201e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:08:41,808] [INFO] [timer.py:199:stop] epoch=0/micro_step=2200/global_step=2200, RunningAvgSamplesPerSec=106.94942238546258, CurrSamplesPerSec=98.5336633016457, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:08:41,900] [INFO] [logging.py:96:log_dist] [Rank 0] step=2200, skipped=30, lr=[4.247231835957773e-06, 4.247231835957773e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2199|ppo_ep: 1|act_loss: 0.040740966796875|cri_loss: 0.0230560302734375|unsuper_loss: 0.0
-average reward score: 4.8046875
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.48%) |Training time=0.49s (22.10%) |Others=0.10 (4.42%)|CurSamplesPerSec=14.51 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2200|ppo_ep: 1|act_loss: -0.04095458984375|cri_loss: -0.02008056640625|unsuper_loss: 0.0
-average reward score: 5.19140625
--------------------------------------------------------------------------------------
-|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.04%) |Training time=0.49s (21.63%) |Others=0.10 (4.33%)|CurSamplesPerSec=14.06 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2201|ppo_ep: 1|act_loss: 0.0758056640625|cri_loss: 0.03912353515625|unsuper_loss: 0.0
-average reward score: 5.3671875
--------------------------------------------------------------------------------------
-|E2E latency=2.61s |Gather latency=0.00s (0.00%) |Generate time=1.58s (60.48%) |Training time=0.49s (18.92%) |Others=0.54 (20.61%)|CurSamplesPerSec=12.26 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2202|ppo_ep: 1|act_loss: -0.07659912109375|cri_loss: -0.0325927734375|unsuper_loss: 0.0
-average reward score: 5.4921875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.77%) |Training time=0.46s (21.66%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2203|ppo_ep: 1|act_loss: -0.0361328125|cri_loss: -0.0171661376953125|unsuper_loss: 0.0
-average reward score: 5.25
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.96%) |Training time=0.46s (21.45%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2204|ppo_ep: 1|act_loss: 0.01202392578125|cri_loss: 0.006740570068359375|unsuper_loss: 0.0
-average reward score: 5.14453125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.02%) |Training time=0.46s (21.38%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2205|ppo_ep: 1|act_loss: -0.01861572265625|cri_loss: -0.0091094970703125|unsuper_loss: 0.0
-average reward score: 5.51171875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.77%) |Training time=0.47s (21.67%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2206|ppo_ep: 1|act_loss: 0.0245513916015625|cri_loss: 0.013031005859375|unsuper_loss: 0.0
-average reward score: 5.04296875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.72%) |Training time=0.47s (21.69%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2207|ppo_ep: 1|act_loss: -0.00606536865234375|cri_loss: -0.0018520355224609375|unsuper_loss: 0.0
-average reward score: 5.03515625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.80%) |Training time=0.46s (21.59%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2208|ppo_ep: 1|act_loss: 0.030975341796875|cri_loss: 0.0165557861328125|unsuper_loss: 0.0
-average reward score: 4.87890625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.25%) |Training time=0.48s (22.21%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.44
-[2023-04-14 10:09:03,847] [INFO] [logging.py:96:log_dist] [Rank 0] step=2210, skipped=24, lr=[8.175835686758245e-06, 8.175835686758245e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:09:03,865] [INFO] [timer.py:199:stop] epoch=0/micro_step=2210/global_step=2210, RunningAvgSamplesPerSec=106.93468634085123, CurrSamplesPerSec=105.99765446011911, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:09:03,958] [INFO] [logging.py:96:log_dist] [Rank 0] step=2210, skipped=30, lr=[4.240334864907317e-06, 4.240334864907317e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2209|ppo_ep: 1|act_loss: 0.018798828125|cri_loss: 0.0097198486328125|unsuper_loss: 0.0
-average reward score: 5.84375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.75%) |Training time=0.46s (21.67%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.44
-[2023-04-14 10:09:06,094] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
-epoch: 0|step: 2210|ppo_ep: 1|act_loss: 0.09375|cri_loss: 0.048583984375|unsuper_loss: 0.0
-average reward score: 5.0859375
--------------------------------------------------------------------------------------
-|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.20%) |Training time=0.46s (21.69%) |Others=0.09 (4.10%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.44
-[2023-04-14 10:09:08,221] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
-epoch: 0|step: 2211|ppo_ep: 1|act_loss: 0.0106658935546875|cri_loss: 0.006023406982421875|unsuper_loss: 0.0
-average reward score: 5.44921875
--------------------------------------------------------------------------------------
-|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.58%) |Training time=0.45s (21.30%) |Others=0.09 (4.12%)|CurSamplesPerSec=15.04 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2212|ppo_ep: 1|act_loss: -0.001712799072265625|cri_loss: -0.0007014274597167969|unsuper_loss: 0.0
-average reward score: 4.8125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.88%) |Training time=0.46s (21.54%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2213|ppo_ep: 1|act_loss: 0.02947998046875|cri_loss: 0.0155029296875|unsuper_loss: 0.0
-average reward score: 5.0703125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.13%) |Training time=0.45s (21.28%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2214|ppo_ep: 1|act_loss: -0.0101470947265625|cri_loss: -0.004581451416015625|unsuper_loss: 0.0
-average reward score: 4.8984375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.24%) |Training time=0.45s (21.16%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2215|ppo_ep: 1|act_loss: 0.004032135009765625|cri_loss: 0.0030670166015625|unsuper_loss: 0.0
-average reward score: 5.203125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.92%) |Training time=0.46s (21.50%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2216|ppo_ep: 1|act_loss: -0.00817108154296875|cri_loss: -0.0038509368896484375|unsuper_loss: 0.0
-average reward score: 5.69140625
--------------------------------------------------------------------------------------
-|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.84%) |Training time=0.46s (19.93%) |Others=0.10 (4.24%)|CurSamplesPerSec=13.81 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2217|ppo_ep: 1|act_loss: -0.03326416015625|cri_loss: -0.0159149169921875|unsuper_loss: 0.0
-average reward score: 5.328125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.15%) |Training time=0.45s (21.26%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2218|ppo_ep: 1|act_loss: 0.026885986328125|cri_loss: 0.01406097412109375|unsuper_loss: 0.0
-average reward score: 4.87890625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.07%) |Training time=0.46s (21.34%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.44
-[2023-04-14 10:09:25,399] [INFO] [logging.py:96:log_dist] [Rank 0] step=2220, skipped=24, lr=[8.162444969715961e-06, 8.162444969715961e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:09:25,418] [INFO] [timer.py:199:stop] epoch=0/micro_step=2220/global_step=2220, RunningAvgSamplesPerSec=106.94111013177839, CurrSamplesPerSec=108.4043708167457, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:09:25,510] [INFO] [logging.py:96:log_dist] [Rank 0] step=2220, skipped=32, lr=[4.234798710055124e-06, 4.234798710055124e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2219|ppo_ep: 1|act_loss: 0.033538818359375|cri_loss: 0.0184783935546875|unsuper_loss: 0.0
-average reward score: 4.8359375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.06%) |Training time=0.46s (21.37%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2220|ppo_ep: 1|act_loss: 0.016204833984375|cri_loss: 0.00933837890625|unsuper_loss: 0.0
-average reward score: 4.73828125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.24%) |Training time=0.45s (21.16%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2221|ppo_ep: 1|act_loss: 0.01727294921875|cri_loss: 0.0093231201171875|unsuper_loss: 0.0
-average reward score: 4.63671875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.22%) |Training time=0.46s (21.21%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2222|ppo_ep: 1|act_loss: -0.0013675689697265625|cri_loss: -0.0002951622009277344|unsuper_loss: 0.0
-average reward score: 4.44140625
--------------------------------------------------------------------------------------
-|E2E latency=3.02s |Gather latency=0.00s (0.00%) |Generate time=1.58s (52.40%) |Training time=0.46s (15.08%) |Others=0.98 (32.52%)|CurSamplesPerSec=10.58 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2223|ppo_ep: 1|act_loss: 0.0084075927734375|cri_loss: 0.0046844482421875|unsuper_loss: 0.0
-average reward score: 5.078125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.78%) |Training time=0.49s (22.45%) |Others=0.10 (4.77%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2224|ppo_ep: 1|act_loss: -0.0291595458984375|cri_loss: -0.0138092041015625|unsuper_loss: 0.0
-average reward score: 5.19140625
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.39%) |Training time=0.50s (23.11%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2225|ppo_ep: 1|act_loss: 0.0131683349609375|cri_loss: 0.00794219970703125|unsuper_loss: 0.0
-average reward score: 5.0859375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.16%) |Training time=0.49s (22.33%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.44
-[2023-04-14 10:09:41,411] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
-epoch: 0|step: 2226|ppo_ep: 1|act_loss: -0.040618896484375|cri_loss: -0.018310546875|unsuper_loss: 0.0
-average reward score: 5.2265625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.85%) |Training time=0.44s (20.58%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.44
-[2023-04-14 10:09:43,556] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
-epoch: 0|step: 2227|ppo_ep: 1|act_loss: 0.018310546875|cri_loss: 0.009613037109375|unsuper_loss: 0.0
-average reward score: 5.2578125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.20%) |Training time=0.46s (21.22%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2228|ppo_ep: 1|act_loss: -0.00025463104248046875|cri_loss: 0.0007419586181640625|unsuper_loss: 0.0
-average reward score: 4.734375
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.04%) |Training time=0.49s (22.49%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.56 |AvgSamplesPerSec=14.44
-[2023-04-14 10:09:48,024] [INFO] [logging.py:96:log_dist] [Rank 0] step=2230, skipped=26, lr=[8.15169676886067e-06, 8.15169676886067e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:09:48,043] [INFO] [timer.py:199:stop] epoch=0/micro_step=2230/global_step=2230, RunningAvgSamplesPerSec=106.92186677661333, CurrSamplesPerSec=95.00962924947352, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:09:48,135] [INFO] [logging.py:96:log_dist] [Rank 0] step=2230, skipped=32, lr=[4.227855380137234e-06, 4.227855380137234e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2229|ppo_ep: 1|act_loss: 0.0445556640625|cri_loss: 0.024322509765625|unsuper_loss: 0.0
-average reward score: 5.08203125
--------------------------------------------------------------------------------------
-|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.69s (73.91%) |Training time=0.50s (21.81%) |Others=0.10 (4.28%)|CurSamplesPerSec=13.98 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2230|ppo_ep: 1|act_loss: -0.001605987548828125|cri_loss: -0.00045108795166015625|unsuper_loss: 0.0
-average reward score: 5.04296875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.25%) |Training time=0.45s (21.15%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2231|ppo_ep: 1|act_loss: -0.0088653564453125|cri_loss: -0.00415802001953125|unsuper_loss: 0.0
-average reward score: 5.8203125
--------------------------------------------------------------------------------------
-|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.02%) |Training time=0.49s (20.77%) |Others=0.10 (4.21%)|CurSamplesPerSec=13.69 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2232|ppo_ep: 1|act_loss: 0.00551605224609375|cri_loss: 0.00337982177734375|unsuper_loss: 0.0
-average reward score: 5.2578125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.88%) |Training time=0.46s (21.53%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2233|ppo_ep: 1|act_loss: 0.00797271728515625|cri_loss: 0.0043792724609375|unsuper_loss: 0.0
-average reward score: 5.36328125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.13%) |Training time=0.45s (21.28%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2234|ppo_ep: 1|act_loss: 0.0162353515625|cri_loss: 0.0086517333984375|unsuper_loss: 0.0
-average reward score: 5.1171875
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.17%) |Training time=0.46s (21.16%) |Others=0.15 (6.67%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2235|ppo_ep: 1|act_loss: -0.0178070068359375|cri_loss: -0.00872039794921875|unsuper_loss: 0.0
-average reward score: 4.796875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.08%) |Training time=0.46s (21.31%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2236|ppo_ep: 1|act_loss: -0.037078857421875|cri_loss: -0.0180206298828125|unsuper_loss: 0.0
-average reward score: 5.1796875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.90%) |Training time=0.46s (21.53%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2237|ppo_ep: 1|act_loss: -0.02801513671875|cri_loss: -0.01322174072265625|unsuper_loss: 0.0
-average reward score: 4.84375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.73%) |Training time=0.46s (21.68%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2238|ppo_ep: 1|act_loss: 0.031951904296875|cri_loss: 0.0170135498046875|unsuper_loss: 0.0
-average reward score: 5.37890625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.37%) |Training time=0.47s (21.87%) |Others=0.10 (4.75%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.44
-[2023-04-14 10:10:09,702] [INFO] [logging.py:96:log_dist] [Rank 0] step=2240, skipped=26, lr=[8.138217151044717e-06, 8.138217151044717e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:10:09,721] [INFO] [timer.py:199:stop] epoch=0/micro_step=2240/global_step=2240, RunningAvgSamplesPerSec=106.91724169004912, CurrSamplesPerSec=102.24546793214905, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:10:09,813] [INFO] [logging.py:96:log_dist] [Rank 0] step=2240, skipped=32, lr=[4.220886439234385e-06, 4.220886439234385e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2239|ppo_ep: 1|act_loss: -0.039947509765625|cri_loss: -0.0196380615234375|unsuper_loss: 0.0
-average reward score: 5.0546875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.38%) |Training time=0.48s (22.09%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2240|ppo_ep: 1|act_loss: -0.023529052734375|cri_loss: -0.01129913330078125|unsuper_loss: 0.0
-average reward score: 5.23828125
--------------------------------------------------------------------------------------
-|E2E latency=2.44s |Gather latency=0.00s (0.00%) |Generate time=1.58s (64.78%) |Training time=0.49s (20.00%) |Others=0.37 (15.22%)|CurSamplesPerSec=13.11 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2241|ppo_ep: 1|act_loss: 0.07281494140625|cri_loss: 0.0377197265625|unsuper_loss: 0.0
-average reward score: 5.18359375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.52%) |Training time=0.43s (19.84%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2242|ppo_ep: 1|act_loss: 0.0274200439453125|cri_loss: 0.0153656005859375|unsuper_loss: 0.0
-average reward score: 4.90234375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.38%) |Training time=0.44s (20.04%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2243|ppo_ep: 1|act_loss: 0.03289794921875|cri_loss: 0.017913818359375|unsuper_loss: 0.0
-average reward score: 5.5
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.36%) |Training time=0.46s (21.14%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.57 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2244|ppo_ep: 1|act_loss: 0.00852203369140625|cri_loss: 0.004596710205078125|unsuper_loss: 0.0
-average reward score: 5.62890625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.29%) |Training time=0.46s (21.18%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2245|ppo_ep: 1|act_loss: 0.0084381103515625|cri_loss: 0.005046844482421875|unsuper_loss: 0.0
-average reward score: 4.7109375
--------------------------------------------------------------------------------------
-|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.61s (67.92%) |Training time=0.46s (19.40%) |Others=0.30 (12.68%)|CurSamplesPerSec=13.51 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2246|ppo_ep: 1|act_loss: 0.0362548828125|cri_loss: 0.020599365234375|unsuper_loss: 0.0
-average reward score: 4.9765625
--------------------------------------------------------------------------------------
-|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.71s (75.91%) |Training time=0.44s (19.68%) |Others=0.10 (4.41%)|CurSamplesPerSec=14.24 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2247|ppo_ep: 1|act_loss: -0.027587890625|cri_loss: -0.01079559326171875|unsuper_loss: 0.0
-average reward score: 5.12109375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.05%) |Training time=0.44s (20.38%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2248|ppo_ep: 1|act_loss: -0.033111572265625|cri_loss: -0.0156707763671875|unsuper_loss: 0.0
-average reward score: 5.359375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.29%) |Training time=0.46s (21.18%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.44
-[2023-04-14 10:10:31,997] [INFO] [logging.py:96:log_dist] [Rank 0] step=2250, skipped=26, lr=[8.124688423357883e-06, 8.124688423357883e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:10:32,015] [INFO] [timer.py:199:stop] epoch=0/micro_step=2250/global_step=2250, RunningAvgSamplesPerSec=106.928745519496, CurrSamplesPerSec=107.82194537475227, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:10:32,108] [INFO] [logging.py:96:log_dist] [Rank 0] step=2250, skipped=32, lr=[4.213891990643095e-06, 4.213891990643095e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2249|ppo_ep: 1|act_loss: -0.00763702392578125|cri_loss: -0.0036678314208984375|unsuper_loss: 0.0
-average reward score: 5.29296875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.27%) |Training time=0.46s (21.20%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2250|ppo_ep: 1|act_loss: -0.029144287109375|cri_loss: -0.013763427734375|unsuper_loss: 0.0
-average reward score: 5.921875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.33%) |Training time=0.46s (21.11%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2251|ppo_ep: 1|act_loss: -0.017852783203125|cri_loss: -0.00787353515625|unsuper_loss: 0.0
-average reward score: 5.22265625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.28%) |Training time=0.46s (21.16%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2252|ppo_ep: 1|act_loss: 0.00494384765625|cri_loss: 0.0029144287109375|unsuper_loss: 0.0
-average reward score: 5.01171875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.80%) |Training time=0.47s (21.67%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2253|ppo_ep: 1|act_loss: 0.024017333984375|cri_loss: 0.0124053955078125|unsuper_loss: 0.0
-average reward score: 5.1015625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.15%) |Training time=0.46s (21.22%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2254|ppo_ep: 1|act_loss: 0.05157470703125|cri_loss: 0.0269622802734375|unsuper_loss: 0.0
-average reward score: 5.2109375
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.19%) |Training time=0.49s (22.35%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.53 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2255|ppo_ep: 1|act_loss: 0.03814697265625|cri_loss: 0.0195159912109375|unsuper_loss: 0.0
-average reward score: 5.09375
--------------------------------------------------------------------------------------
-|E2E latency=3.26s |Gather latency=0.00s (0.00%) |Generate time=1.61s (49.47%) |Training time=0.46s (14.15%) |Others=1.19 (36.38%)|CurSamplesPerSec=9.81 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2256|ppo_ep: 1|act_loss: 0.008209228515625|cri_loss: 0.0043182373046875|unsuper_loss: 0.0
-average reward score: 4.98828125
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.66%) |Training time=0.46s (20.83%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2257|ppo_ep: 1|act_loss: -0.010101318359375|cri_loss: -0.00496673583984375|unsuper_loss: 0.0
-average reward score: 5.4609375
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.65s (74.92%) |Training time=0.45s (20.57%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.53 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2258|ppo_ep: 1|act_loss: -0.053955078125|cri_loss: -0.0260467529296875|unsuper_loss: 0.0
-average reward score: 5.26953125
--------------------------------------------------------------------------------------
-|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.71s (75.09%) |Training time=0.47s (20.59%) |Others=0.10 (4.32%)|CurSamplesPerSec=14.04 |AvgSamplesPerSec=14.44
-[2023-04-14 10:10:54,960] [INFO] [logging.py:96:log_dist] [Rank 0] step=2260, skipped=26, lr=[8.11111078632855e-06, 8.11111078632855e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:10:54,978] [INFO] [timer.py:199:stop] epoch=0/micro_step=2260/global_step=2260, RunningAvgSamplesPerSec=106.92700638560709, CurrSamplesPerSec=110.86272184070586, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:10:55,071] [INFO] [logging.py:96:log_dist] [Rank 0] step=2260, skipped=32, lr=[4.206872138037964e-06, 4.206872138037964e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2259|ppo_ep: 1|act_loss: -0.022674560546875|cri_loss: -0.01006317138671875|unsuper_loss: 0.0
-average reward score: 5.421875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.50%) |Training time=0.45s (20.95%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2260|ppo_ep: 1|act_loss: -0.013671875|cri_loss: -0.0060577392578125|unsuper_loss: 0.0
-average reward score: 5.4921875
--------------------------------------------------------------------------------------
-|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.77s (75.07%) |Training time=0.49s (20.76%) |Others=0.10 (4.17%)|CurSamplesPerSec=13.57 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2261|ppo_ep: 1|act_loss: 0.00531005859375|cri_loss: 0.00421905517578125|unsuper_loss: 0.0
-average reward score: 5.46875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.40%) |Training time=0.45s (21.05%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2262|ppo_ep: 1|act_loss: -0.034423828125|cri_loss: -0.0164947509765625|unsuper_loss: 0.0
-average reward score: 5.796875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.14%) |Training time=0.46s (21.32%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2263|ppo_ep: 1|act_loss: -0.00730133056640625|cri_loss: -0.003337860107421875|unsuper_loss: 0.0
-average reward score: 5.4609375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.27%) |Training time=0.46s (21.18%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2264|ppo_ep: 1|act_loss: 0.0159912109375|cri_loss: 0.0100555419921875|unsuper_loss: 0.0
-average reward score: 5.50390625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.28%) |Training time=0.46s (21.16%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2265|ppo_ep: 1|act_loss: 0.012115478515625|cri_loss: 0.00759124755859375|unsuper_loss: 0.0
-average reward score: 5.1328125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.26%) |Training time=0.46s (21.18%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2266|ppo_ep: 1|act_loss: 0.00399017333984375|cri_loss: 0.0026092529296875|unsuper_loss: 0.0
-average reward score: 5.171875
--------------------------------------------------------------------------------------
-|E2E latency=2.74s |Gather latency=0.00s (0.00%) |Generate time=1.60s (58.38%) |Training time=0.45s (16.54%) |Others=0.69 (25.08%)|CurSamplesPerSec=11.69 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2267|ppo_ep: 1|act_loss: -0.047210693359375|cri_loss: -0.0228271484375|unsuper_loss: 0.0
-average reward score: 5.58984375
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.02%) |Training time=0.49s (22.45%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2268|ppo_ep: 1|act_loss: -0.0060577392578125|cri_loss: -0.00244140625|unsuper_loss: 0.0
-average reward score: 5.51171875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.28%) |Training time=0.48s (22.19%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.44
-[2023-04-14 10:11:17,364] [INFO] [logging.py:96:log_dist] [Rank 0] step=2270, skipped=26, lr=[8.097484441210055e-06, 8.097484441210055e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:11:17,382] [INFO] [timer.py:199:stop] epoch=0/micro_step=2270/global_step=2270, RunningAvgSamplesPerSec=106.91831941913372, CurrSamplesPerSec=103.83418432078822, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:11:17,475] [INFO] [logging.py:96:log_dist] [Rank 0] step=2270, skipped=32, lr=[4.199826985470142e-06, 4.199826985470142e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2269|ppo_ep: 1|act_loss: -0.04705810546875|cri_loss: -0.0223846435546875|unsuper_loss: 0.0
-average reward score: 5.1796875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.64%) |Training time=0.47s (21.83%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2270|ppo_ep: 1|act_loss: -0.030059814453125|cri_loss: -0.01427459716796875|unsuper_loss: 0.0
-average reward score: 5.6015625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.54%) |Training time=0.47s (21.90%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2271|ppo_ep: 1|act_loss: -0.016143798828125|cri_loss: -0.0068817138671875|unsuper_loss: 0.0
-average reward score: 5.296875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.93%) |Training time=0.46s (21.51%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2272|ppo_ep: 1|act_loss: -0.05194091796875|cri_loss: -0.020294189453125|unsuper_loss: 0.0
-average reward score: 5.171875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.88%) |Training time=0.46s (21.55%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2273|ppo_ep: 1|act_loss: -0.018035888671875|cri_loss: -0.00811767578125|unsuper_loss: 0.0
-average reward score: 4.9921875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.68%) |Training time=0.47s (21.77%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2274|ppo_ep: 1|act_loss: 0.01580810546875|cri_loss: 0.0082855224609375|unsuper_loss: 0.0
-average reward score: 5.44921875
--------------------------------------------------------------------------------------
-|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.76s (74.89%) |Training time=0.49s (20.92%) |Others=0.10 (4.19%)|CurSamplesPerSec=13.64 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2275|ppo_ep: 1|act_loss: 0.017425537109375|cri_loss: 0.00977325439453125|unsuper_loss: 0.0
-average reward score: 4.67578125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.81%) |Training time=0.47s (21.70%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2276|ppo_ep: 1|act_loss: -0.0180816650390625|cri_loss: -0.0082855224609375|unsuper_loss: 0.0
-average reward score: 5.01171875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.95%) |Training time=0.46s (21.50%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2277|ppo_ep: 1|act_loss: -0.01300048828125|cri_loss: -0.0060882568359375|unsuper_loss: 0.0
-average reward score: 5.51953125
--------------------------------------------------------------------------------------
-|E2E latency=3.46s |Gather latency=0.00s (0.00%) |Generate time=1.59s (45.97%) |Training time=0.47s (13.45%) |Others=1.41 (40.58%)|CurSamplesPerSec=9.24 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2278|ppo_ep: 1|act_loss: -0.017974853515625|cri_loss: -0.008453369140625|unsuper_loss: 0.0
-average reward score: 5.65234375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.90%) |Training time=0.46s (21.53%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.43
-[2023-04-14 10:11:40,451] [INFO] [logging.py:96:log_dist] [Rank 0] step=2280, skipped=26, lr=[8.083809589977701e-06, 8.083809589977701e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:11:40,469] [INFO] [timer.py:199:stop] epoch=0/micro_step=2280/global_step=2280, RunningAvgSamplesPerSec=106.90553455426142, CurrSamplesPerSec=104.34825010417897, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:11:40,562] [INFO] [logging.py:96:log_dist] [Rank 0] step=2280, skipped=32, lr=[4.192756637365787e-06, 4.192756637365787e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2279|ppo_ep: 1|act_loss: 0.0111846923828125|cri_loss: 0.00595855712890625|unsuper_loss: 0.0
-average reward score: 5.81640625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.69%) |Training time=0.47s (21.77%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2280|ppo_ep: 1|act_loss: 0.0008172988891601562|cri_loss: 0.0007848739624023438|unsuper_loss: 0.0
-average reward score: 5.0703125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.69%) |Training time=0.47s (21.75%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2281|ppo_ep: 1|act_loss: 0.016265869140625|cri_loss: 0.00873565673828125|unsuper_loss: 0.0
-average reward score: 5.11328125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.14%) |Training time=0.49s (22.33%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2282|ppo_ep: 1|act_loss: 0.050537109375|cri_loss: 0.027069091796875|unsuper_loss: 0.0
-average reward score: 5.98046875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.70%) |Training time=0.50s (22.79%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2283|ppo_ep: 1|act_loss: 0.00830078125|cri_loss: 0.004779815673828125|unsuper_loss: 0.0
-average reward score: 5.0234375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.11%) |Training time=0.49s (22.37%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2284|ppo_ep: 1|act_loss: -0.0672607421875|cri_loss: -0.032012939453125|unsuper_loss: 0.0
-average reward score: 5.5390625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.00%) |Training time=0.46s (21.46%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2285|ppo_ep: 1|act_loss: -0.0333251953125|cri_loss: -0.0162506103515625|unsuper_loss: 0.0
-average reward score: 5.1328125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.34%) |Training time=0.43s (19.93%) |Others=0.10 (4.74%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2286|ppo_ep: 1|act_loss: 0.019287109375|cri_loss: 0.0107269287109375|unsuper_loss: 0.0
-average reward score: 5.265625
--------------------------------------------------------------------------------------
-|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.68s (75.92%) |Training time=0.44s (19.62%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.42 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2287|ppo_ep: 1|act_loss: 0.0294036865234375|cri_loss: 0.01515960693359375|unsuper_loss: 0.0
-average reward score: 5.5
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.98%) |Training time=0.44s (20.44%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2288|ppo_ep: 1|act_loss: 0.016387939453125|cri_loss: 0.00876617431640625|unsuper_loss: 0.0
-average reward score: 5.328125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.06%) |Training time=0.44s (20.36%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.43
-[2023-04-14 10:12:02,342] [INFO] [logging.py:96:log_dist] [Rank 0] step=2290, skipped=26, lr=[8.070086435325772e-06, 8.070086435325772e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:12:02,361] [INFO] [timer.py:199:stop] epoch=0/micro_step=2290/global_step=2290, RunningAvgSamplesPerSec=106.89813915883823, CurrSamplesPerSec=87.2612355131024, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:12:02,454] [INFO] [logging.py:96:log_dist] [Rank 0] step=2290, skipped=32, lr=[4.185661198524513e-06, 4.185661198524513e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2289|ppo_ep: 1|act_loss: 0.00677490234375|cri_loss: 0.0037097930908203125|unsuper_loss: 0.0
-average reward score: 5.0546875
--------------------------------------------------------------------------------------
-|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.71s (72.81%) |Training time=0.54s (23.01%) |Others=0.10 (4.18%)|CurSamplesPerSec=13.63 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2290|ppo_ep: 1|act_loss: 0.04388427734375|cri_loss: 0.0227508544921875|unsuper_loss: 0.0
-average reward score: 5.14453125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.49%) |Training time=0.43s (19.93%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2291|ppo_ep: 1|act_loss: 0.035247802734375|cri_loss: 0.0183563232421875|unsuper_loss: 0.0
-average reward score: 5.1484375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.32%) |Training time=0.43s (20.11%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2292|ppo_ep: 1|act_loss: -0.0136871337890625|cri_loss: -0.0064544677734375|unsuper_loss: 0.0
-average reward score: 5.41796875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.48%) |Training time=0.45s (20.92%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2293|ppo_ep: 1|act_loss: 0.006671905517578125|cri_loss: 0.0039825439453125|unsuper_loss: 0.0
-average reward score: 5.296875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.66%) |Training time=0.45s (20.76%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2294|ppo_ep: 1|act_loss: -0.09033203125|cri_loss: -0.040313720703125|unsuper_loss: 0.0
-average reward score: 5.13671875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.79%) |Training time=0.44s (20.63%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2295|ppo_ep: 1|act_loss: -0.0204925537109375|cri_loss: -0.00887298583984375|unsuper_loss: 0.0
-average reward score: 5.5625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.41%) |Training time=0.45s (20.84%) |Others=0.10 (4.75%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2296|ppo_ep: 1|act_loss: -0.05853271484375|cri_loss: -0.028350830078125|unsuper_loss: 0.0
-average reward score: 4.9140625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.35%) |Training time=0.45s (21.07%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2297|ppo_ep: 1|act_loss: -0.0142822265625|cri_loss: -0.00630950927734375|unsuper_loss: 0.0
-average reward score: 5.24609375
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.05%) |Training time=0.49s (22.44%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2298|ppo_ep: 1|act_loss: 0.0034999847412109375|cri_loss: 0.002185821533203125|unsuper_loss: 0.0
-average reward score: 6.1015625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.88%) |Training time=0.47s (21.58%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.44
-[2023-04-14 10:12:23,932] [INFO] [logging.py:96:log_dist] [Rank 0] step=2300, skipped=26, lr=[8.056315180664529e-06, 8.056315180664529e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:12:23,950] [INFO] [timer.py:199:stop] epoch=0/micro_step=2300/global_step=2300, RunningAvgSamplesPerSec=106.91339089598668, CurrSamplesPerSec=109.58776763603377, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:12:24,042] [INFO] [logging.py:96:log_dist] [Rank 0] step=2300, skipped=32, lr=[4.17854077411784e-06, 4.17854077411784e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2299|ppo_ep: 1|act_loss: -0.0406494140625|cri_loss: -0.0195770263671875|unsuper_loss: 0.0
-average reward score: 4.9453125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.38%) |Training time=0.45s (21.08%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2300|ppo_ep: 1|act_loss: 0.0259552001953125|cri_loss: 0.0132904052734375|unsuper_loss: 0.0
-average reward score: 5.515625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.25%) |Training time=0.46s (21.18%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2301|ppo_ep: 1|act_loss: 0.07537841796875|cri_loss: 0.038604736328125|unsuper_loss: 0.0
-average reward score: 5.1171875
--------------------------------------------------------------------------------------
-|E2E latency=2.46s |Gather latency=0.00s (0.00%) |Generate time=1.59s (64.52%) |Training time=0.46s (18.84%) |Others=0.41 (16.63%)|CurSamplesPerSec=12.98 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2302|ppo_ep: 1|act_loss: 0.051544189453125|cri_loss: 0.0265960693359375|unsuper_loss: 0.0
-average reward score: 5.265625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.77%) |Training time=0.47s (21.71%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2303|ppo_ep: 1|act_loss: 0.0904541015625|cri_loss: 0.047149658203125|unsuper_loss: 0.0
-average reward score: 5.05078125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.77%) |Training time=0.47s (21.67%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2304|ppo_ep: 1|act_loss: -0.0343017578125|cri_loss: -0.0167236328125|unsuper_loss: 0.0
-average reward score: 5.24609375
--------------------------------------------------------------------------------------
-|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.76s (74.92%) |Training time=0.49s (20.88%) |Others=0.10 (4.20%)|CurSamplesPerSec=13.64 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2305|ppo_ep: 1|act_loss: -0.024200439453125|cri_loss: -0.0115966796875|unsuper_loss: 0.0
-average reward score: 5.12109375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.48%) |Training time=0.47s (21.96%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2306|ppo_ep: 1|act_loss: 0.001941680908203125|cri_loss: 0.0023021697998046875|unsuper_loss: 0.0
-average reward score: 5.3671875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.84%) |Training time=0.47s (21.61%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2307|ppo_ep: 1|act_loss: -0.0360107421875|cri_loss: -0.0176544189453125|unsuper_loss: 0.0
-average reward score: 5.34375
--------------------------------------------------------------------------------------
-|E2E latency=2.61s |Gather latency=0.00s (0.00%) |Generate time=1.59s (60.94%) |Training time=0.46s (17.63%) |Others=0.56 (21.43%)|CurSamplesPerSec=12.25 |AvgSamplesPerSec=14.43
-epoch: 0|step: 2308|ppo_ep: 1|act_loss: -0.017486572265625|cri_loss: -0.00789642333984375|unsuper_loss: 0.0
-average reward score: 5.046875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.27%) |Training time=0.48s (22.18%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.43
-[2023-04-14 10:12:46,476] [INFO] [logging.py:96:log_dist] [Rank 0] step=2310, skipped=26, lr=[8.042496030117185e-06, 8.042496030117185e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:12:46,494] [INFO] [timer.py:199:stop] epoch=0/micro_step=2310/global_step=2310, RunningAvgSamplesPerSec=106.90101331828065, CurrSamplesPerSec=105.29688544396153, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:12:46,587] [INFO] [logging.py:96:log_dist] [Rank 0] step=2310, skipped=32, lr=[4.171395469687636e-06, 4.171395469687636e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2309|ppo_ep: 1|act_loss: 0.034759521484375|cri_loss: 0.017852783203125|unsuper_loss: 0.0
-average reward score: 5.5703125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.76%) |Training time=0.47s (21.70%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2310|ppo_ep: 1|act_loss: 0.018341064453125|cri_loss: 0.0097503662109375|unsuper_loss: 0.0
-average reward score: 5.41015625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.56%) |Training time=0.47s (21.88%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2311|ppo_ep: 1|act_loss: 0.01898193359375|cri_loss: 0.0101470947265625|unsuper_loss: 0.0
-average reward score: 5.68359375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.27%) |Training time=0.48s (22.20%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.44
-[2023-04-14 10:12:53,077] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
-epoch: 0|step: 2312|ppo_ep: 1|act_loss: 0.0770263671875|cri_loss: 0.04034423828125|unsuper_loss: 0.0
-average reward score: 5.78515625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.39%) |Training time=0.49s (22.57%) |Others=0.09 (4.04%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.44
-[2023-04-14 10:12:55,219] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
-epoch: 0|step: 2313|ppo_ep: 1|act_loss: 0.0029315948486328125|cri_loss: 0.0021209716796875|unsuper_loss: 0.0
-average reward score: 5.6171875
--------------------------------------------------------------------------------------
-|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.59s (69.81%) |Training time=0.47s (20.57%) |Others=0.22 (9.62%)|CurSamplesPerSec=14.08 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2314|ppo_ep: 1|act_loss: -0.02044677734375|cri_loss: -0.01010894775390625|unsuper_loss: 0.0
-average reward score: 4.8828125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.57%) |Training time=0.47s (21.90%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2315|ppo_ep: 1|act_loss: 0.0189056396484375|cri_loss: 0.01038360595703125|unsuper_loss: 0.0
-average reward score: 5.1796875
--------------------------------------------------------------------------------------
-|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.62s (71.70%) |Training time=0.50s (21.90%) |Others=0.14 (6.40%)|CurSamplesPerSec=14.14 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2316|ppo_ep: 1|act_loss: 0.01366424560546875|cri_loss: 0.00711822509765625|unsuper_loss: 0.0
-average reward score: 5.0546875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.82%) |Training time=0.46s (21.32%) |Others=0.11 (4.86%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2317|ppo_ep: 1|act_loss: 0.034149169921875|cri_loss: 0.0182647705078125|unsuper_loss: 0.0
-average reward score: 5.39453125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.93%) |Training time=0.46s (21.43%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2318|ppo_ep: 1|act_loss: -0.0007562637329101562|cri_loss: -0.00014853477478027344|unsuper_loss: 0.0
-average reward score: 5.55859375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.59%) |Training time=0.47s (21.83%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.44
-[2023-04-14 10:13:08,300] [INFO] [logging.py:96:log_dist] [Rank 0] step=2320, skipped=26, lr=[8.028629188516887e-06, 8.028629188516887e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:13:08,539] [INFO] [timer.py:199:stop] epoch=0/micro_step=2320/global_step=2320, RunningAvgSamplesPerSec=106.84855308456247, CurrSamplesPerSec=61.47600233413061, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:13:08,632] [INFO] [logging.py:96:log_dist] [Rank 0] step=2320, skipped=34, lr=[4.165661383686135e-06, 4.165661383686135e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2319|ppo_ep: 1|act_loss: -0.05047607421875|cri_loss: -0.02435302734375|unsuper_loss: 0.0
-average reward score: 5.30859375
--------------------------------------------------------------------------------------
-|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.59s (67.09%) |Training time=0.68s (28.79%) |Others=0.10 (4.11%)|CurSamplesPerSec=13.49 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2320|ppo_ep: 1|act_loss: 0.0015115737915039062|cri_loss: 0.0011758804321289062|unsuper_loss: 0.0
-average reward score: 5.05859375
--------------------------------------------------------------------------------------
-|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.76s (74.88%) |Training time=0.49s (20.92%) |Others=0.10 (4.19%)|CurSamplesPerSec=13.65 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2321|ppo_ep: 1|act_loss: -0.0006151199340820312|cri_loss: -3.0040740966796875e-05|unsuper_loss: 0.0
-average reward score: 5.44140625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.73%) |Training time=0.47s (21.70%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2322|ppo_ep: 1|act_loss: -0.0045928955078125|cri_loss: -0.00206756591796875|unsuper_loss: 0.0
-average reward score: 5.6953125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.04%) |Training time=0.46s (21.38%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2323|ppo_ep: 1|act_loss: 0.0068511962890625|cri_loss: 0.00389862060546875|unsuper_loss: 0.0
-average reward score: 5.37890625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.82%) |Training time=0.47s (21.63%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2324|ppo_ep: 1|act_loss: 0.016845703125|cri_loss: 0.0091400146484375|unsuper_loss: 0.0
-average reward score: 5.46484375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.54%) |Training time=0.47s (21.91%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2325|ppo_ep: 1|act_loss: -0.028961181640625|cri_loss: -0.01421356201171875|unsuper_loss: 0.0
-average reward score: 5.5625
--------------------------------------------------------------------------------------
-|E2E latency=2.08s |Gather latency=0.00s (0.00%) |Generate time=1.59s (76.63%) |Training time=0.39s (18.61%) |Others=0.10 (4.76%)|CurSamplesPerSec=15.42 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2326|ppo_ep: 1|act_loss: 0.0291290283203125|cri_loss: 0.01497650146484375|unsuper_loss: 0.0
-average reward score: 5.06640625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.40%) |Training time=0.48s (22.07%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2327|ppo_ep: 1|act_loss: 0.020904541015625|cri_loss: 0.01064300537109375|unsuper_loss: 0.0
-average reward score: 5.41015625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.14%) |Training time=0.46s (21.28%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.44
-[2023-04-14 10:13:28,936] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
-epoch: 0|step: 2328|ppo_ep: 1|act_loss: 0.05792236328125|cri_loss: 0.03179931640625|unsuper_loss: 0.0
-average reward score: 5.08203125
--------------------------------------------------------------------------------------
-|E2E latency=2.12s |Gather latency=0.00s (0.00%) |Generate time=1.59s (75.04%) |Training time=0.43s (20.32%) |Others=0.10 (4.64%)|CurSamplesPerSec=15.13 |AvgSamplesPerSec=14.44
-[2023-04-14 10:13:31,062] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
-[2023-04-14 10:13:31,063] [INFO] [logging.py:96:log_dist] [Rank 0] step=2330, skipped=28, lr=[8.017501515777759e-06, 8.017501515777759e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:13:31,063] [INFO] [timer.py:199:stop] epoch=0/micro_step=2330/global_step=2330, RunningAvgSamplesPerSec=106.86175914958172, CurrSamplesPerSec=123.63699739954438, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:13:31,155] [INFO] [logging.py:96:log_dist] [Rank 0] step=2330, skipped=34, lr=[4.158471562355229e-06, 4.158471562355229e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2329|ppo_ep: 1|act_loss: -0.018341064453125|cri_loss: -0.00897216796875|unsuper_loss: 0.0
-average reward score: 5.40234375
--------------------------------------------------------------------------------------
-|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.62%) |Training time=0.42s (19.78%) |Others=0.10 (4.59%)|CurSamplesPerSec=15.05 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2330|ppo_ep: 1|act_loss: 0.0045166015625|cri_loss: 0.002838134765625|unsuper_loss: 0.0
-average reward score: 4.87109375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.14%) |Training time=0.45s (21.26%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2331|ppo_ep: 1|act_loss: 0.00347137451171875|cri_loss: 0.0021820068359375|unsuper_loss: 0.0
-average reward score: 5.37109375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.17%) |Training time=0.45s (21.23%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2332|ppo_ep: 1|act_loss: -0.056640625|cri_loss: -0.02764892578125|unsuper_loss: 0.0
-average reward score: 5.0625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.34%) |Training time=0.45s (21.06%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2333|ppo_ep: 1|act_loss: -0.02557373046875|cri_loss: -0.01247406005859375|unsuper_loss: 0.0
-average reward score: 5.76953125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.17%) |Training time=0.45s (21.24%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2334|ppo_ep: 1|act_loss: 0.012054443359375|cri_loss: 0.006420135498046875|unsuper_loss: 0.0
-average reward score: 4.94921875
--------------------------------------------------------------------------------------
-|E2E latency=2.43s |Gather latency=0.00s (0.00%) |Generate time=1.75s (71.93%) |Training time=0.45s (18.59%) |Others=0.23 (9.47%)|CurSamplesPerSec=13.14 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2335|ppo_ep: 1|act_loss: 0.03173828125|cri_loss: 0.01629638671875|unsuper_loss: 0.0
-average reward score: 5.796875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.22%) |Training time=0.45s (21.18%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2336|ppo_ep: 1|act_loss: 0.0335693359375|cri_loss: 0.0183258056640625|unsuper_loss: 0.0
-average reward score: 4.7265625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.28%) |Training time=0.45s (21.12%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2337|ppo_ep: 1|act_loss: -0.042266845703125|cri_loss: -0.0205078125|unsuper_loss: 0.0
-average reward score: 5.2734375
--------------------------------------------------------------------------------------
-|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.27%) |Training time=0.45s (21.11%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2338|ppo_ep: 1|act_loss: 0.015228271484375|cri_loss: 0.0080108642578125|unsuper_loss: 0.0
-average reward score: 5.4921875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.64%) |Training time=0.47s (21.79%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.44
-[2023-04-14 10:13:52,736] [INFO] [logging.py:96:log_dist] [Rank 0] step=2340, skipped=28, lr=[8.003549348716149e-06, 8.003549348716149e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:13:52,754] [INFO] [timer.py:199:stop] epoch=0/micro_step=2340/global_step=2340, RunningAvgSamplesPerSec=106.87153709245142, CurrSamplesPerSec=106.25905734856126, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:13:52,846] [INFO] [logging.py:96:log_dist] [Rank 0] step=2340, skipped=34, lr=[4.151257158474923e-06, 4.151257158474923e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2339|ppo_ep: 1|act_loss: 0.041748046875|cri_loss: 0.02178955078125|unsuper_loss: 0.0
-average reward score: 5.484375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.76%) |Training time=0.46s (21.67%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2340|ppo_ep: 1|act_loss: -0.0221710205078125|cri_loss: -0.01041412353515625|unsuper_loss: 0.0
-average reward score: 5.3359375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.82%) |Training time=0.46s (21.58%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2341|ppo_ep: 1|act_loss: 0.018798828125|cri_loss: 0.0099029541015625|unsuper_loss: 0.0
-average reward score: 4.84765625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.10%) |Training time=0.46s (21.32%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2342|ppo_ep: 1|act_loss: 0.00762939453125|cri_loss: 0.007808685302734375|unsuper_loss: 0.0
-average reward score: 5.7578125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.24%) |Training time=0.48s (22.23%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2343|ppo_ep: 1|act_loss: -0.029632568359375|cri_loss: -0.01442718505859375|unsuper_loss: 0.0
-average reward score: 5.5
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.20%) |Training time=0.46s (21.22%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2344|ppo_ep: 1|act_loss: 0.008575439453125|cri_loss: 0.004589080810546875|unsuper_loss: 0.0
-average reward score: 5.109375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.68%) |Training time=0.45s (20.80%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2345|ppo_ep: 1|act_loss: 0.0219573974609375|cri_loss: 0.01154327392578125|unsuper_loss: 0.0
-average reward score: 5.65234375
--------------------------------------------------------------------------------------
-|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.69s (75.47%) |Training time=0.45s (20.13%) |Others=0.10 (4.39%)|CurSamplesPerSec=14.29 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2346|ppo_ep: 1|act_loss: 0.00861358642578125|cri_loss: 0.00467681884765625|unsuper_loss: 0.0
-average reward score: 5.6328125
--------------------------------------------------------------------------------------
-|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.32%) |Training time=0.45s (21.07%) |Others=0.10 (4.62%)|CurSamplesPerSec=15.00 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2347|ppo_ep: 1|act_loss: 0.0147552490234375|cri_loss: 0.00815582275390625|unsuper_loss: 0.0
-average reward score: 5.31640625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.33%) |Training time=0.45s (21.07%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2348|ppo_ep: 1|act_loss: -0.0386962890625|cri_loss: -0.01849365234375|unsuper_loss: 0.0
-average reward score: 5.12109375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.37%) |Training time=0.45s (21.04%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.44
-[2023-04-14 10:14:14,328] [INFO] [logging.py:96:log_dist] [Rank 0] step=2350, skipped=28, lr=[7.989550067885356e-06, 7.989550067885356e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:14:14,860] [INFO] [timer.py:199:stop] epoch=0/micro_step=2350/global_step=2350, RunningAvgSamplesPerSec=106.79837881327074, CurrSamplesPerSec=38.86040216651423, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:14:14,953] [INFO] [logging.py:96:log_dist] [Rank 0] step=2350, skipped=34, lr=[4.1440182789800875e-06, 4.1440182789800875e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2349|ppo_ep: 1|act_loss: -0.01824951171875|cri_loss: -0.00884246826171875|unsuper_loss: 0.0
-average reward score: 5.453125
--------------------------------------------------------------------------------------
-|E2E latency=2.67s |Gather latency=0.00s (0.00%) |Generate time=1.59s (59.43%) |Training time=0.99s (36.91%) |Others=0.10 (3.65%)|CurSamplesPerSec=11.98 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2350|ppo_ep: 1|act_loss: 0.01251220703125|cri_loss: 0.00763702392578125|unsuper_loss: 0.0
-average reward score: 5.4140625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.20%) |Training time=0.45s (21.20%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2351|ppo_ep: 1|act_loss: 0.006336212158203125|cri_loss: 0.0037937164306640625|unsuper_loss: 0.0
-average reward score: 5.35546875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.34%) |Training time=0.45s (21.07%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2352|ppo_ep: 1|act_loss: -0.009918212890625|cri_loss: -0.004642486572265625|unsuper_loss: 0.0
-average reward score: 5.77734375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.78%) |Training time=0.47s (21.65%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2353|ppo_ep: 1|act_loss: 0.069091796875|cri_loss: 0.03546142578125|unsuper_loss: 0.0
-average reward score: 4.89453125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.69%) |Training time=0.47s (21.75%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2354|ppo_ep: 1|act_loss: 0.0645751953125|cri_loss: 0.033111572265625|unsuper_loss: 0.0
-average reward score: 5.7109375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.94%) |Training time=0.46s (21.48%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2355|ppo_ep: 1|act_loss: -0.000469207763671875|cri_loss: 0.00017404556274414062|unsuper_loss: 0.0
-average reward score: 5.0859375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.64%) |Training time=0.47s (21.80%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2356|ppo_ep: 1|act_loss: 0.0271759033203125|cri_loss: 0.0141143798828125|unsuper_loss: 0.0
-average reward score: 5.109375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.57%) |Training time=0.47s (21.87%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2357|ppo_ep: 1|act_loss: -0.05279541015625|cri_loss: -0.025543212890625|unsuper_loss: 0.0
-average reward score: 5.7265625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.88%) |Training time=0.46s (21.55%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2358|ppo_ep: 1|act_loss: -0.038299560546875|cri_loss: -0.017913818359375|unsuper_loss: 0.0
-average reward score: 5.3671875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.85%) |Training time=0.46s (21.58%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.44
-[2023-04-14 10:14:36,326] [INFO] [logging.py:96:log_dist] [Rank 0] step=2360, skipped=28, lr=[7.975503880788498e-06, 7.975503880788498e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:14:36,344] [INFO] [timer.py:199:stop] epoch=0/micro_step=2360/global_step=2360, RunningAvgSamplesPerSec=106.79686323206259, CurrSamplesPerSec=106.73972149543116, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:14:36,437] [INFO] [logging.py:96:log_dist] [Rank 0] step=2360, skipped=34, lr=[4.13675503116838e-06, 4.13675503116838e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2359|ppo_ep: 1|act_loss: -0.00870513916015625|cri_loss: -0.003223419189453125|unsuper_loss: 0.0
-average reward score: 5.69140625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.81%) |Training time=0.46s (21.61%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2360|ppo_ep: 1|act_loss: -0.0218505859375|cri_loss: -0.010284423828125|unsuper_loss: 0.0
-average reward score: 5.37890625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.83%) |Training time=0.46s (21.59%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2361|ppo_ep: 1|act_loss: 0.003803253173828125|cri_loss: 0.002079010009765625|unsuper_loss: 0.0
-average reward score: 5.25390625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.04%) |Training time=0.46s (21.35%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2362|ppo_ep: 1|act_loss: 0.09405517578125|cri_loss: 0.04913330078125|unsuper_loss: 0.0
-average reward score: 6.08203125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.81%) |Training time=0.46s (21.61%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2363|ppo_ep: 1|act_loss: 0.03997802734375|cri_loss: 0.0205230712890625|unsuper_loss: 0.0
-average reward score: 5.0234375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.92%) |Training time=0.46s (21.50%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2364|ppo_ep: 1|act_loss: -0.01141357421875|cri_loss: -0.00485992431640625|unsuper_loss: 0.0
-average reward score: 5.0546875
--------------------------------------------------------------------------------------
-|E2E latency=2.68s |Gather latency=0.00s (0.00%) |Generate time=1.58s (58.98%) |Training time=0.46s (17.17%) |Others=0.64 (23.86%)|CurSamplesPerSec=11.94 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2365|ppo_ep: 1|act_loss: 0.00457000732421875|cri_loss: 0.00278472900390625|unsuper_loss: 0.0
-average reward score: 4.96875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.90%) |Training time=0.46s (21.50%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2366|ppo_ep: 1|act_loss: 0.01678466796875|cri_loss: 0.00885772705078125|unsuper_loss: 0.0
-average reward score: 5.65234375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.14%) |Training time=0.45s (21.26%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2367|ppo_ep: 1|act_loss: -0.0163726806640625|cri_loss: -0.0079345703125|unsuper_loss: 0.0
-average reward score: 5.48828125
--------------------------------------------------------------------------------------
-|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.99%) |Training time=0.46s (21.40%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2368|ppo_ep: 1|act_loss: 0.0004329681396484375|cri_loss: 0.0009245872497558594|unsuper_loss: 0.0
-average reward score: 5.1171875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.72%) |Training time=0.46s (21.69%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.44
-[2023-04-14 10:14:58,277] [INFO] [logging.py:96:log_dist] [Rank 0] step=2370, skipped=28, lr=[7.961410995623948e-06, 7.961410995623948e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:14:58,296] [INFO] [timer.py:199:stop] epoch=0/micro_step=2370/global_step=2370, RunningAvgSamplesPerSec=106.79987659925104, CurrSamplesPerSec=108.05257971649202, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:14:58,388] [INFO] [logging.py:96:log_dist] [Rank 0] step=2370, skipped=34, lr=[4.129467522698653e-06, 4.129467522698653e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2369|ppo_ep: 1|act_loss: -0.04071044921875|cri_loss: -0.01959228515625|unsuper_loss: 0.0
-average reward score: 5.02734375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.03%) |Training time=0.46s (21.41%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2370|ppo_ep: 1|act_loss: -0.04754638671875|cri_loss: -0.023162841796875|unsuper_loss: 0.0
-average reward score: 5.46484375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.87%) |Training time=0.46s (21.55%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2371|ppo_ep: 1|act_loss: -0.02392578125|cri_loss: -0.0097503662109375|unsuper_loss: 0.0
-average reward score: 5.2890625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.05%) |Training time=0.46s (21.36%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2372|ppo_ep: 1|act_loss: 0.03375244140625|cri_loss: 0.0176544189453125|unsuper_loss: 0.0
-average reward score: 5.765625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.70%) |Training time=0.44s (20.72%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2373|ppo_ep: 1|act_loss: 0.03924560546875|cri_loss: 0.02032470703125|unsuper_loss: 0.0
-average reward score: 5.7109375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.45%) |Training time=0.45s (20.96%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2374|ppo_ep: 1|act_loss: 0.0574951171875|cri_loss: 0.03045654296875|unsuper_loss: 0.0
-average reward score: 5.01171875
--------------------------------------------------------------------------------------
-|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.70s (75.61%) |Training time=0.45s (20.03%) |Others=0.10 (4.36%)|CurSamplesPerSec=14.20 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2375|ppo_ep: 1|act_loss: 0.0867919921875|cri_loss: 0.047027587890625|unsuper_loss: 0.0
-average reward score: 5.3203125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.13%) |Training time=0.46s (21.29%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2376|ppo_ep: 1|act_loss: 0.0019407272338867188|cri_loss: 0.00193023681640625|unsuper_loss: 0.0
-average reward score: 4.84765625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.05%) |Training time=0.46s (21.36%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2377|ppo_ep: 1|act_loss: -0.01904296875|cri_loss: -0.00902557373046875|unsuper_loss: 0.0
-average reward score: 4.78515625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.71%) |Training time=0.47s (21.71%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2378|ppo_ep: 1|act_loss: -0.0322265625|cri_loss: -0.01522064208984375|unsuper_loss: 0.0
-average reward score: 5.1640625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.90%) |Training time=0.46s (21.53%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.44
-[2023-04-14 10:15:19,996] [INFO] [logging.py:96:log_dist] [Rank 0] step=2380, skipped=28, lr=[7.947271621282263e-06, 7.947271621282263e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:15:20,014] [INFO] [timer.py:199:stop] epoch=0/micro_step=2380/global_step=2380, RunningAvgSamplesPerSec=106.80961062155866, CurrSamplesPerSec=108.14730586013292, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:15:20,107] [INFO] [logging.py:96:log_dist] [Rank 0] step=2380, skipped=34, lr=[4.122155861589364e-06, 4.122155861589364e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2379|ppo_ep: 1|act_loss: -0.03759765625|cri_loss: -0.0180816650390625|unsuper_loss: 0.0
-average reward score: 5.453125
--------------------------------------------------------------------------------------
-|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.98%) |Training time=0.46s (19.79%) |Others=0.10 (4.23%)|CurSamplesPerSec=13.80 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2380|ppo_ep: 1|act_loss: -0.0775146484375|cri_loss: -0.037384033203125|unsuper_loss: 0.0
-average reward score: 5.37890625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.03%) |Training time=0.46s (21.38%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2381|ppo_ep: 1|act_loss: 0.005031585693359375|cri_loss: 0.0032672882080078125|unsuper_loss: 0.0
-average reward score: 5.078125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.03%) |Training time=0.46s (21.38%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2382|ppo_ep: 1|act_loss: 0.00275421142578125|cri_loss: 0.0020999908447265625|unsuper_loss: 0.0
-average reward score: 5.24609375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.44%) |Training time=0.47s (21.99%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2383|ppo_ep: 1|act_loss: 0.025909423828125|cri_loss: 0.0133514404296875|unsuper_loss: 0.0
-average reward score: 5.1328125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.63%) |Training time=0.47s (21.79%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2384|ppo_ep: 1|act_loss: -0.0069427490234375|cri_loss: -0.003017425537109375|unsuper_loss: 0.0
-average reward score: 5.140625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.10%) |Training time=0.46s (21.36%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2385|ppo_ep: 1|act_loss: 0.0206756591796875|cri_loss: 0.01073455810546875|unsuper_loss: 0.0
-average reward score: 5.2109375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.97%) |Training time=0.46s (21.40%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2386|ppo_ep: 1|act_loss: 0.0285186767578125|cri_loss: 0.015472412109375|unsuper_loss: 0.0
-average reward score: 4.9453125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.77%) |Training time=0.46s (21.63%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2387|ppo_ep: 1|act_loss: 0.01178741455078125|cri_loss: 0.006023406982421875|unsuper_loss: 0.0
-average reward score: 5.60546875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.81%) |Training time=0.47s (21.62%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2388|ppo_ep: 1|act_loss: 0.00728607177734375|cri_loss: 0.004486083984375|unsuper_loss: 0.0
-average reward score: 5.4453125
--------------------------------------------------------------------------------------
-|E2E latency=3.55s |Gather latency=0.00s (0.00%) |Generate time=1.59s (44.74%) |Training time=0.47s (13.16%) |Others=1.49 (42.10%)|CurSamplesPerSec=9.02 |AvgSamplesPerSec=14.44
-[2023-04-14 10:15:42,901] [INFO] [logging.py:96:log_dist] [Rank 0] step=2390, skipped=28, lr=[7.933085967343084e-06, 7.933085967343084e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:15:42,920] [INFO] [timer.py:199:stop] epoch=0/micro_step=2390/global_step=2390, RunningAvgSamplesPerSec=106.80495752503842, CurrSamplesPerSec=102.83638302795524, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:15:43,012] [INFO] [logging.py:96:log_dist] [Rank 0] step=2390, skipped=34, lr=[4.114820156216969e-06, 4.114820156216969e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2389|ppo_ep: 1|act_loss: 0.00827789306640625|cri_loss: 0.004276275634765625|unsuper_loss: 0.0
-average reward score: 5.98828125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.41%) |Training time=0.47s (22.04%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2390|ppo_ep: 1|act_loss: 0.07122802734375|cri_loss: 0.03704833984375|unsuper_loss: 0.0
-average reward score: 5.60546875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.09%) |Training time=0.48s (22.36%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2391|ppo_ep: 1|act_loss: -0.0092926025390625|cri_loss: -0.0042572021484375|unsuper_loss: 0.0
-average reward score: 5.3984375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.48%) |Training time=0.47s (21.96%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2392|ppo_ep: 1|act_loss: 0.034332275390625|cri_loss: 0.018798828125|unsuper_loss: 0.0
-average reward score: 5.453125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.53%) |Training time=0.47s (21.91%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2393|ppo_ep: 1|act_loss: 0.005512237548828125|cri_loss: 0.0033626556396484375|unsuper_loss: 0.0
-average reward score: 5.21484375
--------------------------------------------------------------------------------------
-|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.59s (70.45%) |Training time=0.48s (21.54%) |Others=0.18 (8.01%)|CurSamplesPerSec=14.22 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2394|ppo_ep: 1|act_loss: -0.0863037109375|cri_loss: -0.039642333984375|unsuper_loss: 0.0
-average reward score: 5.08984375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.50%) |Training time=0.47s (21.93%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2395|ppo_ep: 1|act_loss: 0.0174560546875|cri_loss: 0.0109100341796875|unsuper_loss: 0.0
-average reward score: 5.234375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.56%) |Training time=0.47s (21.88%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2396|ppo_ep: 1|act_loss: 0.0147552490234375|cri_loss: 0.0081024169921875|unsuper_loss: 0.0
-average reward score: 5.15625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.56%) |Training time=0.47s (21.87%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2397|ppo_ep: 1|act_loss: 0.04022216796875|cri_loss: 0.02227783203125|unsuper_loss: 0.0
-average reward score: 5.05078125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.90%) |Training time=0.46s (21.53%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2398|ppo_ep: 1|act_loss: 0.00225067138671875|cri_loss: 0.002269744873046875|unsuper_loss: 0.0
-average reward score: 5.4140625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.80%) |Training time=0.47s (21.64%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.44
-[2023-04-14 10:16:04,555] [INFO] [logging.py:96:log_dist] [Rank 0] step=2400, skipped=28, lr=[7.918854244072016e-06, 7.918854244072016e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:16:04,930] [INFO] [timer.py:199:stop] epoch=0/micro_step=2400/global_step=2400, RunningAvgSamplesPerSec=106.73590195556389, CurrSamplesPerSec=48.33765913846442, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:16:05,022] [INFO] [logging.py:96:log_dist] [Rank 0] step=2400, skipped=34, lr=[4.107460515314316e-06, 4.107460515314316e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2399|ppo_ep: 1|act_loss: 0.03839111328125|cri_loss: 0.019744873046875|unsuper_loss: 0.0
-average reward score: 5.015625
--------------------------------------------------------------------------------------
-|E2E latency=2.51s |Gather latency=0.00s (0.00%) |Generate time=1.59s (63.25%) |Training time=0.83s (32.86%) |Others=0.10 (3.89%)|CurSamplesPerSec=12.74 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2400|ppo_ep: 1|act_loss: 0.0072479248046875|cri_loss: 0.0039520263671875|unsuper_loss: 0.0
-average reward score: 5.57421875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.46%) |Training time=0.48s (22.00%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2401|ppo_ep: 1|act_loss: 0.014312744140625|cri_loss: 0.00835418701171875|unsuper_loss: 0.0
-average reward score: 5.26953125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.94%) |Training time=0.46s (21.49%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2402|ppo_ep: 1|act_loss: -0.0147247314453125|cri_loss: -0.0068511962890625|unsuper_loss: 0.0
-average reward score: 4.9453125
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.29%) |Training time=0.49s (22.20%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2403|ppo_ep: 1|act_loss: -0.0408935546875|cri_loss: -0.0194549560546875|unsuper_loss: 0.0
-average reward score: 5.0859375
--------------------------------------------------------------------------------------
-|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.99%) |Training time=0.47s (20.65%) |Others=0.10 (4.36%)|CurSamplesPerSec=14.17 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2404|ppo_ep: 1|act_loss: -0.0189361572265625|cri_loss: -0.0086669921875|unsuper_loss: 0.0
-average reward score: 5.5078125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.93%) |Training time=0.46s (21.49%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2405|ppo_ep: 1|act_loss: -0.0101776123046875|cri_loss: -0.0037841796875|unsuper_loss: 0.0
-average reward score: 5.71875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.85%) |Training time=0.46s (21.57%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2406|ppo_ep: 1|act_loss: -0.02496337890625|cri_loss: -0.0120086669921875|unsuper_loss: 0.0
-average reward score: 5.4140625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.92%) |Training time=0.46s (21.52%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2407|ppo_ep: 1|act_loss: -0.023956298828125|cri_loss: -0.01080322265625|unsuper_loss: 0.0
-average reward score: 5.08203125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.36%) |Training time=0.48s (22.09%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2408|ppo_ep: 1|act_loss: -0.03350830078125|cri_loss: -0.0165557861328125|unsuper_loss: 0.0
-average reward score: 5.5390625
--------------------------------------------------------------------------------------
-|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.75s (74.79%) |Training time=0.49s (21.01%) |Others=0.10 (4.20%)|CurSamplesPerSec=13.68 |AvgSamplesPerSec=14.44
-[2023-04-14 10:16:26,789] [INFO] [logging.py:96:log_dist] [Rank 0] step=2410, skipped=28, lr=[7.904576662417536e-06, 7.904576662417536e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:16:26,807] [INFO] [timer.py:199:stop] epoch=0/micro_step=2410/global_step=2410, RunningAvgSamplesPerSec=106.72267940839932, CurrSamplesPerSec=106.29027667458327, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:16:26,900] [INFO] [logging.py:96:log_dist] [Rank 0] step=2410, skipped=34, lr=[4.10007704796904e-06, 4.10007704796904e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2409|ppo_ep: 1|act_loss: 0.013824462890625|cri_loss: 0.00738525390625|unsuper_loss: 0.0
-average reward score: 5.3671875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.93%) |Training time=0.46s (21.52%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2410|ppo_ep: 1|act_loss: 0.06011962890625|cri_loss: 0.030853271484375|unsuper_loss: 0.0
-average reward score: 5.49609375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.93%) |Training time=0.46s (21.51%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2411|ppo_ep: 1|act_loss: 0.0352783203125|cri_loss: 0.0180816650390625|unsuper_loss: 0.0
-average reward score: 5.81640625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.00%) |Training time=0.46s (21.42%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2412|ppo_ep: 1|act_loss: -0.01898193359375|cri_loss: -0.0083770751953125|unsuper_loss: 0.0
-average reward score: 4.51953125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.60%) |Training time=0.47s (21.83%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2413|ppo_ep: 1|act_loss: 0.0094451904296875|cri_loss: 0.005046844482421875|unsuper_loss: 0.0
-average reward score: 5.3515625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.81%) |Training time=0.46s (21.62%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.44
-[2023-04-14 10:16:37,982] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
-epoch: 0|step: 2414|ppo_ep: 1|act_loss: 0.016632080078125|cri_loss: 0.00992584228515625|unsuper_loss: 0.0
-average reward score: 5.01171875
--------------------------------------------------------------------------------------
-|E2E latency=2.06s |Gather latency=0.00s (0.00%) |Generate time=1.58s (76.94%) |Training time=0.39s (18.77%) |Others=0.09 (4.29%)|CurSamplesPerSec=15.53 |AvgSamplesPerSec=14.44
-[2023-04-14 10:16:40,127] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
-epoch: 0|step: 2415|ppo_ep: 1|act_loss: 0.0082550048828125|cri_loss: 0.00455474853515625|unsuper_loss: 0.0
-average reward score: 5.265625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.01%) |Training time=0.47s (21.90%) |Others=0.09 (4.09%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2416|ppo_ep: 1|act_loss: -0.0082855224609375|cri_loss: -0.002712249755859375|unsuper_loss: 0.0
-average reward score: 4.8359375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.53%) |Training time=0.47s (21.90%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2417|ppo_ep: 1|act_loss: -0.03045654296875|cri_loss: -0.0147552490234375|unsuper_loss: 0.0
-average reward score: 5.015625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.34%) |Training time=0.48s (22.12%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2418|ppo_ep: 1|act_loss: -0.05352783203125|cri_loss: -0.0257720947265625|unsuper_loss: 0.0
-average reward score: 5.91796875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.07%) |Training time=0.46s (21.34%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.44
-[2023-04-14 10:16:48,627] [INFO] [logging.py:96:log_dist] [Rank 0] step=2420, skipped=28, lr=[7.890253434007843e-06, 7.890253434007843e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:16:48,646] [INFO] [timer.py:199:stop] epoch=0/micro_step=2420/global_step=2420, RunningAvgSamplesPerSec=106.727923793645, CurrSamplesPerSec=107.89796226803395, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:16:48,739] [INFO] [logging.py:96:log_dist] [Rank 0] step=2420, skipped=36, lr=[4.09415319258652e-06, 4.09415319258652e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2419|ppo_ep: 1|act_loss: -0.021881103515625|cri_loss: -0.0089111328125|unsuper_loss: 0.0
-average reward score: 5.58203125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.98%) |Training time=0.46s (21.45%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2420|ppo_ep: 1|act_loss: -0.023956298828125|cri_loss: -0.0104522705078125|unsuper_loss: 0.0
-average reward score: 5.02734375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.59%) |Training time=0.47s (21.82%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2421|ppo_ep: 1|act_loss: -0.0184326171875|cri_loss: -0.00875091552734375|unsuper_loss: 0.0
-average reward score: 4.83203125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.17%) |Training time=0.45s (21.23%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2422|ppo_ep: 1|act_loss: 0.0226287841796875|cri_loss: 0.0121002197265625|unsuper_loss: 0.0
-average reward score: 5.0
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.92%) |Training time=0.46s (21.49%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2423|ppo_ep: 1|act_loss: 0.03912353515625|cri_loss: 0.0204925537109375|unsuper_loss: 0.0
-average reward score: 4.7265625
--------------------------------------------------------------------------------------
-|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.76%) |Training time=0.46s (19.95%) |Others=0.10 (4.28%)|CurSamplesPerSec=13.88 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2424|ppo_ep: 1|act_loss: -0.020233154296875|cri_loss: -0.009307861328125|unsuper_loss: 0.0
-average reward score: 5.34375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.88%) |Training time=0.46s (21.53%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2425|ppo_ep: 1|act_loss: -0.003818511962890625|cri_loss: -0.0005397796630859375|unsuper_loss: 0.0
-average reward score: 5.3671875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.96%) |Training time=0.46s (21.45%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2426|ppo_ep: 1|act_loss: 0.0031452178955078125|cri_loss: 0.00262451171875|unsuper_loss: 0.0
-average reward score: 5.27734375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.92%) |Training time=0.46s (21.49%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2427|ppo_ep: 1|act_loss: 0.01540374755859375|cri_loss: 0.0081024169921875|unsuper_loss: 0.0
-average reward score: 5.15234375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.18%) |Training time=0.46s (21.24%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2428|ppo_ep: 1|act_loss: -0.0140380859375|cri_loss: -0.00598907470703125|unsuper_loss: 0.0
-average reward score: 5.26953125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.22%) |Training time=0.45s (21.20%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.44
-[2023-04-14 10:17:10,235] [INFO] [logging.py:96:log_dist] [Rank 0] step=2430, skipped=28, lr=[7.875884771147738e-06, 7.875884771147738e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:17:10,700] [INFO] [timer.py:199:stop] epoch=0/micro_step=2430/global_step=2430, RunningAvgSamplesPerSec=106.66533428350265, CurrSamplesPerSec=42.60237837151541, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:17:10,793] [INFO] [logging.py:96:log_dist] [Rank 0] step=2430, skipped=36, lr=[4.0867271136716215e-06, 4.0867271136716215e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2429|ppo_ep: 1|act_loss: 0.012298583984375|cri_loss: 0.00710296630859375|unsuper_loss: 0.0
-average reward score: 5.05859375
--------------------------------------------------------------------------------------
-|E2E latency=2.60s |Gather latency=0.00s (0.00%) |Generate time=1.59s (61.03%) |Training time=0.91s (35.20%) |Others=0.10 (3.77%)|CurSamplesPerSec=12.32 |AvgSamplesPerSec=14.44
-[2023-04-14 10:17:12,818] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
-epoch: 0|step: 2430|ppo_ep: 1|act_loss: 0.060546875|cri_loss: 0.031036376953125|unsuper_loss: 0.0
-average reward score: 4.734375
--------------------------------------------------------------------------------------
-|E2E latency=2.12s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.78%) |Training time=0.44s (20.58%) |Others=0.10 (4.64%)|CurSamplesPerSec=15.11 |AvgSamplesPerSec=14.44
-[2023-04-14 10:17:14,938] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
-epoch: 0|step: 2431|ppo_ep: 1|act_loss: 0.031097412109375|cri_loss: 0.0160064697265625|unsuper_loss: 0.0
-average reward score: 5.7265625
--------------------------------------------------------------------------------------
-|E2E latency=2.12s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.71%) |Training time=0.42s (19.65%) |Others=0.10 (4.64%)|CurSamplesPerSec=15.09 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2432|ppo_ep: 1|act_loss: 0.023956298828125|cri_loss: 0.01248931884765625|unsuper_loss: 0.0
-average reward score: 5.125
--------------------------------------------------------------------------------------
-|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.63s (73.06%) |Training time=0.50s (22.35%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.36 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2433|ppo_ep: 1|act_loss: -0.008575439453125|cri_loss: -0.0037174224853515625|unsuper_loss: 0.0
-average reward score: 5.54296875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.32%) |Training time=0.45s (21.08%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2434|ppo_ep: 1|act_loss: -0.03350830078125|cri_loss: -0.01513671875|unsuper_loss: 0.0
-average reward score: 5.0859375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.00%) |Training time=0.46s (21.43%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2435|ppo_ep: 1|act_loss: -0.048828125|cri_loss: -0.0222625732421875|unsuper_loss: 0.0
-average reward score: 4.953125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.05%) |Training time=0.46s (21.37%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2436|ppo_ep: 1|act_loss: -0.03192138671875|cri_loss: -0.01523590087890625|unsuper_loss: 0.0
-average reward score: 5.0859375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.00%) |Training time=0.46s (21.43%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2437|ppo_ep: 1|act_loss: -0.03497314453125|cri_loss: -0.0169219970703125|unsuper_loss: 0.0
-average reward score: 5.203125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.65%) |Training time=0.47s (21.81%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2438|ppo_ep: 1|act_loss: -0.00943756103515625|cri_loss: -0.004093170166015625|unsuper_loss: 0.0
-average reward score: 4.97265625
--------------------------------------------------------------------------------------
-|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.73%) |Training time=0.47s (20.02%) |Others=0.10 (4.24%)|CurSamplesPerSec=13.78 |AvgSamplesPerSec=14.44
-[2023-04-14 10:17:32,354] [INFO] [logging.py:96:log_dist] [Rank 0] step=2440, skipped=30, lr=[7.86435727115419e-06, 7.86435727115419e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:17:32,372] [INFO] [timer.py:199:stop] epoch=0/micro_step=2440/global_step=2440, RunningAvgSamplesPerSec=106.67192282118397, CurrSamplesPerSec=106.79475676768126, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:17:32,464] [INFO] [logging.py:96:log_dist] [Rank 0] step=2440, skipped=36, lr=[4.079277515633127e-06, 4.079277515633127e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2439|ppo_ep: 1|act_loss: -0.02801513671875|cri_loss: -0.01326751708984375|unsuper_loss: 0.0
-average reward score: 4.84375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.92%) |Training time=0.46s (21.53%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2440|ppo_ep: 1|act_loss: 0.0024929046630859375|cri_loss: 0.002971649169921875|unsuper_loss: 0.0
-average reward score: 5.52734375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.90%) |Training time=0.46s (21.54%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2441|ppo_ep: 1|act_loss: 0.0130157470703125|cri_loss: 0.00727081298828125|unsuper_loss: 0.0
-average reward score: 5.3046875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.94%) |Training time=0.46s (21.48%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2442|ppo_ep: 1|act_loss: -0.056610107421875|cri_loss: -0.0220947265625|unsuper_loss: 0.0
-average reward score: 5.25
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.65%) |Training time=0.47s (21.79%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2443|ppo_ep: 1|act_loss: 0.002925872802734375|cri_loss: 0.0020599365234375|unsuper_loss: 0.0
-average reward score: 5.28515625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.93%) |Training time=0.46s (21.50%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2444|ppo_ep: 1|act_loss: -0.0277099609375|cri_loss: -0.0124053955078125|unsuper_loss: 0.0
-average reward score: 5.3828125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.63%) |Training time=0.47s (21.80%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2445|ppo_ep: 1|act_loss: -0.01244354248046875|cri_loss: -0.00579071044921875|unsuper_loss: 0.0
-average reward score: 5.28125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.83%) |Training time=0.46s (21.58%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2446|ppo_ep: 1|act_loss: 0.003345489501953125|cri_loss: 0.0025730133056640625|unsuper_loss: 0.0
-average reward score: 5.1796875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.85%) |Training time=0.46s (21.43%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2447|ppo_ep: 1|act_loss: 0.0196533203125|cri_loss: 0.010162353515625|unsuper_loss: 0.0
-average reward score: 5.50390625
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.90%) |Training time=0.49s (22.59%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2448|ppo_ep: 1|act_loss: 0.00513458251953125|cri_loss: 0.003154754638671875|unsuper_loss: 0.0
-average reward score: 5.7578125
--------------------------------------------------------------------------------------
-|E2E latency=3.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (50.62%) |Training time=0.49s (15.71%) |Others=1.06 (33.67%)|CurSamplesPerSec=10.16 |AvgSamplesPerSec=14.44
-[2023-04-14 10:17:54,826] [INFO] [logging.py:96:log_dist] [Rank 0] step=2450, skipped=30, lr=[7.849907363439066e-06, 7.849907363439066e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:17:54,844] [INFO] [timer.py:199:stop] epoch=0/micro_step=2450/global_step=2450, RunningAvgSamplesPerSec=106.67218273543259, CurrSamplesPerSec=143.05433748050848, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:17:54,937] [INFO] [logging.py:96:log_dist] [Rank 0] step=2450, skipped=36, lr=[4.071804508892053e-06, 4.071804508892053e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2449|ppo_ep: 1|act_loss: 0.0030059814453125|cri_loss: 0.0019683837890625|unsuper_loss: 0.0
-average reward score: 5.13671875
--------------------------------------------------------------------------------------
-|E2E latency=2.09s |Gather latency=0.00s (0.00%) |Generate time=1.61s (76.84%) |Training time=0.39s (18.46%) |Others=0.10 (4.70%)|CurSamplesPerSec=15.28 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2450|ppo_ep: 1|act_loss: 0.0238037109375|cri_loss: 0.0124053955078125|unsuper_loss: 0.0
-average reward score: 5.75
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.47%) |Training time=0.48s (22.01%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2451|ppo_ep: 1|act_loss: 0.023345947265625|cri_loss: 0.01213836669921875|unsuper_loss: 0.0
-average reward score: 4.91015625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.95%) |Training time=0.47s (21.52%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2452|ppo_ep: 1|act_loss: -0.028411865234375|cri_loss: -0.0133209228515625|unsuper_loss: 0.0
-average reward score: 5.86328125
--------------------------------------------------------------------------------------
-|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.61s (69.67%) |Training time=0.60s (26.07%) |Others=0.10 (4.26%)|CurSamplesPerSec=13.87 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2453|ppo_ep: 1|act_loss: -0.03826904296875|cri_loss: -0.01776123046875|unsuper_loss: 0.0
-average reward score: 5.1484375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.20%) |Training time=0.46s (21.27%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2454|ppo_ep: 1|act_loss: -0.0279541015625|cri_loss: -0.01248931884765625|unsuper_loss: 0.0
-average reward score: 5.98046875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.85%) |Training time=0.47s (21.62%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2455|ppo_ep: 1|act_loss: -0.070068359375|cri_loss: -0.033843994140625|unsuper_loss: 0.0
-average reward score: 5.5
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.92%) |Training time=0.47s (21.53%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2456|ppo_ep: 1|act_loss: -0.0152740478515625|cri_loss: -0.00713348388671875|unsuper_loss: 0.0
-average reward score: 5.61328125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.00%) |Training time=0.47s (21.47%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2457|ppo_ep: 1|act_loss: 0.03961181640625|cri_loss: 0.0210113525390625|unsuper_loss: 0.0
-average reward score: 5.94140625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.77%) |Training time=0.47s (21.69%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2458|ppo_ep: 1|act_loss: 0.0264739990234375|cri_loss: 0.013702392578125|unsuper_loss: 0.0
-average reward score: 5.20703125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.15%) |Training time=0.46s (21.30%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.44
-[2023-04-14 10:18:16,652] [INFO] [logging.py:96:log_dist] [Rank 0] step=2460, skipped=30, lr=[7.83541261929962e-06, 7.83541261929962e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:18:16,671] [INFO] [timer.py:199:stop] epoch=0/micro_step=2460/global_step=2460, RunningAvgSamplesPerSec=106.65022498334142, CurrSamplesPerSec=106.88601260323848, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:18:16,763] [INFO] [logging.py:96:log_dist] [Rank 0] step=2460, skipped=36, lr=[4.064308204216384e-06, 4.064308204216384e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2459|ppo_ep: 1|act_loss: 0.04412841796875|cri_loss: 0.0247955322265625|unsuper_loss: 0.0
-average reward score: 4.89453125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.19%) |Training time=0.46s (21.30%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2460|ppo_ep: 1|act_loss: 0.00756072998046875|cri_loss: 0.004199981689453125|unsuper_loss: 0.0
-average reward score: 5.3046875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.90%) |Training time=0.47s (21.58%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2461|ppo_ep: 1|act_loss: -0.016754150390625|cri_loss: -0.00782012939453125|unsuper_loss: 0.0
-average reward score: 5.046875
--------------------------------------------------------------------------------------
-|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.64s (73.36%) |Training time=0.50s (22.24%) |Others=0.10 (4.40%)|CurSamplesPerSec=14.30 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2462|ppo_ep: 1|act_loss: -0.0281982421875|cri_loss: -0.01314544677734375|unsuper_loss: 0.0
-average reward score: 5.26953125
--------------------------------------------------------------------------------------
-|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.72s (73.11%) |Training time=0.53s (22.69%) |Others=0.10 (4.20%)|CurSamplesPerSec=13.64 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2463|ppo_ep: 1|act_loss: 0.015106201171875|cri_loss: 0.00778961181640625|unsuper_loss: 0.0
-average reward score: 6.09765625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.79%) |Training time=0.45s (20.65%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2464|ppo_ep: 1|act_loss: 0.005290985107421875|cri_loss: 0.003032684326171875|unsuper_loss: 0.0
-average reward score: 5.55859375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.44%) |Training time=0.45s (21.01%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2465|ppo_ep: 1|act_loss: 0.03558349609375|cri_loss: 0.018585205078125|unsuper_loss: 0.0
-average reward score: 5.5625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.61%) |Training time=0.45s (20.82%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2466|ppo_ep: 1|act_loss: 0.05950927734375|cri_loss: 0.03204345703125|unsuper_loss: 0.0
-average reward score: 5.8828125
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.17%) |Training time=0.47s (21.34%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2467|ppo_ep: 1|act_loss: -0.024322509765625|cri_loss: -0.011688232421875|unsuper_loss: 0.0
-average reward score: 5.859375
--------------------------------------------------------------------------------------
-|E2E latency=2.83s |Gather latency=0.00s (0.00%) |Generate time=1.77s (62.55%) |Training time=0.47s (16.55%) |Others=0.59 (20.91%)|CurSamplesPerSec=11.32 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2468|ppo_ep: 1|act_loss: -0.0190582275390625|cri_loss: -0.0084686279296875|unsuper_loss: 0.0
-average reward score: 5.515625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.32%) |Training time=0.46s (21.10%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.44
-[2023-04-14 10:18:39,237] [INFO] [logging.py:96:log_dist] [Rank 0] step=2470, skipped=30, lr=[7.820873253582933e-06, 7.820873253582933e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:18:39,255] [INFO] [timer.py:199:stop] epoch=0/micro_step=2470/global_step=2470, RunningAvgSamplesPerSec=106.64010750425537, CurrSamplesPerSec=108.77555969060602, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:18:39,348] [INFO] [logging.py:96:log_dist] [Rank 0] step=2470, skipped=36, lr=[4.0567887127194405e-06, 4.0567887127194405e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2469|ppo_ep: 1|act_loss: -0.0386962890625|cri_loss: -0.01812744140625|unsuper_loss: 0.0
-average reward score: 5.453125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.33%) |Training time=0.46s (21.14%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2470|ppo_ep: 1|act_loss: -0.01959228515625|cri_loss: -0.00817108154296875|unsuper_loss: 0.0
-average reward score: 5.7890625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.08%) |Training time=0.46s (21.37%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2471|ppo_ep: 1|act_loss: -0.060882568359375|cri_loss: -0.029144287109375|unsuper_loss: 0.0
-average reward score: 5.24609375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.31%) |Training time=0.46s (21.13%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2472|ppo_ep: 1|act_loss: -0.0433349609375|cri_loss: -0.02105712890625|unsuper_loss: 0.0
-average reward score: 5.73046875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.15%) |Training time=0.46s (21.31%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2473|ppo_ep: 1|act_loss: 0.016021728515625|cri_loss: 0.00916290283203125|unsuper_loss: 0.0
-average reward score: 5.015625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.57%) |Training time=0.45s (20.86%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2474|ppo_ep: 1|act_loss: 0.01537322998046875|cri_loss: 0.008544921875|unsuper_loss: 0.0
-average reward score: 5.8203125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.18%) |Training time=0.46s (21.28%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2475|ppo_ep: 1|act_loss: 0.0110931396484375|cri_loss: 0.00629425048828125|unsuper_loss: 0.0
-average reward score: 5.01171875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.29%) |Training time=0.46s (21.16%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2476|ppo_ep: 1|act_loss: 0.033050537109375|cri_loss: 0.01837158203125|unsuper_loss: 0.0
-average reward score: 5.48046875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.10%) |Training time=0.46s (21.35%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2477|ppo_ep: 1|act_loss: 0.0188446044921875|cri_loss: 0.00989532470703125|unsuper_loss: 0.0
-average reward score: 5.63671875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.35%) |Training time=0.46s (21.12%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2478|ppo_ep: 1|act_loss: 0.025146484375|cri_loss: 0.0129547119140625|unsuper_loss: 0.0
-average reward score: 5.46875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.45%) |Training time=0.45s (20.99%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.44
-[2023-04-14 10:19:00,844] [INFO] [logging.py:96:log_dist] [Rank 0] step=2480, skipped=30, lr=[7.806289481797477e-06, 7.806289481797477e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:19:00,863] [INFO] [timer.py:199:stop] epoch=0/micro_step=2480/global_step=2480, RunningAvgSamplesPerSec=106.6468968374762, CurrSamplesPerSec=107.46410836588056, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:19:00,955] [INFO] [logging.py:96:log_dist] [Rank 0] step=2480, skipped=36, lr=[4.049246145858227e-06, 4.049246145858227e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2479|ppo_ep: 1|act_loss: 0.01556396484375|cri_loss: 0.008331298828125|unsuper_loss: 0.0
-average reward score: 5.2734375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.16%) |Training time=0.46s (21.31%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2480|ppo_ep: 1|act_loss: -0.0232086181640625|cri_loss: -0.01064300537109375|unsuper_loss: 0.0
-average reward score: 5.2734375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.23%) |Training time=0.46s (21.21%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2481|ppo_ep: 1|act_loss: -0.02044677734375|cri_loss: -0.01001739501953125|unsuper_loss: 0.0
-average reward score: 5.2578125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.16%) |Training time=0.46s (21.29%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2482|ppo_ep: 1|act_loss: -0.02606201171875|cri_loss: -0.01244354248046875|unsuper_loss: 0.0
-average reward score: 5.43359375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.26%) |Training time=0.46s (21.18%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2483|ppo_ep: 1|act_loss: -0.022430419921875|cri_loss: -0.01071929931640625|unsuper_loss: 0.0
-average reward score: 5.38671875
--------------------------------------------------------------------------------------
-|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.77s (76.22%) |Training time=0.45s (19.56%) |Others=0.10 (4.23%)|CurSamplesPerSec=13.76 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2484|ppo_ep: 1|act_loss: -0.041107177734375|cri_loss: -0.0194549560546875|unsuper_loss: 0.0
-average reward score: 6.00390625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.27%) |Training time=0.46s (21.17%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2485|ppo_ep: 1|act_loss: -0.0049285888671875|cri_loss: -0.0019235610961914062|unsuper_loss: 0.0
-average reward score: 5.7421875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.45%) |Training time=0.45s (20.98%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2486|ppo_ep: 1|act_loss: 0.01280975341796875|cri_loss: 0.006969451904296875|unsuper_loss: 0.0
-average reward score: 5.6484375
--------------------------------------------------------------------------------------
-|E2E latency=3.28s |Gather latency=0.00s (0.00%) |Generate time=1.61s (49.07%) |Training time=0.45s (13.68%) |Others=1.22 (37.25%)|CurSamplesPerSec=9.76 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2487|ppo_ep: 1|act_loss: -0.00832366943359375|cri_loss: -0.0038585662841796875|unsuper_loss: 0.0
-average reward score: 5.8203125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.24%) |Training time=0.46s (21.21%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2488|ppo_ep: 1|act_loss: 0.04339599609375|cri_loss: 0.0228118896484375|unsuper_loss: 0.0
-average reward score: 5.640625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.40%) |Training time=0.45s (21.04%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.44
-[2023-04-14 10:19:23,722] [INFO] [logging.py:96:log_dist] [Rank 0] step=2490, skipped=30, lr=[7.791661520109931e-06, 7.791661520109931e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:19:23,741] [INFO] [timer.py:199:stop] epoch=0/micro_step=2490/global_step=2490, RunningAvgSamplesPerSec=106.6586371426018, CurrSamplesPerSec=114.51340619587567, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:19:23,833] [INFO] [logging.py:96:log_dist] [Rank 0] step=2490, skipped=36, lr=[4.041680615431779e-06, 4.041680615431779e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2489|ppo_ep: 1|act_loss: -0.00444793701171875|cri_loss: -0.0018949508666992188|unsuper_loss: 0.0
-average reward score: 5.421875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.01%) |Training time=0.44s (20.45%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2490|ppo_ep: 1|act_loss: -0.036956787109375|cri_loss: -0.0178070068359375|unsuper_loss: 0.0
-average reward score: 5.8828125
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.48%) |Training time=0.46s (20.95%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2491|ppo_ep: 1|act_loss: -0.022216796875|cri_loss: -0.0098419189453125|unsuper_loss: 0.0
-average reward score: 5.5
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.59%) |Training time=0.45s (20.84%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2492|ppo_ep: 1|act_loss: -0.011993408203125|cri_loss: -0.00548553466796875|unsuper_loss: 0.0
-average reward score: 5.8125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.99%) |Training time=0.44s (20.43%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2493|ppo_ep: 1|act_loss: 0.0078582763671875|cri_loss: 0.0042724609375|unsuper_loss: 0.0
-average reward score: 5.640625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.56%) |Training time=0.46s (20.92%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2494|ppo_ep: 1|act_loss: 0.00855255126953125|cri_loss: 0.00464630126953125|unsuper_loss: 0.0
-average reward score: 5.828125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.93%) |Training time=0.47s (21.55%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2495|ppo_ep: 1|act_loss: -0.022430419921875|cri_loss: -0.0108795166015625|unsuper_loss: 0.0
-average reward score: 5.0703125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.52%) |Training time=0.45s (20.92%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2496|ppo_ep: 1|act_loss: 0.018402099609375|cri_loss: 0.0096435546875|unsuper_loss: 0.0
-average reward score: 5.55859375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.56%) |Training time=0.45s (20.88%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2497|ppo_ep: 1|act_loss: -0.00981903076171875|cri_loss: -0.004726409912109375|unsuper_loss: 0.0
-average reward score: 5.44921875
--------------------------------------------------------------------------------------
-|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.66s (73.36%) |Training time=0.50s (22.27%) |Others=0.10 (4.37%)|CurSamplesPerSec=14.16 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2498|ppo_ep: 1|act_loss: 0.0111083984375|cri_loss: 0.00756072998046875|unsuper_loss: 0.0
-average reward score: 5.3984375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.53%) |Training time=0.45s (20.90%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.44
-[2023-04-14 10:19:45,473] [INFO] [logging.py:96:log_dist] [Rank 0] step=2500, skipped=30, lr=[7.776989585341974e-06, 7.776989585341974e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:19:45,491] [INFO] [timer.py:199:stop] epoch=0/micro_step=2500/global_step=2500, RunningAvgSamplesPerSec=106.674029550773, CurrSamplesPerSec=108.61095709058188, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:19:45,584] [INFO] [logging.py:96:log_dist] [Rank 0] step=2500, skipped=36, lr=[4.034092233579507e-06, 4.034092233579507e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2499|ppo_ep: 1|act_loss: 0.00739288330078125|cri_loss: 0.003993988037109375|unsuper_loss: 0.0
-average reward score: 5.921875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.28%) |Training time=0.46s (21.19%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2500|ppo_ep: 1|act_loss: 0.02899169921875|cri_loss: 0.015228271484375|unsuper_loss: 0.0
-average reward score: 5.62109375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.14%) |Training time=0.46s (21.29%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2501|ppo_ep: 1|act_loss: 0.0305938720703125|cri_loss: 0.0160675048828125|unsuper_loss: 0.0
-average reward score: 5.5625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.08%) |Training time=0.46s (21.37%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2502|ppo_ep: 1|act_loss: -0.043670654296875|cri_loss: -0.02056884765625|unsuper_loss: 0.0
-average reward score: 5.40234375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.29%) |Training time=0.46s (21.15%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2503|ppo_ep: 1|act_loss: -0.059326171875|cri_loss: -0.02783203125|unsuper_loss: 0.0
-average reward score: 6.2578125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.34%) |Training time=0.45s (21.09%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2504|ppo_ep: 1|act_loss: -0.0218048095703125|cri_loss: -0.01064300537109375|unsuper_loss: 0.0
-average reward score: 5.8125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.01%) |Training time=0.46s (21.44%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2505|ppo_ep: 1|act_loss: 0.0115966796875|cri_loss: 0.006610870361328125|unsuper_loss: 0.0
-average reward score: 5.53515625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.34%) |Training time=0.46s (21.11%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2506|ppo_ep: 1|act_loss: 0.0372314453125|cri_loss: 0.019439697265625|unsuper_loss: 0.0
-average reward score: 4.828125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.94%) |Training time=0.47s (21.52%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2507|ppo_ep: 1|act_loss: 0.019927978515625|cri_loss: 0.01120758056640625|unsuper_loss: 0.0
-average reward score: 5.12109375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.87%) |Training time=0.44s (20.55%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2508|ppo_ep: 1|act_loss: -0.009979248046875|cri_loss: -0.00489044189453125|unsuper_loss: 0.0
-average reward score: 5.421875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.32%) |Training time=0.48s (22.14%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.45
-[2023-04-14 10:20:07,063] [INFO] [logging.py:96:log_dist] [Rank 0] step=2510, skipped=30, lr=[7.762273894967078e-06, 7.762273894967078e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:20:07,081] [INFO] [timer.py:199:stop] epoch=0/micro_step=2510/global_step=2510, RunningAvgSamplesPerSec=106.67546349044484, CurrSamplesPerSec=101.66207505639152, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:20:07,174] [INFO] [logging.py:96:log_dist] [Rank 0] step=2510, skipped=36, lr=[4.0264811127795375e-06, 4.0264811127795375e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2509|ppo_ep: 1|act_loss: 0.059600830078125|cri_loss: 0.034027099609375|unsuper_loss: 0.0
-average reward score: 4.93359375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.34%) |Training time=0.48s (22.12%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2510|ppo_ep: 1|act_loss: -0.0127716064453125|cri_loss: -0.00566864013671875|unsuper_loss: 0.0
-average reward score: 5.8125
--------------------------------------------------------------------------------------
-|E2E latency=2.90s |Gather latency=0.00s (0.00%) |Generate time=1.58s (54.53%) |Training time=0.47s (16.30%) |Others=0.85 (29.17%)|CurSamplesPerSec=11.04 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2511|ppo_ep: 1|act_loss: 0.017974853515625|cri_loss: 0.0095367431640625|unsuper_loss: 0.0
-average reward score: 4.94140625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.47%) |Training time=0.47s (21.95%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2512|ppo_ep: 1|act_loss: -0.02691650390625|cri_loss: -0.01318359375|unsuper_loss: 0.0
-average reward score: 5.171875
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.58s (71.82%) |Training time=0.48s (21.59%) |Others=0.15 (6.59%)|CurSamplesPerSec=14.52 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2513|ppo_ep: 1|act_loss: 0.04351806640625|cri_loss: 0.025054931640625|unsuper_loss: 0.0
-average reward score: 5.44921875
--------------------------------------------------------------------------------------
-|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.66%) |Training time=0.47s (20.99%) |Others=0.10 (4.36%)|CurSamplesPerSec=14.17 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2514|ppo_ep: 1|act_loss: 0.0301361083984375|cri_loss: 0.0162506103515625|unsuper_loss: 0.0
-average reward score: 5.3984375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.81%) |Training time=0.49s (22.68%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2515|ppo_ep: 1|act_loss: 0.0187225341796875|cri_loss: 0.0095977783203125|unsuper_loss: 0.0
-average reward score: 5.4375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.79%) |Training time=0.47s (21.65%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.44
-[2023-04-14 10:20:23,173] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
-epoch: 0|step: 2516|ppo_ep: 1|act_loss: -0.0048065185546875|cri_loss: -0.0020961761474609375|unsuper_loss: 0.0
-average reward score: 5.5625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.86%) |Training time=0.47s (22.07%) |Others=0.09 (4.08%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.44
-[2023-04-14 10:20:25,315] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
-epoch: 0|step: 2517|ppo_ep: 1|act_loss: -0.00971221923828125|cri_loss: -0.0034580230712890625|unsuper_loss: 0.0
-average reward score: 5.328125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.93%) |Training time=0.47s (21.99%) |Others=0.09 (4.08%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2518|ppo_ep: 1|act_loss: 0.0168914794921875|cri_loss: 0.01013946533203125|unsuper_loss: 0.0
-average reward score: 5.2734375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.92%) |Training time=0.47s (21.53%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.44
-[2023-04-14 10:20:29,556] [INFO] [logging.py:96:log_dist] [Rank 0] step=2520, skipped=30, lr=[7.747514667107269e-06, 7.747514667107269e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:20:29,574] [INFO] [timer.py:199:stop] epoch=0/micro_step=2520/global_step=2520, RunningAvgSamplesPerSec=106.65882156515856, CurrSamplesPerSec=102.7891293792571, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:20:29,667] [INFO] [logging.py:96:log_dist] [Rank 0] step=2520, skipped=38, lr=[4.020375919897733e-06, 4.020375919897733e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2519|ppo_ep: 1|act_loss: 0.005268096923828125|cri_loss: 0.003143310546875|unsuper_loss: 0.0
-average reward score: 5.7578125
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.89%) |Training time=0.47s (21.65%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2520|ppo_ep: 1|act_loss: 0.034912109375|cri_loss: 0.0180206298828125|unsuper_loss: 0.0
-average reward score: 5.859375
--------------------------------------------------------------------------------------
-|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.70s (75.09%) |Training time=0.47s (20.58%) |Others=0.10 (4.33%)|CurSamplesPerSec=14.10 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2521|ppo_ep: 1|act_loss: 0.02166748046875|cri_loss: 0.0110626220703125|unsuper_loss: 0.0
-average reward score: 5.53515625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.97%) |Training time=0.47s (21.50%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2522|ppo_ep: 1|act_loss: 0.0024871826171875|cri_loss: 0.0013885498046875|unsuper_loss: 0.0
-average reward score: 5.4609375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.63%) |Training time=0.47s (21.84%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2523|ppo_ep: 1|act_loss: -0.024139404296875|cri_loss: -0.01153564453125|unsuper_loss: 0.0
-average reward score: 5.46875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.57%) |Training time=0.47s (21.85%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2524|ppo_ep: 1|act_loss: -0.0002079010009765625|cri_loss: 0.0004534721374511719|unsuper_loss: 0.0
-average reward score: 5.4140625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.49%) |Training time=0.47s (21.96%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2525|ppo_ep: 1|act_loss: -0.00968170166015625|cri_loss: -0.0042877197265625|unsuper_loss: 0.0
-average reward score: 5.015625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.36%) |Training time=0.48s (22.08%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2526|ppo_ep: 1|act_loss: -0.06610107421875|cri_loss: -0.03070068359375|unsuper_loss: 0.0
-average reward score: 5.44140625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.31%) |Training time=0.48s (22.12%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2527|ppo_ep: 1|act_loss: -0.04644775390625|cri_loss: -0.0222320556640625|unsuper_loss: 0.0
-average reward score: 5.7734375
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.56%) |Training time=0.47s (21.46%) |Others=0.13 (5.99%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2528|ppo_ep: 1|act_loss: -0.002399444580078125|cri_loss: -0.00034809112548828125|unsuper_loss: 0.0
-average reward score: 5.359375
--------------------------------------------------------------------------------------
-|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.71s (74.85%) |Training time=0.47s (20.52%) |Others=0.11 (4.63%)|CurSamplesPerSec=14.00 |AvgSamplesPerSec=14.45
-[2023-04-14 10:20:51,418] [INFO] [logging.py:96:log_dist] [Rank 0] step=2530, skipped=30, lr=[7.732712120529913e-06, 7.732712120529913e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:20:51,436] [INFO] [timer.py:199:stop] epoch=0/micro_step=2530/global_step=2530, RunningAvgSamplesPerSec=106.64682603841231, CurrSamplesPerSec=102.19486489141136, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:20:51,529] [INFO] [logging.py:96:log_dist] [Rank 0] step=2530, skipped=38, lr=[4.012724153511568e-06, 4.012724153511568e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2529|ppo_ep: 1|act_loss: -0.02001953125|cri_loss: -0.00980377197265625|unsuper_loss: 0.0
-average reward score: 6.09765625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.38%) |Training time=0.48s (22.08%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2530|ppo_ep: 1|act_loss: 0.0421142578125|cri_loss: 0.022308349609375|unsuper_loss: 0.0
-average reward score: 5.67578125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.78%) |Training time=0.47s (21.66%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2531|ppo_ep: 1|act_loss: 0.024688720703125|cri_loss: 0.0128326416015625|unsuper_loss: 0.0
-average reward score: 5.5390625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.61%) |Training time=0.47s (21.82%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45
-[2023-04-14 10:20:57,867] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
-epoch: 0|step: 2532|ppo_ep: 1|act_loss: 0.07666015625|cri_loss: 0.041778564453125|unsuper_loss: 0.0
-average reward score: 5.01953125
--------------------------------------------------------------------------------------
-|E2E latency=2.12s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.67%) |Training time=0.44s (20.73%) |Others=0.10 (4.60%)|CurSamplesPerSec=15.09 |AvgSamplesPerSec=14.45
-[2023-04-14 10:20:59,996] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
-epoch: 0|step: 2533|ppo_ep: 1|act_loss: 0.046142578125|cri_loss: 0.0245513916015625|unsuper_loss: 0.0
-average reward score: 5.87890625
--------------------------------------------------------------------------------------
-|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.53%) |Training time=0.44s (20.87%) |Others=0.10 (4.60%)|CurSamplesPerSec=15.02 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2534|ppo_ep: 1|act_loss: -7.677078247070312e-05|cri_loss: 0.0001392364501953125|unsuper_loss: 0.0
-average reward score: 5.8984375
--------------------------------------------------------------------------------------
-|E2E latency=2.60s |Gather latency=0.00s (0.00%) |Generate time=1.58s (60.80%) |Training time=0.48s (18.31%) |Others=0.54 (20.90%)|CurSamplesPerSec=12.29 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2535|ppo_ep: 1|act_loss: 0.00254058837890625|cri_loss: 0.0013990402221679688|unsuper_loss: 0.0
-average reward score: 5.76171875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.57%) |Training time=0.47s (21.86%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2536|ppo_ep: 1|act_loss: -0.01519012451171875|cri_loss: -0.00730133056640625|unsuper_loss: 0.0
-average reward score: 5.33203125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.61%) |Training time=0.45s (20.84%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2537|ppo_ep: 1|act_loss: -0.02679443359375|cri_loss: -0.01297760009765625|unsuper_loss: 0.0
-average reward score: 5.33984375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.51%) |Training time=0.45s (20.89%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2538|ppo_ep: 1|act_loss: -0.04571533203125|cri_loss: -0.022308349609375|unsuper_loss: 0.0
-average reward score: 5.78125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.64%) |Training time=0.47s (21.80%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45
-[2023-04-14 10:21:13,351] [INFO] [logging.py:96:log_dist] [Rank 0] step=2540, skipped=32, lr=[7.720839041213051e-06, 7.720839041213051e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:21:13,369] [INFO] [timer.py:199:stop] epoch=0/micro_step=2540/global_step=2540, RunningAvgSamplesPerSec=106.64877381183305, CurrSamplesPerSec=102.9472844528714, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:21:13,462] [INFO] [logging.py:96:log_dist] [Rank 0] step=2540, skipped=38, lr=[4.0050499649041985e-06, 4.0050499649041985e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2539|ppo_ep: 1|act_loss: -0.027313232421875|cri_loss: -0.01280975341796875|unsuper_loss: 0.0
-average reward score: 5.37109375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.53%) |Training time=0.47s (21.94%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2540|ppo_ep: 1|act_loss: 0.001941680908203125|cri_loss: 0.0014667510986328125|unsuper_loss: 0.0
-average reward score: 5.3671875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.54%) |Training time=0.47s (21.90%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2541|ppo_ep: 1|act_loss: 0.00600433349609375|cri_loss: 0.003421783447265625|unsuper_loss: 0.0
-average reward score: 5.39453125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.44%) |Training time=0.47s (22.01%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2542|ppo_ep: 1|act_loss: 0.00748443603515625|cri_loss: 0.004150390625|unsuper_loss: 0.0
-average reward score: 5.57421875
--------------------------------------------------------------------------------------
-|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.58%) |Training time=0.47s (20.19%) |Others=0.10 (4.23%)|CurSamplesPerSec=13.80 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2543|ppo_ep: 1|act_loss: 0.025390625|cri_loss: 0.014404296875|unsuper_loss: 0.0
-average reward score: 5.6015625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.86%) |Training time=0.46s (21.58%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2544|ppo_ep: 1|act_loss: 0.011566162109375|cri_loss: 0.00630950927734375|unsuper_loss: 0.0
-average reward score: 5.546875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.71%) |Training time=0.47s (21.73%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2545|ppo_ep: 1|act_loss: 0.0166015625|cri_loss: 0.0087432861328125|unsuper_loss: 0.0
-average reward score: 4.2734375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.92%) |Training time=0.46s (21.50%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2546|ppo_ep: 1|act_loss: 0.0025577545166015625|cri_loss: 0.001453399658203125|unsuper_loss: 0.0
-average reward score: 5.66015625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.74%) |Training time=0.47s (21.70%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2547|ppo_ep: 1|act_loss: -0.005771636962890625|cri_loss: -0.002132415771484375|unsuper_loss: 0.0
-average reward score: 5.41015625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.77%) |Training time=0.47s (21.67%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2548|ppo_ep: 1|act_loss: -0.0005550384521484375|cri_loss: 0.0006742477416992188|unsuper_loss: 0.0
-average reward score: 5.48046875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.15%) |Training time=0.46s (21.34%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.45
-[2023-04-14 10:21:35,117] [INFO] [logging.py:96:log_dist] [Rank 0] step=2550, skipped=32, lr=[7.705959074285325e-06, 7.705959074285325e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:21:35,135] [INFO] [timer.py:199:stop] epoch=0/micro_step=2550/global_step=2550, RunningAvgSamplesPerSec=106.64088588508935, CurrSamplesPerSec=103.17353387250142, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:21:35,228] [INFO] [logging.py:96:log_dist] [Rank 0] step=2550, skipped=38, lr=[3.997353467825614e-06, 3.997353467825614e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2549|ppo_ep: 1|act_loss: -0.033782958984375|cri_loss: -0.016387939453125|unsuper_loss: 0.0
-average reward score: 5.765625
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.02%) |Training time=0.47s (21.52%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.57 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2550|ppo_ep: 1|act_loss: -0.016998291015625|cri_loss: -0.0077667236328125|unsuper_loss: 0.0
-average reward score: 5.6484375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.82%) |Training time=0.46s (21.61%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2551|ppo_ep: 1|act_loss: -0.00411224365234375|cri_loss: -0.0017375946044921875|unsuper_loss: 0.0
-average reward score: 5.66015625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.13%) |Training time=0.46s (21.29%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2552|ppo_ep: 1|act_loss: 0.027069091796875|cri_loss: 0.01416778564453125|unsuper_loss: 0.0
-average reward score: 5.40234375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.59%) |Training time=0.45s (20.85%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2553|ppo_ep: 1|act_loss: 0.00783538818359375|cri_loss: 0.004398345947265625|unsuper_loss: 0.0
-average reward score: 5.63671875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.28%) |Training time=0.46s (21.15%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2554|ppo_ep: 1|act_loss: -0.0142974853515625|cri_loss: -0.00685882568359375|unsuper_loss: 0.0
-average reward score: 5.29296875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.08%) |Training time=0.46s (21.34%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2555|ppo_ep: 1|act_loss: -0.004123687744140625|cri_loss: -0.001667022705078125|unsuper_loss: 0.0
-average reward score: 5.5625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.69%) |Training time=0.45s (20.73%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2556|ppo_ep: 1|act_loss: -0.0052032470703125|cri_loss: -0.0022716522216796875|unsuper_loss: 0.0
-average reward score: 5.78125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.17%) |Training time=0.46s (21.26%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2557|ppo_ep: 1|act_loss: -0.01218414306640625|cri_loss: -0.0057220458984375|unsuper_loss: 0.0
-average reward score: 5.421875
--------------------------------------------------------------------------------------
-|E2E latency=2.84s |Gather latency=0.00s (0.00%) |Generate time=1.77s (62.32%) |Training time=0.45s (16.02%) |Others=0.61 (21.65%)|CurSamplesPerSec=11.27 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2558|ppo_ep: 1|act_loss: 0.0281524658203125|cri_loss: 0.0150299072265625|unsuper_loss: 0.0
-average reward score: 5.12890625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.94%) |Training time=0.47s (21.51%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.45
-[2023-04-14 10:21:57,356] [INFO] [logging.py:96:log_dist] [Rank 0] step=2560, skipped=32, lr=[7.691036404594183e-06, 7.691036404594183e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:21:57,374] [INFO] [timer.py:199:stop] epoch=0/micro_step=2560/global_step=2560, RunningAvgSamplesPerSec=106.64851899366893, CurrSamplesPerSec=108.4322206360786, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:21:57,467] [INFO] [logging.py:96:log_dist] [Rank 0] step=2560, skipped=38, lr=[3.989634776356468e-06, 3.989634776356468e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2559|ppo_ep: 1|act_loss: -0.00821685791015625|cri_loss: -0.0036067962646484375|unsuper_loss: 0.0
-average reward score: 4.890625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.26%) |Training time=0.46s (21.21%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2560|ppo_ep: 1|act_loss: -0.01593017578125|cri_loss: -0.0075836181640625|unsuper_loss: 0.0
-average reward score: 5.421875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.24%) |Training time=0.46s (21.20%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2561|ppo_ep: 1|act_loss: 0.012786865234375|cri_loss: 0.006656646728515625|unsuper_loss: 0.0
-average reward score: 5.96875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.15%) |Training time=0.46s (21.29%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2562|ppo_ep: 1|act_loss: 0.0024127960205078125|cri_loss: 0.0014629364013671875|unsuper_loss: 0.0
-average reward score: 5.33203125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.30%) |Training time=0.45s (20.98%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2563|ppo_ep: 1|act_loss: -0.028656005859375|cri_loss: -0.0138397216796875|unsuper_loss: 0.0
-average reward score: 4.98046875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.35%) |Training time=0.45s (21.08%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2564|ppo_ep: 1|act_loss: 0.01380157470703125|cri_loss: 0.007137298583984375|unsuper_loss: 0.0
-average reward score: 5.71875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.12%) |Training time=0.46s (21.32%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2565|ppo_ep: 1|act_loss: 0.05413818359375|cri_loss: 0.0276336669921875|unsuper_loss: 0.0
-average reward score: 5.00390625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.49%) |Training time=0.45s (20.94%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2566|ppo_ep: 1|act_loss: 0.01453399658203125|cri_loss: 0.009857177734375|unsuper_loss: 0.0
-average reward score: 6.71875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.51%) |Training time=0.45s (20.92%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2567|ppo_ep: 1|act_loss: -0.01490020751953125|cri_loss: -0.0070953369140625|unsuper_loss: 0.0
-average reward score: 5.78125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.22%) |Training time=0.43s (20.18%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2568|ppo_ep: 1|act_loss: -0.03192138671875|cri_loss: -0.015533447265625|unsuper_loss: 0.0
-average reward score: 5.5
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.41%) |Training time=0.45s (21.03%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45
-[2023-04-14 10:22:18,902] [INFO] [logging.py:96:log_dist] [Rank 0] step=2570, skipped=32, lr=[7.676071253329589e-06, 7.676071253329589e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:22:18,921] [INFO] [timer.py:199:stop] epoch=0/micro_step=2570/global_step=2570, RunningAvgSamplesPerSec=106.66218526568217, CurrSamplesPerSec=109.55198828552562, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:22:19,013] [INFO] [logging.py:96:log_dist] [Rank 0] step=2570, skipped=38, lr=[3.981894004906388e-06, 3.981894004906388e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2569|ppo_ep: 1|act_loss: -0.0310821533203125|cri_loss: -0.0146331787109375|unsuper_loss: 0.0
-average reward score: 5.37890625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.33%) |Training time=0.45s (21.12%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2570|ppo_ep: 1|act_loss: 0.0008916854858398438|cri_loss: 0.0008840560913085938|unsuper_loss: 0.0
-average reward score: 5.7109375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.32%) |Training time=0.45s (21.10%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2571|ppo_ep: 1|act_loss: 0.0136871337890625|cri_loss: 0.007045745849609375|unsuper_loss: 0.0
-average reward score: 5.78515625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.98%) |Training time=0.46s (21.47%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2572|ppo_ep: 1|act_loss: 0.0029239654541015625|cri_loss: 0.0018987655639648438|unsuper_loss: 0.0
-average reward score: 5.28125
--------------------------------------------------------------------------------------
-|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.77s (75.68%) |Training time=0.47s (20.12%) |Others=0.10 (4.20%)|CurSamplesPerSec=13.67 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2573|ppo_ep: 1|act_loss: 0.03326416015625|cri_loss: 0.0170135498046875|unsuper_loss: 0.0
-average reward score: 5.41796875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.32%) |Training time=0.45s (21.11%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2574|ppo_ep: 1|act_loss: 0.0264892578125|cri_loss: 0.0137939453125|unsuper_loss: 0.0
-average reward score: 5.44921875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.24%) |Training time=0.45s (21.09%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2575|ppo_ep: 1|act_loss: -0.0259246826171875|cri_loss: -0.01274871826171875|unsuper_loss: 0.0
-average reward score: 5.4375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.65%) |Training time=0.45s (20.83%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2576|ppo_ep: 1|act_loss: -0.0147552490234375|cri_loss: -0.006183624267578125|unsuper_loss: 0.0
-average reward score: 5.8671875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.25%) |Training time=0.46s (21.19%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2577|ppo_ep: 1|act_loss: -0.03240966796875|cri_loss: -0.0154571533203125|unsuper_loss: 0.0
-average reward score: 5.23046875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.39%) |Training time=0.46s (21.07%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2578|ppo_ep: 1|act_loss: -0.04736328125|cri_loss: -0.0228118896484375|unsuper_loss: 0.0
-average reward score: 5.10546875
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.38%) |Training time=0.47s (21.17%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.50 |AvgSamplesPerSec=14.45
-[2023-04-14 10:22:40,839] [INFO] [logging.py:96:log_dist] [Rank 0] step=2580, skipped=32, lr=[7.661063842311183e-06, 7.661063842311183e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:22:40,858] [INFO] [timer.py:199:stop] epoch=0/micro_step=2580/global_step=2580, RunningAvgSamplesPerSec=106.6658442359836, CurrSamplesPerSec=105.24808684721674, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:22:40,951] [INFO] [logging.py:96:log_dist] [Rank 0] step=2580, skipped=38, lr=[3.97413126821228e-06, 3.97413126821228e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2579|ppo_ep: 1|act_loss: -0.0352783203125|cri_loss: -0.017059326171875|unsuper_loss: 0.0
-average reward score: 5.04296875
--------------------------------------------------------------------------------------
-|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.71s (75.16%) |Training time=0.47s (20.53%) |Others=0.10 (4.32%)|CurSamplesPerSec=14.07 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2580|ppo_ep: 1|act_loss: -0.022186279296875|cri_loss: -0.0107574462890625|unsuper_loss: 0.0
-average reward score: 5.1796875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.20%) |Training time=0.46s (21.23%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2581|ppo_ep: 1|act_loss: 0.00836181640625|cri_loss: 0.00673675537109375|unsuper_loss: 0.0
-average reward score: 5.33984375
--------------------------------------------------------------------------------------
-|E2E latency=2.71s |Gather latency=0.00s (0.00%) |Generate time=1.62s (59.69%) |Training time=0.44s (16.41%) |Others=0.65 (23.90%)|CurSamplesPerSec=11.80 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2582|ppo_ep: 1|act_loss: -0.00373077392578125|cri_loss: -0.0012674331665039062|unsuper_loss: 0.0
-average reward score: 5.5078125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.29%) |Training time=0.46s (21.17%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2583|ppo_ep: 1|act_loss: 0.018829345703125|cri_loss: 0.0101776123046875|unsuper_loss: 0.0
-average reward score: 5.19921875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.86%) |Training time=0.47s (21.59%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2584|ppo_ep: 1|act_loss: 0.004974365234375|cri_loss: 0.0033092498779296875|unsuper_loss: 0.0
-average reward score: 5.65234375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.31%) |Training time=0.45s (21.10%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2585|ppo_ep: 1|act_loss: -0.0110931396484375|cri_loss: -0.00537109375|unsuper_loss: 0.0
-average reward score: 5.1015625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.49%) |Training time=0.45s (20.93%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2586|ppo_ep: 1|act_loss: -0.0142364501953125|cri_loss: -0.0065460205078125|unsuper_loss: 0.0
-average reward score: 4.75390625
--------------------------------------------------------------------------------------
-|E2E latency=2.89s |Gather latency=0.00s (0.00%) |Generate time=1.76s (60.97%) |Training time=0.48s (16.67%) |Others=0.65 (22.36%)|CurSamplesPerSec=11.08 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2587|ppo_ep: 1|act_loss: 0.0120391845703125|cri_loss: 0.00667572021484375|unsuper_loss: 0.0
-average reward score: 5.50390625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.20%) |Training time=0.46s (21.22%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2588|ppo_ep: 1|act_loss: -0.03863525390625|cri_loss: -0.017364501953125|unsuper_loss: 0.0
-average reward score: 5.12109375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.38%) |Training time=0.45s (21.04%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.45
-[2023-04-14 10:23:03,693] [INFO] [logging.py:96:log_dist] [Rank 0] step=2590, skipped=32, lr=[7.646014393985005e-06, 7.646014393985005e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:23:03,712] [INFO] [timer.py:199:stop] epoch=0/micro_step=2590/global_step=2590, RunningAvgSamplesPerSec=106.671823033167, CurrSamplesPerSec=107.67645173531136, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:23:03,805] [INFO] [logging.py:96:log_dist] [Rank 0] step=2590, skipped=38, lr=[3.96634668133663e-06, 3.96634668133663e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2589|ppo_ep: 1|act_loss: -0.048431396484375|cri_loss: -0.0231170654296875|unsuper_loss: 0.0
-average reward score: 5.61328125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.13%) |Training time=0.46s (21.32%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2590|ppo_ep: 1|act_loss: -0.047698974609375|cri_loss: -0.02325439453125|unsuper_loss: 0.0
-average reward score: 5.1953125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.04%) |Training time=0.46s (21.32%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2591|ppo_ep: 1|act_loss: -0.00933837890625|cri_loss: -0.00406646728515625|unsuper_loss: 0.0
-average reward score: 5.4140625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.10%) |Training time=0.46s (21.33%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2592|ppo_ep: 1|act_loss: 0.00988006591796875|cri_loss: 0.00601959228515625|unsuper_loss: 0.0
-average reward score: 4.85546875
--------------------------------------------------------------------------------------
-|E2E latency=2.60s |Gather latency=0.00s (0.00%) |Generate time=1.59s (61.40%) |Training time=0.46s (17.59%) |Others=0.55 (21.01%)|CurSamplesPerSec=12.33 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2593|ppo_ep: 1|act_loss: 0.0369873046875|cri_loss: 0.019439697265625|unsuper_loss: 0.0
-average reward score: 5.5703125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.16%) |Training time=0.46s (21.25%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2594|ppo_ep: 1|act_loss: 0.053192138671875|cri_loss: 0.0272216796875|unsuper_loss: 0.0
-average reward score: 5.52734375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.04%) |Training time=0.46s (21.38%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2595|ppo_ep: 1|act_loss: 0.025787353515625|cri_loss: 0.0153961181640625|unsuper_loss: 0.0
-average reward score: 5.41796875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.20%) |Training time=0.46s (21.23%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2596|ppo_ep: 1|act_loss: 0.021759033203125|cri_loss: 0.011383056640625|unsuper_loss: 0.0
-average reward score: 5.75
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.86%) |Training time=0.47s (21.63%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2597|ppo_ep: 1|act_loss: 0.0087890625|cri_loss: 0.0050811767578125|unsuper_loss: 0.0
-average reward score: 5.16796875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.04%) |Training time=0.46s (21.41%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2598|ppo_ep: 1|act_loss: 0.021331787109375|cri_loss: 0.01171112060546875|unsuper_loss: 0.0
-average reward score: 5.20703125
--------------------------------------------------------------------------------------
-|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.59s (71.85%) |Training time=0.45s (20.49%) |Others=0.17 (7.66%)|CurSamplesPerSec=14.42 |AvgSamplesPerSec=14.45
-[2023-04-14 10:23:25,756] [INFO] [logging.py:96:log_dist] [Rank 0] step=2600, skipped=32, lr=[7.630923131420182e-06, 7.630923131420182e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:23:25,775] [INFO] [timer.py:199:stop] epoch=0/micro_step=2600/global_step=2600, RunningAvgSamplesPerSec=106.67467146215988, CurrSamplesPerSec=104.68854435844617, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:23:25,867] [INFO] [logging.py:96:log_dist] [Rank 0] step=2600, skipped=38, lr=[3.958540359665793e-06, 3.958540359665793e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2599|ppo_ep: 1|act_loss: -0.0157318115234375|cri_loss: -0.007659912109375|unsuper_loss: 0.0
-average reward score: 5.703125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.83%) |Training time=0.47s (21.63%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2600|ppo_ep: 1|act_loss: -0.0126190185546875|cri_loss: -0.005321502685546875|unsuper_loss: 0.0
-average reward score: 5.625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.98%) |Training time=0.46s (21.46%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2601|ppo_ep: 1|act_loss: -0.0704345703125|cri_loss: -0.033203125|unsuper_loss: 0.0
-average reward score: 5.10546875
--------------------------------------------------------------------------------------
-|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.28%) |Training time=0.48s (20.52%) |Others=0.10 (4.20%)|CurSamplesPerSec=13.66 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2602|ppo_ep: 1|act_loss: -0.0760498046875|cri_loss: -0.03662109375|unsuper_loss: 0.0
-average reward score: 5.58984375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.17%) |Training time=0.46s (21.32%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2603|ppo_ep: 1|act_loss: 0.004253387451171875|cri_loss: 0.002468109130859375|unsuper_loss: 0.0
-average reward score: 5.8828125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.21%) |Training time=0.46s (21.21%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2604|ppo_ep: 1|act_loss: -0.00458526611328125|cri_loss: -0.002124786376953125|unsuper_loss: 0.0
-average reward score: 5.83203125
--------------------------------------------------------------------------------------
-|E2E latency=2.61s |Gather latency=0.00s (0.00%) |Generate time=1.60s (61.13%) |Training time=0.45s (17.40%) |Others=0.56 (21.47%)|CurSamplesPerSec=12.26 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2605|ppo_ep: 1|act_loss: 0.004978179931640625|cri_loss: 0.0028743743896484375|unsuper_loss: 0.0
-average reward score: 4.859375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.51%) |Training time=0.45s (20.93%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2606|ppo_ep: 1|act_loss: 0.0413818359375|cri_loss: 0.0211334228515625|unsuper_loss: 0.0
-average reward score: 5.69921875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.77%) |Training time=0.47s (21.51%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2607|ppo_ep: 1|act_loss: 0.01214599609375|cri_loss: 0.00688934326171875|unsuper_loss: 0.0
-average reward score: 5.5703125
--------------------------------------------------------------------------------------
-|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.62s (72.09%) |Training time=0.53s (23.53%) |Others=0.10 (4.38%)|CurSamplesPerSec=14.24 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2608|ppo_ep: 1|act_loss: 0.032012939453125|cri_loss: 0.0164794921875|unsuper_loss: 0.0
-average reward score: 5.046875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.49%) |Training time=0.45s (20.94%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45
-[2023-04-14 10:23:48,076] [INFO] [logging.py:96:log_dist] [Rank 0] step=2610, skipped=32, lr=[7.615790278305629e-06, 7.615790278305629e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:23:48,095] [INFO] [timer.py:199:stop] epoch=0/micro_step=2610/global_step=2610, RunningAvgSamplesPerSec=106.66852722864914, CurrSamplesPerSec=108.16473682853238, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:23:48,187] [INFO] [logging.py:96:log_dist] [Rank 0] step=2610, skipped=38, lr=[3.95071241890829e-06, 3.95071241890829e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2609|ppo_ep: 1|act_loss: 0.00861358642578125|cri_loss: 0.0045623779296875|unsuper_loss: 0.0
-average reward score: 4.97265625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.19%) |Training time=0.46s (21.26%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2610|ppo_ep: 1|act_loss: -0.0095062255859375|cri_loss: -0.00423431396484375|unsuper_loss: 0.0
-average reward score: 5.671875
--------------------------------------------------------------------------------------
-|E2E latency=2.51s |Gather latency=0.00s (0.00%) |Generate time=1.59s (63.11%) |Training time=0.47s (18.51%) |Others=0.46 (18.38%)|CurSamplesPerSec=12.73 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2611|ppo_ep: 1|act_loss: -0.018829345703125|cri_loss: -0.00910186767578125|unsuper_loss: 0.0
-average reward score: 5.53125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.47%) |Training time=0.48s (21.95%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2612|ppo_ep: 1|act_loss: -0.0234527587890625|cri_loss: -0.01035308837890625|unsuper_loss: 0.0
-average reward score: 5.23046875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.12%) |Training time=0.48s (22.33%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2613|ppo_ep: 1|act_loss: 0.005229949951171875|cri_loss: 0.00333404541015625|unsuper_loss: 0.0
-average reward score: 5.24609375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.43%) |Training time=0.47s (21.99%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2614|ppo_ep: 1|act_loss: -0.0267486572265625|cri_loss: -0.01274871826171875|unsuper_loss: 0.0
-average reward score: 5.359375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.52%) |Training time=0.47s (21.91%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2615|ppo_ep: 1|act_loss: 0.0052490234375|cri_loss: 0.003108978271484375|unsuper_loss: 0.0
-average reward score: 5.3671875
--------------------------------------------------------------------------------------
-|E2E latency=2.54s |Gather latency=0.00s (0.00%) |Generate time=1.62s (63.71%) |Training time=0.63s (24.65%) |Others=0.30 (11.64%)|CurSamplesPerSec=12.60 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2616|ppo_ep: 1|act_loss: 0.04046630859375|cri_loss: 0.021759033203125|unsuper_loss: 0.0
-average reward score: 5.09375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.68%) |Training time=0.47s (21.73%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2617|ppo_ep: 1|act_loss: -0.0025787353515625|cri_loss: 0.0013027191162109375|unsuper_loss: 0.0
-average reward score: 5.4140625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.74%) |Training time=0.47s (21.69%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.45
-[2023-04-14 10:24:08,322] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
-epoch: 0|step: 2618|ppo_ep: 1|act_loss: 0.017486572265625|cri_loss: 0.00926971435546875|unsuper_loss: 0.0
-average reward score: 5.20703125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.09%) |Training time=0.47s (21.81%) |Others=0.09 (4.10%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.45
-[2023-04-14 10:24:10,360] [INFO] [logging.py:96:log_dist] [Rank 0] step=2620, skipped=32, lr=[7.600616058946736e-06, 7.600616058946736e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:24:10,378] [INFO] [timer.py:199:stop] epoch=0/micro_step=2620/global_step=2620, RunningAvgSamplesPerSec=106.64397854881977, CurrSamplesPerSec=103.47651192983646, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:24:10,462] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
-[2023-04-14 10:24:10,463] [INFO] [logging.py:96:log_dist] [Rank 0] step=2620, skipped=40, lr=[3.9444345785206285e-06, 3.9444345785206285e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2619|ppo_ep: 1|act_loss: 0.020050048828125|cri_loss: 0.010467529296875|unsuper_loss: 0.0
-average reward score: 5.234375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.88%) |Training time=0.47s (22.05%) |Others=0.09 (4.08%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2620|ppo_ep: 1|act_loss: 0.01495361328125|cri_loss: 0.0078277587890625|unsuper_loss: 0.0
-average reward score: 5.56640625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.88%) |Training time=0.46s (21.54%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2621|ppo_ep: 1|act_loss: 0.04327392578125|cri_loss: 0.0222015380859375|unsuper_loss: 0.0
-average reward score: 5.6640625
--------------------------------------------------------------------------------------
-|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.58s (69.79%) |Training time=0.47s (20.78%) |Others=0.21 (9.43%)|CurSamplesPerSec=14.14 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2622|ppo_ep: 1|act_loss: 0.0241546630859375|cri_loss: 0.01229095458984375|unsuper_loss: 0.0
-average reward score: 4.94140625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.43%) |Training time=0.47s (22.00%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2623|ppo_ep: 1|act_loss: -0.0059356689453125|cri_loss: -0.0025787353515625|unsuper_loss: 0.0
-average reward score: 5.203125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.63%) |Training time=0.47s (21.81%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2624|ppo_ep: 1|act_loss: -0.040802001953125|cri_loss: -0.0193634033203125|unsuper_loss: 0.0
-average reward score: 5.33984375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.05%) |Training time=0.49s (22.41%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2625|ppo_ep: 1|act_loss: -0.048248291015625|cri_loss: -0.022308349609375|unsuper_loss: 0.0
-average reward score: 5.32421875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.80%) |Training time=0.46s (21.62%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2626|ppo_ep: 1|act_loss: 0.04718017578125|cri_loss: 0.024810791015625|unsuper_loss: 0.0
-average reward score: 5.44140625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.76%) |Training time=0.44s (20.64%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2627|ppo_ep: 1|act_loss: 0.0007905960083007812|cri_loss: 0.0007834434509277344|unsuper_loss: 0.0
-average reward score: 5.34765625
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.13%) |Training time=0.47s (21.40%) |Others=0.14 (6.47%)|CurSamplesPerSec=14.57 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2628|ppo_ep: 1|act_loss: -0.0053253173828125|cri_loss: -0.0022602081298828125|unsuper_loss: 0.0
-average reward score: 5.625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.46%) |Training time=0.47s (21.94%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.45
-[2023-04-14 10:24:32,030] [INFO] [logging.py:96:log_dist] [Rank 0] step=2630, skipped=32, lr=[7.5854006982620415e-06, 7.5854006982620415e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:24:32,048] [INFO] [timer.py:199:stop] epoch=0/micro_step=2630/global_step=2630, RunningAvgSamplesPerSec=106.63587194700445, CurrSamplesPerSec=101.95049749373148, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:24:32,141] [INFO] [logging.py:96:log_dist] [Rank 0] step=2630, skipped=40, lr=[3.9365680160143595e-06, 3.9365680160143595e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2629|ppo_ep: 1|act_loss: 0.0084381103515625|cri_loss: 0.004375457763671875|unsuper_loss: 0.0
-average reward score: 5.5390625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.57%) |Training time=0.48s (21.92%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2630|ppo_ep: 1|act_loss: 0.0084686279296875|cri_loss: 0.004428863525390625|unsuper_loss: 0.0
-average reward score: 5.171875
--------------------------------------------------------------------------------------
-|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.75s (74.90%) |Training time=0.49s (20.89%) |Others=0.10 (4.22%)|CurSamplesPerSec=13.71 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2631|ppo_ep: 1|act_loss: 0.016143798828125|cri_loss: 0.00824737548828125|unsuper_loss: 0.0
-average reward score: 5.921875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.72%) |Training time=0.46s (21.59%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2632|ppo_ep: 1|act_loss: -0.01343536376953125|cri_loss: -0.006603240966796875|unsuper_loss: 0.0
-average reward score: 5.2578125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.60%) |Training time=0.47s (21.83%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2633|ppo_ep: 1|act_loss: -0.0085601806640625|cri_loss: -0.003986358642578125|unsuper_loss: 0.0
-average reward score: 5.5
--------------------------------------------------------------------------------------
-|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.58s (67.05%) |Training time=0.47s (19.71%) |Others=0.31 (13.24%)|CurSamplesPerSec=13.55 |AvgSamplesPerSec=14.45
-[2023-04-14 10:24:43,163] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
-epoch: 0|step: 2634|ppo_ep: 1|act_loss: 0.01061248779296875|cri_loss: 0.00551605224609375|unsuper_loss: 0.0
-average reward score: 5.1875
--------------------------------------------------------------------------------------
-|E2E latency=2.12s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.81%) |Training time=0.44s (20.57%) |Others=0.10 (4.62%)|CurSamplesPerSec=15.09 |AvgSamplesPerSec=14.45
-[2023-04-14 10:24:45,295] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
-epoch: 0|step: 2635|ppo_ep: 1|act_loss: 0.00872039794921875|cri_loss: 0.005645751953125|unsuper_loss: 0.0
-average reward score: 4.89453125
--------------------------------------------------------------------------------------
-|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.91%) |Training time=0.44s (20.51%) |Others=0.10 (4.58%)|CurSamplesPerSec=15.01 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2636|ppo_ep: 1|act_loss: 0.0165863037109375|cri_loss: 0.0097808837890625|unsuper_loss: 0.0
-average reward score: 5.81640625
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.19%) |Training time=0.47s (21.32%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2637|ppo_ep: 1|act_loss: 0.00852203369140625|cri_loss: 0.004772186279296875|unsuper_loss: 0.0
-average reward score: 5.375
--------------------------------------------------------------------------------------
-|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.22%) |Training time=0.49s (21.46%) |Others=0.10 (4.32%)|CurSamplesPerSec=14.08 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2638|ppo_ep: 1|act_loss: -0.0084228515625|cri_loss: -0.003665924072265625|unsuper_loss: 0.0
-average reward score: 5.5234375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.63%) |Training time=0.47s (21.80%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.45
-[2023-04-14 10:24:54,042] [INFO] [logging.py:96:log_dist] [Rank 0] step=2640, skipped=34, lr=[7.573198939494354e-06, 7.573198939494354e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:24:54,736] [INFO] [timer.py:199:stop] epoch=0/micro_step=2640/global_step=2640, RunningAvgSamplesPerSec=106.54140046893538, CurrSamplesPerSec=32.69383225358878, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:24:54,829] [INFO] [logging.py:96:log_dist] [Rank 0] step=2640, skipped=40, lr=[3.928680160104563e-06, 3.928680160104563e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2639|ppo_ep: 1|act_loss: 0.0277099609375|cri_loss: 0.01422119140625|unsuper_loss: 0.0
-average reward score: 5.0859375
--------------------------------------------------------------------------------------
-|E2E latency=2.83s |Gather latency=0.00s (0.00%) |Generate time=1.59s (56.17%) |Training time=1.14s (40.35%) |Others=0.10 (3.48%)|CurSamplesPerSec=11.31 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2640|ppo_ep: 1|act_loss: 0.0245819091796875|cri_loss: 0.01282501220703125|unsuper_loss: 0.0
-average reward score: 5.2265625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.19%) |Training time=0.46s (21.23%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2641|ppo_ep: 1|act_loss: -0.00899505615234375|cri_loss: -0.00397491455078125|unsuper_loss: 0.0
-average reward score: 5.80078125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.14%) |Training time=0.44s (20.29%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2642|ppo_ep: 1|act_loss: -0.00818634033203125|cri_loss: -0.00392913818359375|unsuper_loss: 0.0
-average reward score: 5.5390625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.94%) |Training time=0.46s (21.48%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2643|ppo_ep: 1|act_loss: -0.0084381103515625|cri_loss: -0.003757476806640625|unsuper_loss: 0.0
-average reward score: 5.59375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.68%) |Training time=0.47s (21.74%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2644|ppo_ep: 1|act_loss: 0.021514892578125|cri_loss: 0.0110626220703125|unsuper_loss: 0.0
-average reward score: 4.75390625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.35%) |Training time=0.47s (21.96%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2645|ppo_ep: 1|act_loss: 0.02435302734375|cri_loss: 0.01253509521484375|unsuper_loss: 0.0
-average reward score: 5.77734375
--------------------------------------------------------------------------------------
-|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.58s (70.90%) |Training time=0.49s (21.96%) |Others=0.16 (7.14%)|CurSamplesPerSec=14.34 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2646|ppo_ep: 1|act_loss: 0.00466156005859375|cri_loss: 0.002773284912109375|unsuper_loss: 0.0
-average reward score: 5.30859375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.36%) |Training time=0.48s (21.81%) |Others=0.11 (4.83%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2647|ppo_ep: 1|act_loss: -0.0088958740234375|cri_loss: -0.0033321380615234375|unsuper_loss: 0.0
-average reward score: 5.34765625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.37%) |Training time=0.47s (22.05%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2648|ppo_ep: 1|act_loss: 0.00727081298828125|cri_loss: 0.004428863525390625|unsuper_loss: 0.0
-average reward score: 5.75
--------------------------------------------------------------------------------------
-|E2E latency=3.23s |Gather latency=0.00s (0.00%) |Generate time=1.58s (49.01%) |Training time=0.47s (14.46%) |Others=1.18 (36.53%)|CurSamplesPerSec=9.92 |AvgSamplesPerSec=14.45
-[2023-04-14 10:25:17,411] [INFO] [logging.py:96:log_dist] [Rank 0] step=2650, skipped=34, lr=[7.557910093162319e-06, 7.557910093162319e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:25:17,429] [INFO] [timer.py:199:stop] epoch=0/micro_step=2650/global_step=2650, RunningAvgSamplesPerSec=106.53553553330283, CurrSamplesPerSec=104.7835965849119, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:25:17,522] [INFO] [logging.py:96:log_dist] [Rank 0] step=2650, skipped=40, lr=[3.9207711277082896e-06, 3.9207711277082896e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2649|ppo_ep: 1|act_loss: -0.00821685791015625|cri_loss: -0.003902435302734375|unsuper_loss: 0.0
-average reward score: 5.953125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.62%) |Training time=0.47s (21.81%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2650|ppo_ep: 1|act_loss: -0.0178070068359375|cri_loss: -0.007549285888671875|unsuper_loss: 0.0
-average reward score: 5.2265625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.45%) |Training time=0.48s (22.02%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2651|ppo_ep: 1|act_loss: 0.04248046875|cri_loss: 0.02374267578125|unsuper_loss: 0.0
-average reward score: 5.234375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.65%) |Training time=0.47s (21.76%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2652|ppo_ep: 1|act_loss: 0.02752685546875|cri_loss: 0.01483917236328125|unsuper_loss: 0.0
-average reward score: 5.07421875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.38%) |Training time=0.47s (22.06%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2653|ppo_ep: 1|act_loss: 0.027435302734375|cri_loss: 0.01416015625|unsuper_loss: 0.0
-average reward score: 5.66015625
--------------------------------------------------------------------------------------
-|E2E latency=3.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (49.52%) |Training time=0.48s (14.95%) |Others=1.13 (35.54%)|CurSamplesPerSec=10.06 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2654|ppo_ep: 1|act_loss: 0.0149993896484375|cri_loss: 0.0079498291015625|unsuper_loss: 0.0
-average reward score: 5.51953125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.48%) |Training time=0.47s (21.93%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2655|ppo_ep: 1|act_loss: 0.026458740234375|cri_loss: 0.014007568359375|unsuper_loss: 0.0
-average reward score: 4.9140625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.92%) |Training time=0.49s (22.55%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2656|ppo_ep: 1|act_loss: 0.0091094970703125|cri_loss: 0.0047760009765625|unsuper_loss: 0.0
-average reward score: 5.56640625
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.24%) |Training time=0.49s (22.27%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2657|ppo_ep: 1|act_loss: -0.015716552734375|cri_loss: -0.007526397705078125|unsuper_loss: 0.0
-average reward score: 5.484375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.54%) |Training time=0.47s (21.89%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2658|ppo_ep: 1|act_loss: 0.0038909912109375|cri_loss: 0.0020961761474609375|unsuper_loss: 0.0
-average reward score: 5.37890625
--------------------------------------------------------------------------------------
-|E2E latency=2.73s |Gather latency=0.00s (0.00%) |Generate time=1.58s (57.72%) |Training time=0.48s (17.49%) |Others=0.68 (24.79%)|CurSamplesPerSec=11.71 |AvgSamplesPerSec=14.44
-[2023-04-14 10:25:40,790] [INFO] [logging.py:96:log_dist] [Rank 0] step=2660, skipped=34, lr=[7.542580738509916e-06, 7.542580738509916e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:25:40,809] [INFO] [timer.py:199:stop] epoch=0/micro_step=2660/global_step=2660, RunningAvgSamplesPerSec=106.51279863471905, CurrSamplesPerSec=93.23595653916608, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:25:40,901] [INFO] [logging.py:96:log_dist] [Rank 0] step=2660, skipped=40, lr=[3.91284103605648e-06, 3.91284103605648e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2659|ppo_ep: 1|act_loss: -0.034912109375|cri_loss: -0.01702880859375|unsuper_loss: 0.0
-average reward score: 5.25390625
--------------------------------------------------------------------------------------
-|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.74s (74.23%) |Training time=0.51s (21.59%) |Others=0.10 (4.18%)|CurSamplesPerSec=13.65 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2660|ppo_ep: 1|act_loss: -0.05914306640625|cri_loss: -0.029052734375|unsuper_loss: 0.0
-average reward score: 5.34765625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.74%) |Training time=0.47s (21.69%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2661|ppo_ep: 1|act_loss: 0.003200531005859375|cri_loss: 0.0020351409912109375|unsuper_loss: 0.0
-average reward score: 5.640625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.74%) |Training time=0.47s (21.70%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2662|ppo_ep: 1|act_loss: 0.0149688720703125|cri_loss: 0.0079803466796875|unsuper_loss: 0.0
-average reward score: 5.62109375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.52%) |Training time=0.47s (21.90%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2663|ppo_ep: 1|act_loss: 0.048736572265625|cri_loss: 0.02532958984375|unsuper_loss: 0.0
-average reward score: 6.01171875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.41%) |Training time=0.48s (22.09%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2664|ppo_ep: 1|act_loss: 0.064208984375|cri_loss: 0.03277587890625|unsuper_loss: 0.0
-average reward score: 5.31640625
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.64%) |Training time=0.48s (21.92%) |Others=0.10 (4.45%)|CurSamplesPerSec=14.50 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2665|ppo_ep: 1|act_loss: 0.0640869140625|cri_loss: 0.034332275390625|unsuper_loss: 0.0
-average reward score: 5.02734375
--------------------------------------------------------------------------------------
-|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.68s (73.92%) |Training time=0.50s (21.75%) |Others=0.10 (4.32%)|CurSamplesPerSec=14.05 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2666|ppo_ep: 1|act_loss: -0.00470733642578125|cri_loss: -0.001983642578125|unsuper_loss: 0.0
-average reward score: 4.765625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.08%) |Training time=0.46s (21.31%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2667|ppo_ep: 1|act_loss: -0.029388427734375|cri_loss: -0.0142974853515625|unsuper_loss: 0.0
-average reward score: 5.41015625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.66%) |Training time=0.44s (20.74%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2668|ppo_ep: 1|act_loss: -0.01666259765625|cri_loss: -0.007843017578125|unsuper_loss: 0.0
-average reward score: 5.3515625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.00%) |Training time=0.46s (21.24%) |Others=0.10 (4.77%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.44
-[2023-04-14 10:26:02,506] [INFO] [logging.py:96:log_dist] [Rank 0] step=2670, skipped=34, lr=[7.5272111027551624e-06, 7.5272111027551624e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:26:02,524] [INFO] [timer.py:199:stop] epoch=0/micro_step=2670/global_step=2670, RunningAvgSamplesPerSec=106.50561698981126, CurrSamplesPerSec=109.33183449574705, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:26:02,618] [INFO] [logging.py:96:log_dist] [Rank 0] step=2670, skipped=40, lr=[3.904890002692223e-06, 3.904890002692223e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2669|ppo_ep: 1|act_loss: -0.06109619140625|cri_loss: -0.0295562744140625|unsuper_loss: 0.0
-average reward score: 5.3046875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.23%) |Training time=0.46s (21.19%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2670|ppo_ep: 1|act_loss: -0.0343017578125|cri_loss: -0.0163726806640625|unsuper_loss: 0.0
-average reward score: 5.1484375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.63%) |Training time=0.48s (21.84%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2671|ppo_ep: 1|act_loss: 0.048797607421875|cri_loss: 0.027435302734375|unsuper_loss: 0.0
-average reward score: 5.2578125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.31%) |Training time=0.48s (22.12%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2672|ppo_ep: 1|act_loss: -0.0225067138671875|cri_loss: -0.01085662841796875|unsuper_loss: 0.0
-average reward score: 5.3203125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.51%) |Training time=0.47s (21.91%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2673|ppo_ep: 1|act_loss: 0.0294189453125|cri_loss: 0.01519012451171875|unsuper_loss: 0.0
-average reward score: 5.8359375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.38%) |Training time=0.48s (22.07%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2674|ppo_ep: 1|act_loss: 0.05877685546875|cri_loss: 0.03143310546875|unsuper_loss: 0.0
-average reward score: 5.453125
--------------------------------------------------------------------------------------
-|E2E latency=2.68s |Gather latency=0.00s (0.00%) |Generate time=1.58s (58.89%) |Training time=0.49s (18.37%) |Others=0.61 (22.75%)|CurSamplesPerSec=11.95 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2675|ppo_ep: 1|act_loss: -0.0154571533203125|cri_loss: -0.00719451904296875|unsuper_loss: 0.0
-average reward score: 5.52734375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.20%) |Training time=0.48s (22.23%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2676|ppo_ep: 1|act_loss: -0.0118865966796875|cri_loss: -0.004611968994140625|unsuper_loss: 0.0
-average reward score: 5.19921875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.30%) |Training time=0.48s (22.12%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2677|ppo_ep: 1|act_loss: 0.06536865234375|cri_loss: 0.0384521484375|unsuper_loss: 0.0
-average reward score: 5.265625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.20%) |Training time=0.48s (22.24%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2678|ppo_ep: 1|act_loss: -0.0438232421875|cri_loss: -0.021148681640625|unsuper_loss: 0.0
-average reward score: 5.61328125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.23%) |Training time=0.48s (22.20%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.44
-[2023-04-14 10:26:24,579] [INFO] [logging.py:96:log_dist] [Rank 0] step=2680, skipped=34, lr=[7.511801413713135e-06, 7.511801413713135e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:26:24,597] [INFO] [timer.py:199:stop] epoch=0/micro_step=2680/global_step=2680, RunningAvgSamplesPerSec=106.48575335734733, CurrSamplesPerSec=102.37766112435794, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:26:24,690] [INFO] [logging.py:96:log_dist] [Rank 0] step=2680, skipped=40, lr=[3.896918145469013e-06, 3.896918145469013e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2679|ppo_ep: 1|act_loss: -0.02886962890625|cri_loss: -0.014129638671875|unsuper_loss: 0.0
-average reward score: 5.70703125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.31%) |Training time=0.47s (22.13%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2680|ppo_ep: 1|act_loss: 0.028411865234375|cri_loss: 0.0149383544921875|unsuper_loss: 0.0
-average reward score: 5.359375
--------------------------------------------------------------------------------------
-|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.57s (69.21%) |Training time=0.48s (21.17%) |Others=0.22 (9.62%)|CurSamplesPerSec=14.07 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2681|ppo_ep: 1|act_loss: -0.0035610198974609375|cri_loss: -0.0012836456298828125|unsuper_loss: 0.0
-average reward score: 5.421875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.07%) |Training time=0.48s (22.37%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2682|ppo_ep: 1|act_loss: 0.0278167724609375|cri_loss: 0.01442718505859375|unsuper_loss: 0.0
-average reward score: 5.39453125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.13%) |Training time=0.48s (22.30%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2683|ppo_ep: 1|act_loss: 0.00504302978515625|cri_loss: 0.003444671630859375|unsuper_loss: 0.0
-average reward score: 5.39453125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.28%) |Training time=0.48s (22.21%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2684|ppo_ep: 1|act_loss: 0.0285491943359375|cri_loss: 0.0146942138671875|unsuper_loss: 0.0
-average reward score: 5.25
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.23%) |Training time=0.48s (22.19%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2685|ppo_ep: 1|act_loss: -0.033966064453125|cri_loss: -0.016143798828125|unsuper_loss: 0.0
-average reward score: 5.8046875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.01%) |Training time=0.49s (22.46%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2686|ppo_ep: 1|act_loss: 0.04290771484375|cri_loss: 0.02264404296875|unsuper_loss: 0.0
-average reward score: 5.5546875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.41%) |Training time=0.47s (22.02%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2687|ppo_ep: 1|act_loss: -0.041107177734375|cri_loss: -0.0183868408203125|unsuper_loss: 0.0
-average reward score: 5.41796875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.36%) |Training time=0.47s (22.08%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2688|ppo_ep: 1|act_loss: -0.061004638671875|cri_loss: -0.029205322265625|unsuper_loss: 0.0
-average reward score: 5.61328125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.35%) |Training time=0.48s (22.11%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.44
-[2023-04-14 10:26:46,274] [INFO] [logging.py:96:log_dist] [Rank 0] step=2690, skipped=34, lr=[7.496351899792602e-06, 7.496351899792602e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:26:46,292] [INFO] [timer.py:199:stop] epoch=0/micro_step=2690/global_step=2690, RunningAvgSamplesPerSec=106.46413319981575, CurrSamplesPerSec=102.74899790854556, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:26:46,385] [INFO] [logging.py:96:log_dist] [Rank 0] step=2690, skipped=40, lr=[3.888925582549006e-06, 3.888925582549006e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2689|ppo_ep: 1|act_loss: 0.00598907470703125|cri_loss: 0.00348663330078125|unsuper_loss: 0.0
-average reward score: 5.0078125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.41%) |Training time=0.47s (22.04%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2690|ppo_ep: 1|act_loss: -0.04083251953125|cri_loss: -0.019989013671875|unsuper_loss: 0.0
-average reward score: 5.6796875
--------------------------------------------------------------------------------------
-|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.75s (74.05%) |Training time=0.50s (20.98%) |Others=0.12 (4.97%)|CurSamplesPerSec=13.55 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2691|ppo_ep: 1|act_loss: -0.048583984375|cri_loss: -0.022552490234375|unsuper_loss: 0.0
-average reward score: 5.2109375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.76%) |Training time=0.47s (21.66%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2692|ppo_ep: 1|act_loss: -0.016754150390625|cri_loss: -0.00821685791015625|unsuper_loss: 0.0
-average reward score: 5.5546875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.31%) |Training time=0.47s (21.88%) |Others=0.10 (4.81%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2693|ppo_ep: 1|act_loss: -0.0209808349609375|cri_loss: -0.01012420654296875|unsuper_loss: 0.0
-average reward score: 5.796875
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.22%) |Training time=0.49s (22.27%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2694|ppo_ep: 1|act_loss: -0.0040435791015625|cri_loss: -0.0018329620361328125|unsuper_loss: 0.0
-average reward score: 5.69140625
--------------------------------------------------------------------------------------
-|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.04%) |Training time=0.49s (21.66%) |Others=0.10 (4.31%)|CurSamplesPerSec=14.05 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2695|ppo_ep: 1|act_loss: -0.008453369140625|cri_loss: -0.003986358642578125|unsuper_loss: 0.0
-average reward score: 5.58984375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.46%) |Training time=0.47s (21.97%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2696|ppo_ep: 1|act_loss: 0.064208984375|cri_loss: 0.03363037109375|unsuper_loss: 0.0
-average reward score: 5.484375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.51%) |Training time=0.47s (21.92%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2697|ppo_ep: 1|act_loss: 0.016510009765625|cri_loss: 0.00865936279296875|unsuper_loss: 0.0
-average reward score: 5.46875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.55%) |Training time=0.47s (21.90%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2698|ppo_ep: 1|act_loss: -0.002437591552734375|cri_loss: -0.0010318756103515625|unsuper_loss: 0.0
-average reward score: 5.7421875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.46%) |Training time=0.47s (21.98%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45
-[2023-04-14 10:27:08,167] [INFO] [logging.py:96:log_dist] [Rank 0] step=2700, skipped=34, lr=[7.480862789992629e-06, 7.480862789992629e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:27:08,185] [INFO] [timer.py:199:stop] epoch=0/micro_step=2700/global_step=2700, RunningAvgSamplesPerSec=106.4442632540804, CurrSamplesPerSec=101.70059117973773, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:27:08,278] [INFO] [logging.py:96:log_dist] [Rank 0] step=2700, skipped=40, lr=[3.880912432401265e-06, 3.880912432401265e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2699|ppo_ep: 1|act_loss: -0.00817108154296875|cri_loss: -0.003662109375|unsuper_loss: 0.0
-average reward score: 5.45703125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.27%) |Training time=0.48s (22.18%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2700|ppo_ep: 1|act_loss: -0.0207061767578125|cri_loss: -0.00963592529296875|unsuper_loss: 0.0
-average reward score: 5.48046875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.44%) |Training time=0.48s (22.05%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2701|ppo_ep: 1|act_loss: 0.0119171142578125|cri_loss: 0.0084075927734375|unsuper_loss: 0.0
-average reward score: 5.1640625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.61%) |Training time=0.47s (21.82%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2702|ppo_ep: 1|act_loss: 0.043212890625|cri_loss: 0.0235595703125|unsuper_loss: 0.0
-average reward score: 5.390625
--------------------------------------------------------------------------------------
-|E2E latency=2.53s |Gather latency=0.00s (0.00%) |Generate time=1.58s (62.43%) |Training time=0.48s (18.84%) |Others=0.47 (18.73%)|CurSamplesPerSec=12.64 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2703|ppo_ep: 1|act_loss: 0.019866943359375|cri_loss: 0.011932373046875|unsuper_loss: 0.0
-average reward score: 5.12890625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.62%) |Training time=0.47s (21.80%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2704|ppo_ep: 1|act_loss: -0.0029621124267578125|cri_loss: -0.0009555816650390625|unsuper_loss: 0.0
-average reward score: 5.4375
--------------------------------------------------------------------------------------
-|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.68s (71.74%) |Training time=0.56s (24.05%) |Others=0.10 (4.21%)|CurSamplesPerSec=13.64 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2705|ppo_ep: 1|act_loss: -0.01299285888671875|cri_loss: -0.006378173828125|unsuper_loss: 0.0
-average reward score: 5.4296875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.43%) |Training time=0.47s (22.00%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2706|ppo_ep: 1|act_loss: -0.016571044921875|cri_loss: -0.0079193115234375|unsuper_loss: 0.0
-average reward score: 5.5234375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.59%) |Training time=0.47s (21.84%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2707|ppo_ep: 1|act_loss: -0.016357421875|cri_loss: -0.00804901123046875|unsuper_loss: 0.0
-average reward score: 5.52734375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.53%) |Training time=0.47s (21.90%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2708|ppo_ep: 1|act_loss: 0.13623046875|cri_loss: 0.07940673828125|unsuper_loss: 0.0
-average reward score: 4.8671875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.39%) |Training time=0.48s (22.07%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.45
-[2023-04-14 10:27:30,285] [INFO] [logging.py:96:log_dist] [Rank 0] step=2710, skipped=34, lr=[7.465334313899188e-06, 7.465334313899188e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:27:30,304] [INFO] [timer.py:199:stop] epoch=0/micro_step=2710/global_step=2710, RunningAvgSamplesPerSec=106.42161862003059, CurrSamplesPerSec=104.10446923041124, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:27:30,396] [INFO] [logging.py:96:log_dist] [Rank 0] step=2710, skipped=40, lr=[3.8728788138000064e-06, 3.8728788138000064e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2709|ppo_ep: 1|act_loss: -0.020050048828125|cri_loss: -0.00969696044921875|unsuper_loss: 0.0
-average reward score: 5.328125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.62%) |Training time=0.47s (21.83%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2710|ppo_ep: 1|act_loss: 0.007110595703125|cri_loss: 0.003948211669921875|unsuper_loss: 0.0
-average reward score: 5.4921875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.88%) |Training time=0.49s (22.58%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2711|ppo_ep: 1|act_loss: -0.0193634033203125|cri_loss: -0.007534027099609375|unsuper_loss: 0.0
-average reward score: 5.2578125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.39%) |Training time=0.48s (22.04%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2712|ppo_ep: 1|act_loss: 0.0090179443359375|cri_loss: 0.004947662353515625|unsuper_loss: 0.0
-average reward score: 5.53125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.68%) |Training time=0.47s (21.75%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2713|ppo_ep: 1|act_loss: -0.0018482208251953125|cri_loss: -0.00031375885009765625|unsuper_loss: 0.0
-average reward score: 5.57421875
--------------------------------------------------------------------------------------
-|E2E latency=2.46s |Gather latency=0.00s (0.00%) |Generate time=1.58s (64.20%) |Training time=0.47s (19.22%) |Others=0.41 (16.59%)|CurSamplesPerSec=13.00 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2714|ppo_ep: 1|act_loss: -0.03118896484375|cri_loss: -0.01531982421875|unsuper_loss: 0.0
-average reward score: 6.10546875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.47%) |Training time=0.47s (21.96%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2715|ppo_ep: 1|act_loss: -0.035400390625|cri_loss: -0.016998291015625|unsuper_loss: 0.0
-average reward score: 5.68359375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.51%) |Training time=0.48s (21.96%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2716|ppo_ep: 1|act_loss: -0.0191192626953125|cri_loss: -0.0081634521484375|unsuper_loss: 0.0
-average reward score: 5.05859375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.09%) |Training time=0.48s (22.32%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2717|ppo_ep: 1|act_loss: 0.01910400390625|cri_loss: 0.010162353515625|unsuper_loss: 0.0
-average reward score: 5.80859375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.18%) |Training time=0.48s (22.27%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2718|ppo_ep: 1|act_loss: 0.0635986328125|cri_loss: 0.035491943359375|unsuper_loss: 0.0
-average reward score: 5.53125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.50%) |Training time=0.47s (21.94%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45
-[2023-04-14 10:27:52,375] [INFO] [logging.py:96:log_dist] [Rank 0] step=2720, skipped=34, lr=[7.4497667016817536e-06, 7.4497667016817536e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:27:52,393] [INFO] [timer.py:199:stop] epoch=0/micro_step=2720/global_step=2720, RunningAvgSamplesPerSec=106.40045873498524, CurrSamplesPerSec=95.01305938452128, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:27:52,486] [INFO] [logging.py:96:log_dist] [Rank 0] step=2720, skipped=40, lr=[3.864824845822837e-06, 3.864824845822837e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2719|ppo_ep: 1|act_loss: 0.00815582275390625|cri_loss: 0.004367828369140625|unsuper_loss: 0.0
-average reward score: 5.6328125
--------------------------------------------------------------------------------------
-|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.75s (74.53%) |Training time=0.50s (21.30%) |Others=0.10 (4.18%)|CurSamplesPerSec=13.64 |AvgSamplesPerSec=14.45
-[2023-04-14 10:27:54,627] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
-epoch: 0|step: 2720|ppo_ep: 1|act_loss: 0.1143798828125|cri_loss: 0.06793212890625|unsuper_loss: 0.0
-average reward score: 5.37890625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.05%) |Training time=0.47s (21.86%) |Others=0.09 (4.09%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.45
-[2023-04-14 10:27:56,766] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
-epoch: 0|step: 2721|ppo_ep: 1|act_loss: -0.01043701171875|cri_loss: -0.004398345947265625|unsuper_loss: 0.0
-average reward score: 5.41796875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.16%) |Training time=0.47s (21.75%) |Others=0.09 (4.09%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2722|ppo_ep: 1|act_loss: 0.0159454345703125|cri_loss: 0.0082550048828125|unsuper_loss: 0.0
-average reward score: 5.39453125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.24%) |Training time=0.48s (22.23%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2723|ppo_ep: 1|act_loss: 0.03460693359375|cri_loss: 0.01800537109375|unsuper_loss: 0.0
-average reward score: 4.86328125
--------------------------------------------------------------------------------------
-|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.61s (71.37%) |Training time=0.55s (24.27%) |Others=0.10 (4.36%)|CurSamplesPerSec=14.23 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2724|ppo_ep: 1|act_loss: 0.005645751953125|cri_loss: 0.003017425537109375|unsuper_loss: 0.0
-average reward score: 5.21484375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.51%) |Training time=0.47s (21.91%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2725|ppo_ep: 1|act_loss: 0.01123046875|cri_loss: 0.006679534912109375|unsuper_loss: 0.0
-average reward score: 5.51171875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.19%) |Training time=0.48s (22.22%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2726|ppo_ep: 1|act_loss: -0.03509521484375|cri_loss: -0.01535797119140625|unsuper_loss: 0.0
-average reward score: 5.453125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.32%) |Training time=0.48s (22.10%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2727|ppo_ep: 1|act_loss: -0.026458740234375|cri_loss: -0.01274871826171875|unsuper_loss: 0.0
-average reward score: 5.49609375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.48%) |Training time=0.47s (21.94%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2728|ppo_ep: 1|act_loss: -0.011383056640625|cri_loss: -0.005321502685546875|unsuper_loss: 0.0
-average reward score: 5.37890625
--------------------------------------------------------------------------------------
-|E2E latency=2.55s |Gather latency=0.00s (0.00%) |Generate time=1.57s (61.74%) |Training time=0.48s (18.74%) |Others=0.50 (19.52%)|CurSamplesPerSec=12.54 |AvgSamplesPerSec=14.45
-[2023-04-14 10:28:14,381] [INFO] [logging.py:96:log_dist] [Rank 0] step=2730, skipped=34, lr=[7.434160184089898e-06, 7.434160184089898e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:28:14,399] [INFO] [timer.py:199:stop] epoch=0/micro_step=2730/global_step=2730, RunningAvgSamplesPerSec=106.37902719012551, CurrSamplesPerSec=101.46724635971965, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:28:14,492] [INFO] [logging.py:96:log_dist] [Rank 0] step=2730, skipped=42, lr=[3.8583671001032015e-06, 3.8583671001032015e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2729|ppo_ep: 1|act_loss: 0.02996826171875|cri_loss: 0.0161590576171875|unsuper_loss: 0.0
-average reward score: 5.2265625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.22%) |Training time=0.48s (22.20%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2730|ppo_ep: 1|act_loss: -0.06396484375|cri_loss: -0.0307159423828125|unsuper_loss: 0.0
-average reward score: 5.01953125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.33%) |Training time=0.46s (21.11%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2731|ppo_ep: 1|act_loss: 0.0201263427734375|cri_loss: 0.0107421875|unsuper_loss: 0.0
-average reward score: 5.6328125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.28%) |Training time=0.46s (21.14%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2732|ppo_ep: 1|act_loss: 0.0035247802734375|cri_loss: 0.002044677734375|unsuper_loss: 0.0
-average reward score: 5.04296875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.99%) |Training time=0.46s (21.43%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2733|ppo_ep: 1|act_loss: 0.03350830078125|cri_loss: 0.017333984375|unsuper_loss: 0.0
-average reward score: 5.53515625
--------------------------------------------------------------------------------------
-|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.05%) |Training time=0.52s (23.51%) |Others=0.10 (4.44%)|CurSamplesPerSec=14.42 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2734|ppo_ep: 1|act_loss: 0.0076904296875|cri_loss: 0.004077911376953125|unsuper_loss: 0.0
-average reward score: 5.3671875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.15%) |Training time=0.46s (21.28%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2735|ppo_ep: 1|act_loss: 0.029144287109375|cri_loss: 0.01526641845703125|unsuper_loss: 0.0
-average reward score: 5.296875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.90%) |Training time=0.47s (21.54%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.45
-[2023-04-14 10:28:29,527] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
-epoch: 0|step: 2736|ppo_ep: 1|act_loss: 0.005260467529296875|cri_loss: 0.00360870361328125|unsuper_loss: 0.0
-average reward score: 5.15625
--------------------------------------------------------------------------------------
-|E2E latency=2.12s |Gather latency=0.00s (0.00%) |Generate time=1.60s (75.33%) |Training time=0.43s (20.06%) |Others=0.10 (4.61%)|CurSamplesPerSec=15.09 |AvgSamplesPerSec=14.45
-[2023-04-14 10:28:31,651] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
-epoch: 0|step: 2737|ppo_ep: 1|act_loss: -0.044464111328125|cri_loss: -0.0214996337890625|unsuper_loss: 0.0
-average reward score: 4.6875
--------------------------------------------------------------------------------------
-|E2E latency=2.12s |Gather latency=0.00s (0.00%) |Generate time=1.60s (75.31%) |Training time=0.43s (20.06%) |Others=0.10 (4.63%)|CurSamplesPerSec=15.07 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2738|ppo_ep: 1|act_loss: 0.0011739730834960938|cri_loss: 0.000949859619140625|unsuper_loss: 0.0
-average reward score: 5.28515625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.01%) |Training time=0.47s (21.49%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.45
-[2023-04-14 10:28:35,964] [INFO] [logging.py:96:log_dist] [Rank 0] step=2740, skipped=36, lr=[7.421647113578771e-06, 7.421647113578771e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:28:35,983] [INFO] [timer.py:199:stop] epoch=0/micro_step=2740/global_step=2740, RunningAvgSamplesPerSec=106.38762931965792, CurrSamplesPerSec=108.80439440355586, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:28:36,075] [INFO] [logging.py:96:log_dist] [Rank 0] step=2740, skipped=42, lr=[3.850276804286629e-06, 3.850276804286629e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2739|ppo_ep: 1|act_loss: -0.00032520294189453125|cri_loss: 0.0002455711364746094|unsuper_loss: 0.0
-average reward score: 5.57421875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.22%) |Training time=0.46s (21.23%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2740|ppo_ep: 1|act_loss: -0.03570556640625|cri_loss: -0.0167694091796875|unsuper_loss: 0.0
-average reward score: 5.21875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.27%) |Training time=0.46s (21.15%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2741|ppo_ep: 1|act_loss: -0.00420379638671875|cri_loss: 0.002628326416015625|unsuper_loss: 0.0
-average reward score: 5.55859375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.15%) |Training time=0.46s (21.20%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2742|ppo_ep: 1|act_loss: 0.03314208984375|cri_loss: 0.0172271728515625|unsuper_loss: 0.0
-average reward score: 5.1796875
--------------------------------------------------------------------------------------
-|E2E latency=3.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (50.06%) |Training time=0.46s (14.54%) |Others=1.13 (35.40%)|CurSamplesPerSec=10.05 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2743|ppo_ep: 1|act_loss: 0.0535888671875|cri_loss: 0.030548095703125|unsuper_loss: 0.0
-average reward score: 4.8125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.26%) |Training time=0.45s (21.16%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2744|ppo_ep: 1|act_loss: 0.04168701171875|cri_loss: 0.021636962890625|unsuper_loss: 0.0
-average reward score: 5.18359375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.98%) |Training time=0.46s (21.46%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2745|ppo_ep: 1|act_loss: 0.0287628173828125|cri_loss: 0.01482391357421875|unsuper_loss: 0.0
-average reward score: 6.07421875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.24%) |Training time=0.46s (21.22%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2746|ppo_ep: 1|act_loss: 0.0121002197265625|cri_loss: 0.006809234619140625|unsuper_loss: 0.0
-average reward score: 5.1875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.41%) |Training time=0.45s (21.00%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2747|ppo_ep: 1|act_loss: -0.0285797119140625|cri_loss: -0.0139007568359375|unsuper_loss: 0.0
-average reward score: 5.5859375
--------------------------------------------------------------------------------------
-|E2E latency=2.44s |Gather latency=0.00s (0.00%) |Generate time=1.60s (65.48%) |Training time=0.45s (18.31%) |Others=0.40 (16.21%)|CurSamplesPerSec=13.13 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2748|ppo_ep: 1|act_loss: -0.047943115234375|cri_loss: -0.0233612060546875|unsuper_loss: 0.0
-average reward score: 5.5078125
--------------------------------------------------------------------------------------
-|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.76s (76.32%) |Training time=0.45s (19.41%) |Others=0.10 (4.27%)|CurSamplesPerSec=13.87 |AvgSamplesPerSec=14.44
-[2023-04-14 10:28:58,976] [INFO] [logging.py:96:log_dist] [Rank 0] step=2750, skipped=36, lr=[7.405971149640408e-06, 7.405971149640408e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:28:58,994] [INFO] [timer.py:199:stop] epoch=0/micro_step=2750/global_step=2750, RunningAvgSamplesPerSec=106.39908594796809, CurrSamplesPerSec=114.45823124842768, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:28:59,087] [INFO] [logging.py:96:log_dist] [Rank 0] step=2750, skipped=42, lr=[3.842166494110451e-06, 3.842166494110451e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2749|ppo_ep: 1|act_loss: 0.000629425048828125|cri_loss: 0.002536773681640625|unsuper_loss: 0.0
-average reward score: 5.46484375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.82%) |Training time=0.44s (20.61%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2750|ppo_ep: 1|act_loss: -0.030181884765625|cri_loss: -0.013763427734375|unsuper_loss: 0.0
-average reward score: 5.140625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.41%) |Training time=0.45s (21.01%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2751|ppo_ep: 1|act_loss: -0.0014810562133789062|cri_loss: -0.00019550323486328125|unsuper_loss: 0.0
-average reward score: 5.28125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.31%) |Training time=0.45s (21.08%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2752|ppo_ep: 1|act_loss: 0.04644775390625|cri_loss: 0.02606201171875|unsuper_loss: 0.0
-average reward score: 5.5625
--------------------------------------------------------------------------------------
-|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.71s (75.69%) |Training time=0.45s (19.96%) |Others=0.10 (4.35%)|CurSamplesPerSec=14.13 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2753|ppo_ep: 1|act_loss: 0.08673095703125|cri_loss: 0.046783447265625|unsuper_loss: 0.0
-average reward score: 5.1171875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.51%) |Training time=0.45s (20.91%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2754|ppo_ep: 1|act_loss: -0.0129241943359375|cri_loss: -0.00525665283203125|unsuper_loss: 0.0
-average reward score: 6.1953125
--------------------------------------------------------------------------------------
-|E2E latency=2.83s |Gather latency=0.00s (0.00%) |Generate time=1.60s (56.37%) |Training time=0.45s (15.83%) |Others=0.79 (27.80%)|CurSamplesPerSec=11.29 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2755|ppo_ep: 1|act_loss: 0.030029296875|cri_loss: 0.0172119140625|unsuper_loss: 0.0
-average reward score: 5.08984375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.58%) |Training time=0.45s (20.83%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2756|ppo_ep: 1|act_loss: -0.00177001953125|cri_loss: -0.0005936622619628906|unsuper_loss: 0.0
-average reward score: 4.97265625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.64%) |Training time=0.45s (20.79%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2757|ppo_ep: 1|act_loss: 0.004314422607421875|cri_loss: 0.00274658203125|unsuper_loss: 0.0
-average reward score: 4.90234375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.58%) |Training time=0.45s (20.82%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2758|ppo_ep: 1|act_loss: -0.0126495361328125|cri_loss: -0.0058746337890625|unsuper_loss: 0.0
-average reward score: 5.8046875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.38%) |Training time=0.45s (21.04%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.44
-[2023-04-14 10:29:21,250] [INFO] [logging.py:96:log_dist] [Rank 0] step=2760, skipped=36, lr=[7.390256929483354e-06, 7.390256929483354e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:29:21,268] [INFO] [timer.py:199:stop] epoch=0/micro_step=2760/global_step=2760, RunningAvgSamplesPerSec=106.42223778280272, CurrSamplesPerSec=131.88637259169658, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:29:21,361] [INFO] [logging.py:96:log_dist] [Rank 0] step=2760, skipped=42, lr=[3.83403628978903e-06, 3.83403628978903e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2759|ppo_ep: 1|act_loss: -0.024261474609375|cri_loss: -0.00824737548828125|unsuper_loss: 0.0
-average reward score: 5.4140625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.64s (76.47%) |Training time=0.41s (18.93%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2760|ppo_ep: 1|act_loss: 0.00678253173828125|cri_loss: 0.003955841064453125|unsuper_loss: 0.0
-average reward score: 5.1875
--------------------------------------------------------------------------------------
-|E2E latency=3.42s |Gather latency=0.00s (0.00%) |Generate time=1.61s (47.01%) |Training time=0.43s (12.67%) |Others=1.38 (40.32%)|CurSamplesPerSec=9.35 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2761|ppo_ep: 1|act_loss: -0.016937255859375|cri_loss: -0.0078582763671875|unsuper_loss: 0.0
-average reward score: 5.7109375
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.11%) |Training time=0.50s (22.90%) |Others=0.11 (4.99%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2762|ppo_ep: 1|act_loss: 0.0010814666748046875|cri_loss: 0.0017833709716796875|unsuper_loss: 0.0
-average reward score: 4.83203125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.04%) |Training time=0.46s (21.38%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2763|ppo_ep: 1|act_loss: 0.028076171875|cri_loss: 0.014312744140625|unsuper_loss: 0.0
-average reward score: 5.33203125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.86%) |Training time=0.46s (21.56%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2764|ppo_ep: 1|act_loss: 0.03607177734375|cri_loss: 0.0192108154296875|unsuper_loss: 0.0
-average reward score: 5.265625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.92%) |Training time=0.47s (21.53%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2765|ppo_ep: 1|act_loss: 0.00830078125|cri_loss: 0.00431060791015625|unsuper_loss: 0.0
-average reward score: 6.3828125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.20%) |Training time=0.45s (21.21%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2766|ppo_ep: 1|act_loss: -0.0135498046875|cri_loss: -0.004924774169921875|unsuper_loss: 0.0
-average reward score: 5.12890625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.60%) |Training time=0.47s (21.83%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2767|ppo_ep: 1|act_loss: -0.008453369140625|cri_loss: -0.003582000732421875|unsuper_loss: 0.0
-average reward score: 5.421875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.28%) |Training time=0.48s (22.15%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2768|ppo_ep: 1|act_loss: -0.0152740478515625|cri_loss: -0.006717681884765625|unsuper_loss: 0.0
-average reward score: 5.26953125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.62%) |Training time=0.47s (21.79%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.44
-[2023-04-14 10:29:44,042] [INFO] [logging.py:96:log_dist] [Rank 0] step=2770, skipped=36, lr=[7.37450468603026e-06, 7.37450468603026e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:29:44,060] [INFO] [timer.py:199:stop] epoch=0/micro_step=2770/global_step=2770, RunningAvgSamplesPerSec=106.41993681795651, CurrSamplesPerSec=104.47658694697775, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:29:44,153] [INFO] [logging.py:96:log_dist] [Rank 0] step=2770, skipped=42, lr=[3.825886311831606e-06, 3.825886311831606e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2769|ppo_ep: 1|act_loss: 0.055755615234375|cri_loss: 0.0288848876953125|unsuper_loss: 0.0
-average reward score: 5.76171875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.60%) |Training time=0.47s (21.84%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2770|ppo_ep: 1|act_loss: -0.013946533203125|cri_loss: -0.006404876708984375|unsuper_loss: 0.0
-average reward score: 5.30859375
--------------------------------------------------------------------------------------
-|E2E latency=2.43s |Gather latency=0.00s (0.00%) |Generate time=1.58s (65.02%) |Training time=0.47s (19.46%) |Others=0.38 (15.52%)|CurSamplesPerSec=13.19 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2771|ppo_ep: 1|act_loss: 0.01171875|cri_loss: 0.006114959716796875|unsuper_loss: 0.0
-average reward score: 5.0859375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.53%) |Training time=0.47s (21.90%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2772|ppo_ep: 1|act_loss: -0.00611114501953125|cri_loss: -0.0029449462890625|unsuper_loss: 0.0
-average reward score: 5.19921875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.20%) |Training time=0.48s (22.24%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2773|ppo_ep: 1|act_loss: 0.00923919677734375|cri_loss: 0.005191802978515625|unsuper_loss: 0.0
-average reward score: 5.0859375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.35%) |Training time=0.47s (22.06%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2774|ppo_ep: 1|act_loss: -0.0199432373046875|cri_loss: -0.00905609130859375|unsuper_loss: 0.0
-average reward score: 5.078125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.14%) |Training time=0.46s (21.26%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2775|ppo_ep: 1|act_loss: -0.026885986328125|cri_loss: -0.01313018798828125|unsuper_loss: 0.0
-average reward score: 5.03515625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.07%) |Training time=0.46s (21.31%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2776|ppo_ep: 1|act_loss: -0.0113983154296875|cri_loss: -0.00531005859375|unsuper_loss: 0.0
-average reward score: 5.3984375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.83%) |Training time=0.46s (21.57%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2777|ppo_ep: 1|act_loss: -0.0291748046875|cri_loss: -0.014312744140625|unsuper_loss: 0.0
-average reward score: 5.0390625
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.08%) |Training time=0.50s (22.59%) |Others=0.12 (5.33%)|CurSamplesPerSec=14.56 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2778|ppo_ep: 1|act_loss: 0.044647216796875|cri_loss: 0.0236968994140625|unsuper_loss: 0.0
-average reward score: 5.8828125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.14%) |Training time=0.46s (21.28%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.44
-[2023-04-14 10:30:05,837] [INFO] [logging.py:96:log_dist] [Rank 0] step=2780, skipped=36, lr=[7.358714652767375e-06, 7.358714652767375e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:30:05,856] [INFO] [timer.py:199:stop] epoch=0/micro_step=2780/global_step=2780, RunningAvgSamplesPerSec=106.41343644976286, CurrSamplesPerSec=105.34027242011247, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:30:05,955] [INFO] [logging.py:96:log_dist] [Rank 0] step=2780, skipped=42, lr=[3.817716681040515e-06, 3.817716681040515e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2779|ppo_ep: 1|act_loss: 0.07403564453125|cri_loss: 0.03955078125|unsuper_loss: 0.0
-average reward score: 5.375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.65%) |Training time=0.47s (21.58%) |Others=0.10 (4.77%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2780|ppo_ep: 1|act_loss: 0.03387451171875|cri_loss: 0.0186920166015625|unsuper_loss: 0.0
-average reward score: 4.8359375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.51%) |Training time=0.46s (21.19%) |Others=0.12 (5.29%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2781|ppo_ep: 1|act_loss: 0.061767578125|cri_loss: 0.032928466796875|unsuper_loss: 0.0
-average reward score: 5.06640625
--------------------------------------------------------------------------------------
-|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.70s (75.51%) |Training time=0.45s (20.13%) |Others=0.10 (4.36%)|CurSamplesPerSec=14.21 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2782|ppo_ep: 1|act_loss: -0.02105712890625|cri_loss: -0.007843017578125|unsuper_loss: 0.0
-average reward score: 4.9453125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.91%) |Training time=0.46s (21.49%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2783|ppo_ep: 1|act_loss: -0.0007939338684082031|cri_loss: -0.00011396408081054688|unsuper_loss: 0.0
-average reward score: 5.2734375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.07%) |Training time=0.46s (21.34%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2784|ppo_ep: 1|act_loss: -0.057586669921875|cri_loss: -0.0283203125|unsuper_loss: 0.0
-average reward score: 5.1484375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.26%) |Training time=0.45s (21.14%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2785|ppo_ep: 1|act_loss: -0.04052734375|cri_loss: -0.019744873046875|unsuper_loss: 0.0
-average reward score: 5.296875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.70%) |Training time=0.47s (21.72%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2786|ppo_ep: 1|act_loss: 0.03509521484375|cri_loss: 0.0211029052734375|unsuper_loss: 0.0
-average reward score: 4.828125
--------------------------------------------------------------------------------------
-|E2E latency=2.43s |Gather latency=0.00s (0.00%) |Generate time=1.58s (65.20%) |Training time=0.46s (19.06%) |Others=0.38 (15.74%)|CurSamplesPerSec=13.17 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2787|ppo_ep: 1|act_loss: -0.0209197998046875|cri_loss: -0.0095672607421875|unsuper_loss: 0.0
-average reward score: 5.1328125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.47%) |Training time=0.45s (20.92%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2788|ppo_ep: 1|act_loss: 0.0149383544921875|cri_loss: 0.00780487060546875|unsuper_loss: 0.0
-average reward score: 5.140625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.78%) |Training time=0.46s (21.62%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.44
-[2023-04-14 10:30:27,697] [INFO] [logging.py:96:log_dist] [Rank 0] step=2790, skipped=36, lr=[7.34288706374108e-06, 7.34288706374108e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:30:27,715] [INFO] [timer.py:199:stop] epoch=0/micro_step=2790/global_step=2790, RunningAvgSamplesPerSec=106.42220408681138, CurrSamplesPerSec=112.92798048000672, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:30:27,808] [INFO] [logging.py:96:log_dist] [Rank 0] step=2790, skipped=42, lr=[3.8095275185093927e-06, 3.8095275185093927e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2789|ppo_ep: 1|act_loss: -0.0347900390625|cri_loss: -0.0169677734375|unsuper_loss: 0.0
-average reward score: 5.078125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.51%) |Training time=0.45s (20.90%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2790|ppo_ep: 1|act_loss: -0.03375244140625|cri_loss: -0.01496124267578125|unsuper_loss: 0.0
-average reward score: 5.703125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.27%) |Training time=0.45s (21.13%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2791|ppo_ep: 1|act_loss: 0.0228271484375|cri_loss: 0.012115478515625|unsuper_loss: 0.0
-average reward score: 5.27734375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.55%) |Training time=0.47s (21.88%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2792|ppo_ep: 1|act_loss: 0.0075836181640625|cri_loss: 0.00460052490234375|unsuper_loss: 0.0
-average reward score: 5.42578125
--------------------------------------------------------------------------------------
-|E2E latency=2.52s |Gather latency=0.00s (0.00%) |Generate time=1.77s (69.97%) |Training time=0.46s (18.37%) |Others=0.29 (11.66%)|CurSamplesPerSec=12.68 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2793|ppo_ep: 1|act_loss: 0.034515380859375|cri_loss: 0.018157958984375|unsuper_loss: 0.0
-average reward score: 4.91015625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.72%) |Training time=0.47s (21.70%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2794|ppo_ep: 1|act_loss: -0.01983642578125|cri_loss: -0.0094757080078125|unsuper_loss: 0.0
-average reward score: 5.43359375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.94%) |Training time=0.46s (21.46%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2795|ppo_ep: 1|act_loss: -0.021636962890625|cri_loss: -0.01044464111328125|unsuper_loss: 0.0
-average reward score: 5.48828125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.67%) |Training time=0.47s (21.74%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2796|ppo_ep: 1|act_loss: -0.01271820068359375|cri_loss: -0.0045928955078125|unsuper_loss: 0.0
-average reward score: 5.3203125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.63%) |Training time=0.47s (21.80%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2797|ppo_ep: 1|act_loss: 0.015655517578125|cri_loss: 0.0091094970703125|unsuper_loss: 0.0
-average reward score: 5.40234375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.56%) |Training time=0.47s (21.87%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2798|ppo_ep: 1|act_loss: 0.00893402099609375|cri_loss: 0.004947662353515625|unsuper_loss: 0.0
-average reward score: 5.15234375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.43%) |Training time=0.47s (21.99%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45
-[2023-04-14 10:30:49,522] [INFO] [logging.py:96:log_dist] [Rank 0] step=2800, skipped=36, lr=[7.327022153554431e-06, 7.327022153554431e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:30:49,540] [INFO] [timer.py:199:stop] epoch=0/micro_step=2800/global_step=2800, RunningAvgSamplesPerSec=106.41914468193407, CurrSamplesPerSec=104.58469930026338, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:30:49,633] [INFO] [logging.py:96:log_dist] [Rank 0] step=2800, skipped=42, lr=[3.8013189456213825e-06, 3.8013189456213825e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2799|ppo_ep: 1|act_loss: -0.0113983154296875|cri_loss: -0.00405120849609375|unsuper_loss: 0.0
-average reward score: 5.8203125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.61%) |Training time=0.47s (21.83%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2800|ppo_ep: 1|act_loss: -0.0082550048828125|cri_loss: -0.002552032470703125|unsuper_loss: 0.0
-average reward score: 5.296875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.47%) |Training time=0.47s (21.96%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2801|ppo_ep: 1|act_loss: -0.04388427734375|cri_loss: -0.0208587646484375|unsuper_loss: 0.0
-average reward score: 5.52734375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.67%) |Training time=0.47s (21.75%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2802|ppo_ep: 1|act_loss: -0.00791168212890625|cri_loss: -0.003131866455078125|unsuper_loss: 0.0
-average reward score: 5.01171875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.52%) |Training time=0.47s (21.90%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2803|ppo_ep: 1|act_loss: 0.00707244873046875|cri_loss: 0.00591278076171875|unsuper_loss: 0.0
-average reward score: 5.3515625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.71%) |Training time=0.47s (21.71%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2804|ppo_ep: 1|act_loss: -0.0084075927734375|cri_loss: -0.003631591796875|unsuper_loss: 0.0
-average reward score: 5.16015625
--------------------------------------------------------------------------------------
-|E2E latency=2.87s |Gather latency=0.00s (0.00%) |Generate time=1.59s (55.46%) |Training time=0.46s (16.10%) |Others=0.81 (28.43%)|CurSamplesPerSec=11.17 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2805|ppo_ep: 1|act_loss: 0.0189971923828125|cri_loss: 0.0105438232421875|unsuper_loss: 0.0
-average reward score: 4.80859375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.33%) |Training time=0.48s (22.09%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2806|ppo_ep: 1|act_loss: 0.057586669921875|cri_loss: 0.0298919677734375|unsuper_loss: 0.0
-average reward score: 5.4375
--------------------------------------------------------------------------------------
-|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.82%) |Training time=0.46s (19.91%) |Others=0.10 (4.27%)|CurSamplesPerSec=13.88 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2807|ppo_ep: 1|act_loss: 0.03485107421875|cri_loss: 0.01806640625|unsuper_loss: 0.0
-average reward score: 5.671875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.12%) |Training time=0.46s (21.29%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2808|ppo_ep: 1|act_loss: 0.054534912109375|cri_loss: 0.0288238525390625|unsuper_loss: 0.0
-average reward score: 5.7109375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.85%) |Training time=0.46s (21.57%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.44
-[2023-04-14 10:31:11,871] [INFO] [logging.py:96:log_dist] [Rank 0] step=2810, skipped=36, lr=[7.311120157363665e-06, 7.311120157363665e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:31:11,889] [INFO] [timer.py:199:stop] epoch=0/micro_step=2810/global_step=2810, RunningAvgSamplesPerSec=106.41754482170897, CurrSamplesPerSec=110.76354693624923, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:31:11,994] [INFO] [logging.py:96:log_dist] [Rank 0] step=2810, skipped=42, lr=[3.7930910840473377e-06, 3.7930910840473377e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2809|ppo_ep: 1|act_loss: 0.0004031658172607422|cri_loss: 0.00038695335388183594|unsuper_loss: 0.0
-average reward score: 5.171875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.98%) |Training time=0.45s (20.91%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2810|ppo_ep: 1|act_loss: 0.01690673828125|cri_loss: 0.0100555419921875|unsuper_loss: 0.0
-average reward score: 5.55859375
--------------------------------------------------------------------------------------
-|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.63s (71.55%) |Training time=0.55s (24.14%) |Others=0.10 (4.31%)|CurSamplesPerSec=14.04 |AvgSamplesPerSec=14.44
-epoch: 0|step: 2811|ppo_ep: 1|act_loss: -0.0279998779296875|cri_loss: -0.01287841796875|unsuper_loss: 0.0
-average reward score: 5.0
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.59%) |Training time=0.47s (21.83%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2812|ppo_ep: 1|act_loss: -0.038330078125|cri_loss: -0.0186614990234375|unsuper_loss: 0.0
-average reward score: 5.25390625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.16%) |Training time=0.48s (22.31%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2813|ppo_ep: 1|act_loss: 0.032928466796875|cri_loss: 0.017578125|unsuper_loss: 0.0
-average reward score: 5.19140625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.10%) |Training time=0.48s (22.35%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2814|ppo_ep: 1|act_loss: -0.0133514404296875|cri_loss: -0.006317138671875|unsuper_loss: 0.0
-average reward score: 5.7109375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.59%) |Training time=0.47s (21.83%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2815|ppo_ep: 1|act_loss: 0.0244140625|cri_loss: 0.01275634765625|unsuper_loss: 0.0
-average reward score: 5.12109375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.89%) |Training time=0.49s (22.57%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2816|ppo_ep: 1|act_loss: -0.0089569091796875|cri_loss: -0.0037841796875|unsuper_loss: 0.0
-average reward score: 5.28515625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.57s (72.28%) |Training time=0.50s (23.21%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2817|ppo_ep: 1|act_loss: -0.0140380859375|cri_loss: -0.006824493408203125|unsuper_loss: 0.0
-average reward score: 5.35546875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.02%) |Training time=0.48s (22.39%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2818|ppo_ep: 1|act_loss: 0.002536773681640625|cri_loss: 0.0032253265380859375|unsuper_loss: 0.0
-average reward score: 5.5546875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.27%) |Training time=0.48s (22.14%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.45
-[2023-04-14 10:31:33,595] [INFO] [logging.py:96:log_dist] [Rank 0] step=2820, skipped=36, lr=[7.295181310874729e-06, 7.295181310874729e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:31:33,613] [INFO] [timer.py:199:stop] epoch=0/micro_step=2820/global_step=2820, RunningAvgSamplesPerSec=106.38886203729886, CurrSamplesPerSec=102.92391691103396, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:31:33,832] [INFO] [logging.py:96:log_dist] [Rank 0] step=2820, skipped=42, lr=[3.7848440557440147e-06, 3.7848440557440147e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2819|ppo_ep: 1|act_loss: 0.03643798828125|cri_loss: 0.021148681640625|unsuper_loss: 0.0
-average reward score: 4.859375
--------------------------------------------------------------------------------------
-|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.61s (69.73%) |Training time=0.60s (26.01%) |Others=0.10 (4.26%)|CurSamplesPerSec=13.88 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2820|ppo_ep: 1|act_loss: -0.03076171875|cri_loss: -0.01486968994140625|unsuper_loss: 0.0
-average reward score: 5.1328125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.86%) |Training time=0.46s (21.57%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2821|ppo_ep: 1|act_loss: -0.05303955078125|cri_loss: -0.026031494140625|unsuper_loss: 0.0
-average reward score: 4.98828125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.48%) |Training time=0.47s (21.94%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.45
-[2023-04-14 10:31:40,440] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
-epoch: 0|step: 2822|ppo_ep: 1|act_loss: -0.04193115234375|cri_loss: -0.0196075439453125|unsuper_loss: 0.0
-average reward score: 6.0390625
--------------------------------------------------------------------------------------
-|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.45%) |Training time=0.48s (20.77%) |Others=0.09 (3.78%)|CurSamplesPerSec=13.83 |AvgSamplesPerSec=14.45
-[2023-04-14 10:31:42,585] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
-epoch: 0|step: 2823|ppo_ep: 1|act_loss: 0.017669677734375|cri_loss: 0.0095062255859375|unsuper_loss: 0.0
-average reward score: 5.2734375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.55%) |Training time=0.48s (22.38%) |Others=0.09 (4.07%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2824|ppo_ep: 1|act_loss: -0.035980224609375|cri_loss: -0.0175323486328125|unsuper_loss: 0.0
-average reward score: 5.15625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.31%) |Training time=0.48s (22.13%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2825|ppo_ep: 1|act_loss: 0.005672454833984375|cri_loss: 0.003009796142578125|unsuper_loss: 0.0
-average reward score: 5.50390625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.29%) |Training time=0.47s (21.70%) |Others=0.13 (6.01%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2826|ppo_ep: 1|act_loss: -0.0008726119995117188|cri_loss: -0.00019288063049316406|unsuper_loss: 0.0
-average reward score: 5.31640625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.49%) |Training time=0.47s (21.94%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2827|ppo_ep: 1|act_loss: -0.0009431838989257812|cri_loss: -7.43865966796875e-05|unsuper_loss: 0.0
-average reward score: 4.7890625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.43%) |Training time=0.47s (21.99%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2828|ppo_ep: 1|act_loss: 0.00569915771484375|cri_loss: 0.003047943115234375|unsuper_loss: 0.0
-average reward score: 5.125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.33%) |Training time=0.47s (22.10%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.45
-[2023-04-14 10:31:55,391] [INFO] [logging.py:96:log_dist] [Rank 0] step=2830, skipped=36, lr=[7.2792058503397775e-06, 7.2792058503397775e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:31:55,410] [INFO] [timer.py:199:stop] epoch=0/micro_step=2830/global_step=2830, RunningAvgSamplesPerSec=106.37661744892732, CurrSamplesPerSec=104.80478821218995, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:31:55,502] [INFO] [logging.py:96:log_dist] [Rank 0] step=2830, skipped=44, lr=[3.77823271519263e-06, 3.77823271519263e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2829|ppo_ep: 1|act_loss: 0.0153961181640625|cri_loss: 0.0088958740234375|unsuper_loss: 0.0
-average reward score: 4.9453125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.62%) |Training time=0.47s (21.81%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2830|ppo_ep: 1|act_loss: -0.013885498046875|cri_loss: -0.00673675537109375|unsuper_loss: 0.0
-average reward score: 5.1484375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.18%) |Training time=0.48s (22.24%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2831|ppo_ep: 1|act_loss: -0.035552978515625|cri_loss: -0.015960693359375|unsuper_loss: 0.0
-average reward score: 5.71875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.19%) |Training time=0.48s (22.21%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2832|ppo_ep: 1|act_loss: -0.01444244384765625|cri_loss: -0.0068206787109375|unsuper_loss: 0.0
-average reward score: 5.41796875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.49%) |Training time=0.47s (21.93%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2833|ppo_ep: 1|act_loss: -0.024932861328125|cri_loss: -0.01194000244140625|unsuper_loss: 0.0
-average reward score: 5.50390625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.55%) |Training time=0.47s (21.88%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2834|ppo_ep: 1|act_loss: 0.0003077983856201172|cri_loss: 0.00028443336486816406|unsuper_loss: 0.0
-average reward score: 5.77734375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.56%) |Training time=0.47s (21.88%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2835|ppo_ep: 1|act_loss: -0.03741455078125|cri_loss: -0.017669677734375|unsuper_loss: 0.0
-average reward score: 5.36328125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.45%) |Training time=0.45s (20.97%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2836|ppo_ep: 1|act_loss: -0.024627685546875|cri_loss: -0.0111541748046875|unsuper_loss: 0.0
-average reward score: 4.69921875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.76%) |Training time=0.44s (20.67%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2837|ppo_ep: 1|act_loss: -0.0279388427734375|cri_loss: -0.0135650634765625|unsuper_loss: 0.0
-average reward score: 5.10546875
--------------------------------------------------------------------------------------
-|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.39%) |Training time=0.47s (20.39%) |Others=0.10 (4.22%)|CurSamplesPerSec=13.74 |AvgSamplesPerSec=14.45
-[2023-04-14 10:32:14,915] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
-epoch: 0|step: 2838|ppo_ep: 1|act_loss: -0.03369140625|cri_loss: -0.01508331298828125|unsuper_loss: 0.0
-average reward score: 5.00390625
--------------------------------------------------------------------------------------
-|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.42%) |Training time=0.45s (20.97%) |Others=0.10 (4.61%)|CurSamplesPerSec=15.06 |AvgSamplesPerSec=14.45
-[2023-04-14 10:32:17,061] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
-[2023-04-14 10:32:17,062] [INFO] [logging.py:96:log_dist] [Rank 0] step=2840, skipped=38, lr=[7.266399278906688e-06, 7.266399278906688e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:32:17,062] [INFO] [timer.py:199:stop] epoch=0/micro_step=2840/global_step=2840, RunningAvgSamplesPerSec=106.37877097595695, CurrSamplesPerSec=116.98970678860347, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:32:17,154] [INFO] [logging.py:96:log_dist] [Rank 0] step=2840, skipped=44, lr=[3.769951495013317e-06, 3.769951495013317e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2839|ppo_ep: 1|act_loss: -0.0142669677734375|cri_loss: -0.006763458251953125|unsuper_loss: 0.0
-average reward score: 5.46875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.15%) |Training time=0.44s (20.29%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2840|ppo_ep: 1|act_loss: -0.00069427490234375|cri_loss: 0.0004968643188476562|unsuper_loss: 0.0
-average reward score: 5.0859375
--------------------------------------------------------------------------------------
-|E2E latency=2.45s |Gather latency=0.00s (0.00%) |Generate time=1.68s (68.55%) |Training time=0.47s (19.18%) |Others=0.30 (12.27%)|CurSamplesPerSec=13.07 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2841|ppo_ep: 1|act_loss: 0.0382080078125|cri_loss: 0.0198822021484375|unsuper_loss: 0.0
-average reward score: 5.45703125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.94%) |Training time=0.46s (21.47%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2842|ppo_ep: 1|act_loss: 0.035308837890625|cri_loss: 0.0183868408203125|unsuper_loss: 0.0
-average reward score: 5.484375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.11%) |Training time=0.48s (22.33%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2843|ppo_ep: 1|act_loss: -0.0150146484375|cri_loss: -0.007251739501953125|unsuper_loss: 0.0
-average reward score: 4.95703125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.51%) |Training time=0.47s (21.91%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2844|ppo_ep: 1|act_loss: -0.037109375|cri_loss: -0.017974853515625|unsuper_loss: 0.0
-average reward score: 4.9296875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.09%) |Training time=0.48s (22.36%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2845|ppo_ep: 1|act_loss: -0.0166168212890625|cri_loss: -0.007476806640625|unsuper_loss: 0.0
-average reward score: 5.37109375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.47%) |Training time=0.47s (21.96%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2846|ppo_ep: 1|act_loss: -0.0270538330078125|cri_loss: -0.01276397705078125|unsuper_loss: 0.0
-average reward score: 5.375
--------------------------------------------------------------------------------------
-|E2E latency=3.05s |Gather latency=0.00s (0.00%) |Generate time=1.58s (51.83%) |Training time=0.46s (15.23%) |Others=1.00 (32.94%)|CurSamplesPerSec=10.50 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2847|ppo_ep: 1|act_loss: -0.033111572265625|cri_loss: -0.01593017578125|unsuper_loss: 0.0
-average reward score: 5.75
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.66%) |Training time=0.47s (21.74%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2848|ppo_ep: 1|act_loss: -0.0134124755859375|cri_loss: -0.006237030029296875|unsuper_loss: 0.0
-average reward score: 5.95703125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.77%) |Training time=0.47s (21.72%) |Others=0.12 (5.51%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.45
-[2023-04-14 10:32:39,758] [INFO] [logging.py:96:log_dist] [Rank 0] step=2850, skipped=38, lr=[7.2503585101744275e-06, 7.2503585101744275e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:32:39,776] [INFO] [timer.py:199:stop] epoch=0/micro_step=2850/global_step=2850, RunningAvgSamplesPerSec=106.37187852437273, CurrSamplesPerSec=110.91549223611467, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:32:39,869] [INFO] [logging.py:96:log_dist] [Rank 0] step=2850, skipped=44, lr=[3.7616514510892553e-06, 3.7616514510892553e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2849|ppo_ep: 1|act_loss: -0.0085906982421875|cri_loss: -0.002605438232421875|unsuper_loss: 0.0
-average reward score: 5.4609375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.57%) |Training time=0.45s (20.86%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2850|ppo_ep: 1|act_loss: 0.0133056640625|cri_loss: 0.00717926025390625|unsuper_loss: 0.0
-average reward score: 5.35546875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.34%) |Training time=0.48s (22.14%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2851|ppo_ep: 1|act_loss: 0.03900146484375|cri_loss: 0.021392822265625|unsuper_loss: 0.0
-average reward score: 5.0234375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.58%) |Training time=0.47s (21.86%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2852|ppo_ep: 1|act_loss: 0.02392578125|cri_loss: 0.01236724853515625|unsuper_loss: 0.0
-average reward score: 5.390625
--------------------------------------------------------------------------------------
-|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.74s (75.60%) |Training time=0.46s (20.13%) |Others=0.10 (4.27%)|CurSamplesPerSec=13.87 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2853|ppo_ep: 1|act_loss: 0.0272216796875|cri_loss: 0.01763916015625|unsuper_loss: 0.0
-average reward score: 4.8515625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.36%) |Training time=0.47s (21.99%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2854|ppo_ep: 1|act_loss: 0.0024585723876953125|cri_loss: 0.001834869384765625|unsuper_loss: 0.0
-average reward score: 5.734375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.67%) |Training time=0.49s (22.60%) |Others=0.10 (4.73%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2855|ppo_ep: 1|act_loss: -0.029083251953125|cri_loss: -0.0135650634765625|unsuper_loss: 0.0
-average reward score: 4.68359375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.47%) |Training time=0.47s (21.96%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2856|ppo_ep: 1|act_loss: -0.001056671142578125|cri_loss: -0.0002918243408203125|unsuper_loss: 0.0
-average reward score: 5.53515625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.86%) |Training time=0.49s (22.56%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2857|ppo_ep: 1|act_loss: -0.02532958984375|cri_loss: -0.01251983642578125|unsuper_loss: 0.0
-average reward score: 4.75390625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.86%) |Training time=0.49s (22.59%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2858|ppo_ep: 1|act_loss: -0.033660888671875|cri_loss: -0.0146331787109375|unsuper_loss: 0.0
-average reward score: 5.171875
--------------------------------------------------------------------------------------
-|E2E latency=2.95s |Gather latency=0.00s (0.00%) |Generate time=1.58s (53.57%) |Training time=0.50s (16.83%) |Others=0.87 (29.60%)|CurSamplesPerSec=10.85 |AvgSamplesPerSec=14.45
-[2023-04-14 10:33:02,332] [INFO] [logging.py:96:log_dist] [Rank 0] step=2860, skipped=38, lr=[7.234281791778185e-06, 7.234281791778185e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:33:02,351] [INFO] [timer.py:199:stop] epoch=0/micro_step=2860/global_step=2860, RunningAvgSamplesPerSec=106.34976605325895, CurrSamplesPerSec=101.42614741121662, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:33:02,444] [INFO] [logging.py:96:log_dist] [Rank 0] step=2860, skipped=44, lr=[3.7533327064471216e-06, 3.7533327064471216e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2859|ppo_ep: 1|act_loss: 0.00896453857421875|cri_loss: 0.00466156005859375|unsuper_loss: 0.0
-average reward score: 5.48828125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.30%) |Training time=0.48s (22.12%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2860|ppo_ep: 1|act_loss: 0.04473876953125|cri_loss: 0.0229949951171875|unsuper_loss: 0.0
-average reward score: 5.2421875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.07%) |Training time=0.49s (22.40%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2861|ppo_ep: 1|act_loss: -0.00661468505859375|cri_loss: -0.002811431884765625|unsuper_loss: 0.0
-average reward score: 5.1328125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.71%) |Training time=0.49s (22.76%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2862|ppo_ep: 1|act_loss: 0.0772705078125|cri_loss: 0.0430908203125|unsuper_loss: 0.0
-average reward score: 5.4453125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.78%) |Training time=0.46s (21.63%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2863|ppo_ep: 1|act_loss: 0.0021076202392578125|cri_loss: 0.0012416839599609375|unsuper_loss: 0.0
-average reward score: 5.20703125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.69%) |Training time=0.47s (21.70%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2864|ppo_ep: 1|act_loss: -0.037628173828125|cri_loss: -0.017913818359375|unsuper_loss: 0.0
-average reward score: 5.50390625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.43%) |Training time=0.47s (21.85%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2865|ppo_ep: 1|act_loss: -0.0204315185546875|cri_loss: -0.0097503662109375|unsuper_loss: 0.0
-average reward score: 4.85546875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.06%) |Training time=0.48s (22.38%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2866|ppo_ep: 1|act_loss: -0.007228851318359375|cri_loss: -0.0034275054931640625|unsuper_loss: 0.0
-average reward score: 4.875
--------------------------------------------------------------------------------------
-|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.74s (74.63%) |Training time=0.49s (21.17%) |Others=0.10 (4.20%)|CurSamplesPerSec=13.71 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2867|ppo_ep: 1|act_loss: -0.015838623046875|cri_loss: -0.007709503173828125|unsuper_loss: 0.0
-average reward score: 5.40625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.04%) |Training time=0.49s (22.43%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2868|ppo_ep: 1|act_loss: -0.0140533447265625|cri_loss: -0.006755828857421875|unsuper_loss: 0.0
-average reward score: 5.16796875
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.38%) |Training time=0.49s (22.14%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.45
-[2023-04-14 10:33:24,269] [INFO] [logging.py:96:log_dist] [Rank 0] step=2870, skipped=38, lr=[7.2181693620137145e-06, 7.2181693620137145e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:33:24,287] [INFO] [timer.py:199:stop] epoch=0/micro_step=2870/global_step=2870, RunningAvgSamplesPerSec=106.32509058550373, CurrSamplesPerSec=95.30749151435104, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:33:24,380] [INFO] [logging.py:96:log_dist] [Rank 0] step=2870, skipped=44, lr=[3.7449953843907764e-06, 3.7449953843907764e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2869|ppo_ep: 1|act_loss: -0.019073486328125|cri_loss: -0.00899505615234375|unsuper_loss: 0.0
-average reward score: 5.3984375
--------------------------------------------------------------------------------------
-|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.68s (73.84%) |Training time=0.50s (21.87%) |Others=0.10 (4.29%)|CurSamplesPerSec=14.05 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2870|ppo_ep: 1|act_loss: -0.05633544921875|cri_loss: -0.0259857177734375|unsuper_loss: 0.0
-average reward score: 5.1171875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.62%) |Training time=0.50s (22.87%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2871|ppo_ep: 1|act_loss: 0.0181427001953125|cri_loss: 0.00942230224609375|unsuper_loss: 0.0
-average reward score: 5.8125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.89%) |Training time=0.49s (22.57%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2872|ppo_ep: 1|act_loss: 0.0108642578125|cri_loss: 0.005611419677734375|unsuper_loss: 0.0
-average reward score: 5.11328125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.69%) |Training time=0.49s (22.71%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2873|ppo_ep: 1|act_loss: 0.064453125|cri_loss: 0.033233642578125|unsuper_loss: 0.0
-average reward score: 5.5234375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.75%) |Training time=0.49s (22.73%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2874|ppo_ep: 1|act_loss: 0.0105133056640625|cri_loss: 0.00571441650390625|unsuper_loss: 0.0
-average reward score: 5.421875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.26%) |Training time=0.48s (22.20%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2875|ppo_ep: 1|act_loss: -0.03668212890625|cri_loss: -0.0172271728515625|unsuper_loss: 0.0
-average reward score: 5.26953125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.14%) |Training time=0.48s (22.30%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2876|ppo_ep: 1|act_loss: 0.009979248046875|cri_loss: 0.00543212890625|unsuper_loss: 0.0
-average reward score: 5.0703125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.12%) |Training time=0.48s (22.32%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2877|ppo_ep: 1|act_loss: -0.024017333984375|cri_loss: -0.01128387451171875|unsuper_loss: 0.0
-average reward score: 5.1328125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.71%) |Training time=0.49s (22.76%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2878|ppo_ep: 1|act_loss: -0.0335693359375|cri_loss: -0.0164337158203125|unsuper_loss: 0.0
-average reward score: 4.734375
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.58s (71.78%) |Training time=0.49s (22.33%) |Others=0.13 (5.88%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.45
-[2023-04-14 10:33:45,963] [INFO] [logging.py:96:log_dist] [Rank 0] step=2880, skipped=38, lr=[7.20202145970609e-06, 7.20202145970609e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:33:45,981] [INFO] [timer.py:199:stop] epoch=0/micro_step=2880/global_step=2880, RunningAvgSamplesPerSec=106.29718321043626, CurrSamplesPerSec=108.31802231928128, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:33:46,081] [INFO] [logging.py:96:log_dist] [Rank 0] step=2880, skipped=44, lr=[3.736639608499448e-06, 3.736639608499448e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2879|ppo_ep: 1|act_loss: -0.027557373046875|cri_loss: -0.01270294189453125|unsuper_loss: 0.0
-average reward score: 5.40625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.87%) |Training time=0.46s (21.23%) |Others=0.11 (4.89%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2880|ppo_ep: 1|act_loss: -0.0287933349609375|cri_loss: -0.0141143798828125|unsuper_loss: 0.0
-average reward score: 5.84765625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.13%) |Training time=0.46s (21.27%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2881|ppo_ep: 1|act_loss: -0.0151824951171875|cri_loss: -0.006744384765625|unsuper_loss: 0.0
-average reward score: 5.1640625
--------------------------------------------------------------------------------------
-|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.76s (76.27%) |Training time=0.45s (19.46%) |Others=0.10 (4.26%)|CurSamplesPerSec=13.88 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2882|ppo_ep: 1|act_loss: 0.0058746337890625|cri_loss: 0.003101348876953125|unsuper_loss: 0.0
-average reward score: 5.203125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.02%) |Training time=0.46s (21.41%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2883|ppo_ep: 1|act_loss: 0.01129150390625|cri_loss: 0.0064697265625|unsuper_loss: 0.0
-average reward score: 5.9609375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.90%) |Training time=0.46s (21.50%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2884|ppo_ep: 1|act_loss: -0.00933837890625|cri_loss: -0.003841400146484375|unsuper_loss: 0.0
-average reward score: 5.28125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.35%) |Training time=0.45s (21.05%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2885|ppo_ep: 1|act_loss: -0.0268402099609375|cri_loss: -0.0126953125|unsuper_loss: 0.0
-average reward score: 5.79296875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.75%) |Training time=0.47s (21.70%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2886|ppo_ep: 1|act_loss: 0.00786590576171875|cri_loss: 0.004169464111328125|unsuper_loss: 0.0
-average reward score: 5.4765625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.63%) |Training time=0.47s (21.73%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2887|ppo_ep: 1|act_loss: -0.00860595703125|cri_loss: -0.004085540771484375|unsuper_loss: 0.0
-average reward score: 5.53515625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.96%) |Training time=0.49s (22.49%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2888|ppo_ep: 1|act_loss: 0.017120361328125|cri_loss: 0.00911712646484375|unsuper_loss: 0.0
-average reward score: 5.16015625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.43%) |Training time=0.45s (20.87%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.45
-[2023-04-14 10:34:07,712] [INFO] [logging.py:96:log_dist] [Rank 0] step=2890, skipped=38, lr=[7.185838324206182e-06, 7.185838324206182e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:34:07,730] [INFO] [timer.py:199:stop] epoch=0/micro_step=2890/global_step=2890, RunningAvgSamplesPerSec=106.30418029404586, CurrSamplesPerSec=124.88239437340486, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:34:07,822] [INFO] [logging.py:96:log_dist] [Rank 0] step=2890, skipped=44, lr=[3.7282655026258895e-06, 3.7282655026258895e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2889|ppo_ep: 1|act_loss: -0.0262298583984375|cri_loss: -0.01256561279296875|unsuper_loss: 0.0
-average reward score: 4.9765625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.65s (76.13%) |Training time=0.42s (19.31%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2890|ppo_ep: 1|act_loss: -0.00704193115234375|cri_loss: -0.0017490386962890625|unsuper_loss: 0.0
-average reward score: 5.2578125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.68%) |Training time=0.47s (21.77%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2891|ppo_ep: 1|act_loss: 0.01342010498046875|cri_loss: 0.0097503662109375|unsuper_loss: 0.0
-average reward score: 5.26953125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.30%) |Training time=0.46s (21.15%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2892|ppo_ep: 1|act_loss: 0.007518768310546875|cri_loss: 0.00547027587890625|unsuper_loss: 0.0
-average reward score: 5.33203125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.92%) |Training time=0.47s (21.54%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2893|ppo_ep: 1|act_loss: 0.09027099609375|cri_loss: 0.05419921875|unsuper_loss: 0.0
-average reward score: 5.16015625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.78%) |Training time=0.47s (21.65%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2894|ppo_ep: 1|act_loss: 0.01409912109375|cri_loss: 0.00913238525390625|unsuper_loss: 0.0
-average reward score: 3.931640625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.80%) |Training time=0.43s (19.65%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2895|ppo_ep: 1|act_loss: 0.0149078369140625|cri_loss: 0.0116729736328125|unsuper_loss: 0.0
-average reward score: 5.12109375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.99%) |Training time=0.44s (20.34%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2896|ppo_ep: 1|act_loss: 0.025390625|cri_loss: 0.01457977294921875|unsuper_loss: 0.0
-average reward score: 4.33203125
--------------------------------------------------------------------------------------
-|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.22%) |Training time=0.48s (20.54%) |Others=0.10 (4.23%)|CurSamplesPerSec=13.73 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2897|ppo_ep: 1|act_loss: 0.033203125|cri_loss: 0.0169677734375|unsuper_loss: 0.0
-average reward score: 5.1640625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.75%) |Training time=0.48s (21.76%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2898|ppo_ep: 1|act_loss: 0.0023441314697265625|cri_loss: 0.00432586669921875|unsuper_loss: 0.0
-average reward score: 5.03125
--------------------------------------------------------------------------------------
-|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.66%) |Training time=0.48s (21.01%) |Others=0.10 (4.33%)|CurSamplesPerSec=14.11 |AvgSamplesPerSec=14.45
-[2023-04-14 10:34:29,698] [INFO] [logging.py:96:log_dist] [Rank 0] step=2900, skipped=38, lr=[7.169620195387097e-06, 7.169620195387097e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:34:29,717] [INFO] [timer.py:199:stop] epoch=0/micro_step=2900/global_step=2900, RunningAvgSamplesPerSec=106.30332220155994, CurrSamplesPerSec=100.94465398682478, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:34:29,809] [INFO] [logging.py:96:log_dist] [Rank 0] step=2900, skipped=44, lr=[3.719873190894554e-06, 3.719873190894554e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2899|ppo_ep: 1|act_loss: 0.0145416259765625|cri_loss: 0.007717132568359375|unsuper_loss: 0.0
-average reward score: 5.48046875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.26%) |Training time=0.48s (22.21%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2900|ppo_ep: 1|act_loss: 0.007343292236328125|cri_loss: 0.0048675537109375|unsuper_loss: 0.0
-average reward score: 5.23828125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.67%) |Training time=0.47s (21.79%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2901|ppo_ep: 1|act_loss: 0.00492095947265625|cri_loss: 0.00273895263671875|unsuper_loss: 0.0
-average reward score: 5.69140625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.64%) |Training time=0.47s (21.82%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2902|ppo_ep: 1|act_loss: 0.000827789306640625|cri_loss: 0.001277923583984375|unsuper_loss: 0.0
-average reward score: 5.17578125
--------------------------------------------------------------------------------------
-|E2E latency=2.50s |Gather latency=0.00s (0.00%) |Generate time=1.59s (63.82%) |Training time=0.47s (18.99%) |Others=0.43 (17.19%)|CurSamplesPerSec=12.82 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2903|ppo_ep: 1|act_loss: -0.0103912353515625|cri_loss: -0.004791259765625|unsuper_loss: 0.0
-average reward score: 5.3671875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.74%) |Training time=0.47s (21.73%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2904|ppo_ep: 1|act_loss: 0.003299713134765625|cri_loss: 0.0017385482788085938|unsuper_loss: 0.0
-average reward score: 5.2578125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.50%) |Training time=0.48s (21.97%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2905|ppo_ep: 1|act_loss: -0.005413055419921875|cri_loss: -0.0025043487548828125|unsuper_loss: 0.0
-average reward score: 5.4765625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.85%) |Training time=0.47s (21.60%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2906|ppo_ep: 1|act_loss: 0.001773834228515625|cri_loss: 0.0010547637939453125|unsuper_loss: 0.0
-average reward score: 5.65625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.60%) |Training time=0.47s (21.86%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2907|ppo_ep: 1|act_loss: 0.0262451171875|cri_loss: 0.01409912109375|unsuper_loss: 0.0
-average reward score: 5.90234375
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.58s (71.80%) |Training time=0.49s (22.18%) |Others=0.13 (6.02%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2908|ppo_ep: 1|act_loss: 0.00876617431640625|cri_loss: 0.0050048828125|unsuper_loss: 0.0
-average reward score: 5.6953125
--------------------------------------------------------------------------------------
-|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.33%) |Training time=0.45s (20.96%) |Others=0.10 (4.71%)|CurSamplesPerSec=15.01 |AvgSamplesPerSec=14.45
-[2023-04-14 10:34:51,677] [INFO] [logging.py:96:log_dist] [Rank 0] step=2910, skipped=38, lr=[7.1533673136406274e-06, 7.1533673136406274e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:34:51,695] [INFO] [timer.py:199:stop] epoch=0/micro_step=2910/global_step=2910, RunningAvgSamplesPerSec=106.29643390840222, CurrSamplesPerSec=104.84285647330844, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:34:51,788] [INFO] [logging.py:96:log_dist] [Rank 0] step=2910, skipped=44, lr=[3.7114627976997454e-06, 3.7114627976997454e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2909|ppo_ep: 1|act_loss: 0.01393890380859375|cri_loss: 0.008758544921875|unsuper_loss: 0.0
-average reward score: 5.07421875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.86%) |Training time=0.47s (21.60%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2910|ppo_ep: 1|act_loss: -0.00762939453125|cri_loss: -0.0035247802734375|unsuper_loss: 0.0
-average reward score: 5.40234375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.95%) |Training time=0.49s (22.51%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2911|ppo_ep: 1|act_loss: 0.01641845703125|cri_loss: 0.00928497314453125|unsuper_loss: 0.0
-average reward score: 5.37109375
--------------------------------------------------------------------------------------
-|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.58s (70.96%) |Training time=0.48s (21.42%) |Others=0.17 (7.62%)|CurSamplesPerSec=14.33 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2912|ppo_ep: 1|act_loss: -0.03759765625|cri_loss: -0.0178375244140625|unsuper_loss: 0.0
-average reward score: 5.4140625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.15%) |Training time=0.48s (22.16%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2913|ppo_ep: 1|act_loss: -0.00995635986328125|cri_loss: -0.004405975341796875|unsuper_loss: 0.0
-average reward score: 5.09375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.43%) |Training time=0.48s (22.01%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2914|ppo_ep: 1|act_loss: -0.023712158203125|cri_loss: -0.0115509033203125|unsuper_loss: 0.0
-average reward score: 5.6171875
--------------------------------------------------------------------------------------
-|E2E latency=2.62s |Gather latency=0.00s (0.00%) |Generate time=1.58s (60.47%) |Training time=0.48s (18.30%) |Others=0.56 (21.22%)|CurSamplesPerSec=12.22 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2915|ppo_ep: 1|act_loss: -0.03619384765625|cri_loss: -0.0172119140625|unsuper_loss: 0.0
-average reward score: 5.4609375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.13%) |Training time=0.48s (22.30%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2916|ppo_ep: 1|act_loss: 0.022613525390625|cri_loss: 0.014739990234375|unsuper_loss: 0.0
-average reward score: 5.29296875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.51%) |Training time=0.47s (21.95%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2917|ppo_ep: 1|act_loss: 0.03131103515625|cri_loss: 0.016082763671875|unsuper_loss: 0.0
-average reward score: 5.28125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.24%) |Training time=0.48s (22.23%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2918|ppo_ep: 1|act_loss: 0.025970458984375|cri_loss: 0.0135650634765625|unsuper_loss: 0.0
-average reward score: 5.84375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.46%) |Training time=0.47s (21.99%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.45
-[2023-04-14 10:35:13,853] [INFO] [logging.py:96:log_dist] [Rank 0] step=2920, skipped=38, lr=[7.1370799198736894e-06, 7.1370799198736894e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:35:13,872] [INFO] [timer.py:199:stop] epoch=0/micro_step=2920/global_step=2920, RunningAvgSamplesPerSec=106.27708410449475, CurrSamplesPerSec=102.26821669159538, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:35:13,964] [INFO] [logging.py:96:log_dist] [Rank 0] step=2920, skipped=44, lr=[3.7030344477037794e-06, 3.7030344477037794e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2919|ppo_ep: 1|act_loss: -0.017486572265625|cri_loss: -0.00734710693359375|unsuper_loss: 0.0
-average reward score: 5.1484375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.47%) |Training time=0.48s (22.00%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2920|ppo_ep: 1|act_loss: 0.004459381103515625|cri_loss: 0.003025054931640625|unsuper_loss: 0.0
-average reward score: 5.109375
--------------------------------------------------------------------------------------
-|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.59s (67.90%) |Training time=0.48s (20.49%) |Others=0.27 (11.61%)|CurSamplesPerSec=13.67 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2921|ppo_ep: 1|act_loss: -0.0131072998046875|cri_loss: -0.005886077880859375|unsuper_loss: 0.0
-average reward score: 5.5
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.63%) |Training time=0.47s (21.82%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2922|ppo_ep: 1|act_loss: -0.017913818359375|cri_loss: -0.008392333984375|unsuper_loss: 0.0
-average reward score: 5.14453125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.65%) |Training time=0.48s (21.85%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2923|ppo_ep: 1|act_loss: 0.001232147216796875|cri_loss: 0.0011682510375976562|unsuper_loss: 0.0
-average reward score: 5.41015625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.41%) |Training time=0.48s (22.04%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.45
-[2023-04-14 10:35:24,979] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
-epoch: 0|step: 2924|ppo_ep: 1|act_loss: 0.05810546875|cri_loss: 0.03289794921875|unsuper_loss: 0.0
-average reward score: 5.9375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.20%) |Training time=0.47s (21.72%) |Others=0.09 (4.08%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45
-[2023-04-14 10:35:27,132] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
-epoch: 0|step: 2925|ppo_ep: 1|act_loss: 0.0648193359375|cri_loss: 0.034698486328125|unsuper_loss: 0.0
-average reward score: 5.36328125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.75%) |Training time=0.48s (22.17%) |Others=0.09 (4.08%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2926|ppo_ep: 1|act_loss: 0.02801513671875|cri_loss: 0.01515960693359375|unsuper_loss: 0.0
-average reward score: 5.953125
--------------------------------------------------------------------------------------
-|E2E latency=2.83s |Gather latency=0.00s (0.00%) |Generate time=1.78s (62.85%) |Training time=0.47s (16.68%) |Others=0.58 (20.46%)|CurSamplesPerSec=11.31 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2927|ppo_ep: 1|act_loss: 0.0149688720703125|cri_loss: 0.00780487060546875|unsuper_loss: 0.0
-average reward score: 5.49609375
--------------------------------------------------------------------------------------
-|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.58%) |Training time=0.48s (21.07%) |Others=0.10 (4.35%)|CurSamplesPerSec=14.12 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2928|ppo_ep: 1|act_loss: 0.00994110107421875|cri_loss: 0.0059051513671875|unsuper_loss: 0.0
-average reward score: 5.9296875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.05%) |Training time=0.49s (22.43%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.45
-[2023-04-14 10:35:36,456] [INFO] [logging.py:96:log_dist] [Rank 0] step=2930, skipped=38, lr=[7.120758255504751e-06, 7.120758255504751e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:35:36,474] [INFO] [timer.py:199:stop] epoch=0/micro_step=2930/global_step=2930, RunningAvgSamplesPerSec=106.26202270186438, CurrSamplesPerSec=101.59874100814726, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:35:36,567] [INFO] [logging.py:96:log_dist] [Rank 0] step=2930, skipped=46, lr=[3.6962789227532165e-06, 3.6962789227532165e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2929|ppo_ep: 1|act_loss: 0.00696563720703125|cri_loss: 0.004253387451171875|unsuper_loss: 0.0
-average reward score: 5.140625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.39%) |Training time=0.48s (22.09%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2930|ppo_ep: 1|act_loss: 0.020294189453125|cri_loss: 0.01128387451171875|unsuper_loss: 0.0
-average reward score: 5.12890625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.44%) |Training time=0.48s (22.01%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2931|ppo_ep: 1|act_loss: -0.0184783935546875|cri_loss: -0.00890350341796875|unsuper_loss: 0.0
-average reward score: 4.8671875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.44%) |Training time=0.48s (22.00%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2932|ppo_ep: 1|act_loss: -0.0265960693359375|cri_loss: -0.01244354248046875|unsuper_loss: 0.0
-average reward score: 5.16015625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.22%) |Training time=0.48s (22.19%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2933|ppo_ep: 1|act_loss: -0.001399993896484375|cri_loss: -0.00023174285888671875|unsuper_loss: 0.0
-average reward score: 5.28515625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.12%) |Training time=0.48s (22.34%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2934|ppo_ep: 1|act_loss: -0.023223876953125|cri_loss: -0.01146697998046875|unsuper_loss: 0.0
-average reward score: 5.21484375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.34%) |Training time=0.48s (22.11%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2935|ppo_ep: 1|act_loss: -0.043487548828125|cri_loss: -0.021331787109375|unsuper_loss: 0.0
-average reward score: 4.94140625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.13%) |Training time=0.48s (22.32%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2936|ppo_ep: 1|act_loss: -0.0113525390625|cri_loss: -0.00528717041015625|unsuper_loss: 0.0
-average reward score: 5.296875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.17%) |Training time=0.48s (22.27%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2937|ppo_ep: 1|act_loss: 0.0017881393432617188|cri_loss: 0.00162506103515625|unsuper_loss: 0.0
-average reward score: 5.140625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.14%) |Training time=0.48s (22.32%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2938|ppo_ep: 1|act_loss: 0.004360198974609375|cri_loss: 0.002471923828125|unsuper_loss: 0.0
-average reward score: 5.140625
--------------------------------------------------------------------------------------
-|E2E latency=2.99s |Gather latency=0.00s (0.00%) |Generate time=1.58s (53.00%) |Training time=0.48s (16.19%) |Others=0.92 (30.80%)|CurSamplesPerSec=10.72 |AvgSamplesPerSec=14.45
-[2023-04-14 10:35:58,887] [INFO] [logging.py:96:log_dist] [Rank 0] step=2940, skipped=38, lr=[7.10440256246025e-06, 7.10440256246025e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:35:58,905] [INFO] [timer.py:199:stop] epoch=0/micro_step=2940/global_step=2940, RunningAvgSamplesPerSec=106.2466257993554, CurrSamplesPerSec=115.49485676915266, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:35:58,998] [INFO] [logging.py:96:log_dist] [Rank 0] step=2940, skipped=46, lr=[3.6878185655126163e-06, 3.6878185655126163e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2939|ppo_ep: 1|act_loss: 0.02459716796875|cri_loss: 0.0127716064453125|unsuper_loss: 0.0
-average reward score: 5.41796875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.94%) |Training time=0.44s (20.48%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.45
-[2023-04-14 10:36:01,038] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
-epoch: 0|step: 2940|ppo_ep: 1|act_loss: -0.0093536376953125|cri_loss: -0.0039825439453125|unsuper_loss: 0.0
-average reward score: 4.94921875
--------------------------------------------------------------------------------------
-|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.23%) |Training time=0.45s (21.17%) |Others=0.10 (4.60%)|CurSamplesPerSec=15.01 |AvgSamplesPerSec=14.45
-[2023-04-14 10:36:03,170] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
-epoch: 0|step: 2941|ppo_ep: 1|act_loss: 0.01451873779296875|cri_loss: 0.007579803466796875|unsuper_loss: 0.0
-average reward score: 5.2265625
--------------------------------------------------------------------------------------
-|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.31%) |Training time=0.45s (21.09%) |Others=0.10 (4.60%)|CurSamplesPerSec=15.00 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2942|ppo_ep: 1|act_loss: 0.003055572509765625|cri_loss: 0.002071380615234375|unsuper_loss: 0.0
-average reward score: 5.15234375
--------------------------------------------------------------------------------------
-|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.25%) |Training time=0.48s (20.52%) |Others=0.10 (4.23%)|CurSamplesPerSec=13.73 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2943|ppo_ep: 1|act_loss: -0.0129547119140625|cri_loss: -0.00557708740234375|unsuper_loss: 0.0
-average reward score: 5.578125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.23%) |Training time=0.48s (22.23%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2944|ppo_ep: 1|act_loss: -0.022705078125|cri_loss: -0.0099639892578125|unsuper_loss: 0.0
-average reward score: 5.234375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.18%) |Training time=0.48s (22.27%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2945|ppo_ep: 1|act_loss: -0.002780914306640625|cri_loss: -0.001194000244140625|unsuper_loss: 0.0
-average reward score: 5.0546875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.04%) |Training time=0.49s (22.42%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2946|ppo_ep: 1|act_loss: -0.0325927734375|cri_loss: -0.01580810546875|unsuper_loss: 0.0
-average reward score: 4.828125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.34%) |Training time=0.48s (22.11%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2947|ppo_ep: 1|act_loss: 0.043701171875|cri_loss: 0.02294921875|unsuper_loss: 0.0
-average reward score: 5.7421875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.23%) |Training time=0.48s (22.23%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2948|ppo_ep: 1|act_loss: 0.03558349609375|cri_loss: 0.018707275390625|unsuper_loss: 0.0
-average reward score: 5.06640625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.19%) |Training time=0.48s (22.26%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.45
-[2023-04-14 10:36:20,616] [INFO] [logging.py:96:log_dist] [Rank 0] step=2950, skipped=40, lr=[7.091293670274905e-06, 7.091293670274905e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:36:20,635] [INFO] [timer.py:199:stop] epoch=0/micro_step=2950/global_step=2950, RunningAvgSamplesPerSec=106.23428625978279, CurrSamplesPerSec=103.20907689290974, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:36:20,728] [INFO] [logging.py:96:log_dist] [Rank 0] step=2950, skipped=46, lr=[3.679340601935418e-06, 3.679340601935418e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2949|ppo_ep: 1|act_loss: 0.055755615234375|cri_loss: 0.029510498046875|unsuper_loss: 0.0
-average reward score: 5.734375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.57%) |Training time=0.47s (21.89%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2950|ppo_ep: 1|act_loss: 0.02801513671875|cri_loss: 0.0146636962890625|unsuper_loss: 0.0
-average reward score: 5.12890625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.37%) |Training time=0.48s (22.08%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2951|ppo_ep: 1|act_loss: 0.028778076171875|cri_loss: 0.0155487060546875|unsuper_loss: 0.0
-average reward score: 4.46484375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.23%) |Training time=0.48s (22.21%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2952|ppo_ep: 1|act_loss: 0.002716064453125|cri_loss: 0.002109527587890625|unsuper_loss: 0.0
-average reward score: 5.390625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.47%) |Training time=0.47s (21.98%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2953|ppo_ep: 1|act_loss: -0.032562255859375|cri_loss: -0.0157012939453125|unsuper_loss: 0.0
-average reward score: 4.70703125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.31%) |Training time=0.48s (22.15%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2954|ppo_ep: 1|act_loss: -0.05206298828125|cri_loss: -0.0253143310546875|unsuper_loss: 0.0
-average reward score: 5.265625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.78%) |Training time=0.47s (21.65%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2955|ppo_ep: 1|act_loss: -0.016204833984375|cri_loss: -0.007656097412109375|unsuper_loss: 0.0
-average reward score: 5.7578125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.76%) |Training time=0.47s (21.52%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2956|ppo_ep: 1|act_loss: -0.006183624267578125|cri_loss: -0.0024566650390625|unsuper_loss: 0.0
-average reward score: 5.4921875
--------------------------------------------------------------------------------------
-|E2E latency=2.30s |Gather latency=0.00s (0.00%) |Generate time=1.62s (70.42%) |Training time=0.52s (22.78%) |Others=0.16 (6.80%)|CurSamplesPerSec=13.89 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2957|ppo_ep: 1|act_loss: 0.0180206298828125|cri_loss: 0.00919342041015625|unsuper_loss: 0.0
-average reward score: 5.56640625
--------------------------------------------------------------------------------------
-|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.63s (69.64%) |Training time=0.47s (20.24%) |Others=0.24 (10.12%)|CurSamplesPerSec=13.63 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2958|ppo_ep: 1|act_loss: 0.006927490234375|cri_loss: 0.0037059783935546875|unsuper_loss: 0.0
-average reward score: 5.43359375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.30%) |Training time=0.48s (22.15%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.45
-[2023-04-14 10:36:42,565] [INFO] [logging.py:96:log_dist] [Rank 0] step=2960, skipped=40, lr=[7.07487733687673e-06, 7.07487733687673e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:36:42,583] [INFO] [timer.py:199:stop] epoch=0/micro_step=2960/global_step=2960, RunningAvgSamplesPerSec=106.21938333389814, CurrSamplesPerSec=99.69147854428071, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:36:42,675] [INFO] [logging.py:96:log_dist] [Rank 0] step=2960, skipped=46, lr=[3.6708451576854964e-06, 3.6708451576854964e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2959|ppo_ep: 1|act_loss: 0.02166748046875|cri_loss: 0.011566162109375|unsuper_loss: 0.0
-average reward score: 5.56640625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.01%) |Training time=0.48s (22.45%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2960|ppo_ep: 1|act_loss: 0.03948974609375|cri_loss: 0.0203857421875|unsuper_loss: 0.0
-average reward score: 5.27734375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.07%) |Training time=0.48s (22.39%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2961|ppo_ep: 1|act_loss: 0.0328369140625|cri_loss: 0.0174560546875|unsuper_loss: 0.0
-average reward score: 5.90625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.39%) |Training time=0.47s (22.04%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2962|ppo_ep: 1|act_loss: -0.00704193115234375|cri_loss: -0.003192901611328125|unsuper_loss: 0.0
-average reward score: 5.66015625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.13%) |Training time=0.48s (22.31%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2963|ppo_ep: 1|act_loss: 0.02923583984375|cri_loss: 0.0168609619140625|unsuper_loss: 0.0
-average reward score: 4.71484375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.21%) |Training time=0.48s (22.24%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2964|ppo_ep: 1|act_loss: -0.00560760498046875|cri_loss: -0.002147674560546875|unsuper_loss: 0.0
-average reward score: 5.55859375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.35%) |Training time=0.48s (22.09%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2965|ppo_ep: 1|act_loss: -0.017333984375|cri_loss: -0.00814056396484375|unsuper_loss: 0.0
-average reward score: 5.6328125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.00%) |Training time=0.48s (22.45%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2966|ppo_ep: 1|act_loss: -0.05963134765625|cri_loss: -0.0282135009765625|unsuper_loss: 0.0
-average reward score: 5.359375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.23%) |Training time=0.48s (22.21%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2967|ppo_ep: 1|act_loss: 0.060333251953125|cri_loss: 0.032135009765625|unsuper_loss: 0.0
-average reward score: 4.875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.19%) |Training time=0.48s (22.26%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2968|ppo_ep: 1|act_loss: -0.0307159423828125|cri_loss: -0.014678955078125|unsuper_loss: 0.0
-average reward score: 5.6328125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.17%) |Training time=0.48s (22.29%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.45
-[2023-04-14 10:37:04,143] [INFO] [logging.py:96:log_dist] [Rank 0] step=2970, skipped=40, lr=[7.0584276548688896e-06, 7.0584276548688896e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:37:04,161] [INFO] [timer.py:199:stop] epoch=0/micro_step=2970/global_step=2970, RunningAvgSamplesPerSec=106.20216768200422, CurrSamplesPerSec=108.36122310905577, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:37:04,254] [INFO] [logging.py:96:log_dist] [Rank 0] step=2970, skipped=46, lr=[3.662332358685826e-06, 3.662332358685826e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2969|ppo_ep: 1|act_loss: -0.0328369140625|cri_loss: -0.0156707763671875|unsuper_loss: 0.0
-average reward score: 5.22265625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.19%) |Training time=0.46s (21.26%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2970|ppo_ep: 1|act_loss: -0.018341064453125|cri_loss: -0.00885772705078125|unsuper_loss: 0.0
-average reward score: 5.56640625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.75%) |Training time=0.49s (22.72%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2971|ppo_ep: 1|act_loss: 9.191036224365234e-05|cri_loss: 0.0001608133316040039|unsuper_loss: 0.0
-average reward score: 5.35546875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.31%) |Training time=0.48s (22.13%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2972|ppo_ep: 1|act_loss: -0.028167724609375|cri_loss: -0.0137939453125|unsuper_loss: 0.0
-average reward score: 5.28515625
--------------------------------------------------------------------------------------
-|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.07%) |Training time=0.48s (20.72%) |Others=0.10 (4.21%)|CurSamplesPerSec=13.70 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2973|ppo_ep: 1|act_loss: -0.0010089874267578125|cri_loss: 0.0002651214599609375|unsuper_loss: 0.0
-average reward score: 5.33203125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.25%) |Training time=0.48s (22.19%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2974|ppo_ep: 1|act_loss: 0.0003681182861328125|cri_loss: 0.00141143798828125|unsuper_loss: 0.0
-average reward score: 5.4453125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.22%) |Training time=0.48s (22.23%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2975|ppo_ep: 1|act_loss: -0.0010356903076171875|cri_loss: -0.00015783309936523438|unsuper_loss: 0.0
-average reward score: 5.5859375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.11%) |Training time=0.48s (22.34%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2976|ppo_ep: 1|act_loss: 0.00823211669921875|cri_loss: 0.004405975341796875|unsuper_loss: 0.0
-average reward score: 5.5390625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.32%) |Training time=0.48s (22.13%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2977|ppo_ep: 1|act_loss: -0.0009083747863769531|cri_loss: -0.00011587142944335938|unsuper_loss: 0.0
-average reward score: 5.25
--------------------------------------------------------------------------------------
-|E2E latency=2.51s |Gather latency=0.00s (0.00%) |Generate time=1.58s (63.02%) |Training time=0.48s (19.08%) |Others=0.45 (17.90%)|CurSamplesPerSec=12.75 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2978|ppo_ep: 1|act_loss: -0.031768798828125|cri_loss: -0.01482391357421875|unsuper_loss: 0.0
-average reward score: 5.265625
--------------------------------------------------------------------------------------
-|E2E latency=2.06s |Gather latency=0.00s (0.00%) |Generate time=1.58s (76.46%) |Training time=0.39s (18.75%) |Others=0.10 (4.78%)|CurSamplesPerSec=15.52 |AvgSamplesPerSec=14.45
-[2023-04-14 10:37:26,167] [INFO] [logging.py:96:log_dist] [Rank 0] step=2980, skipped=40, lr=[7.041944868075353e-06, 7.041944868075353e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:37:26,185] [INFO] [timer.py:199:stop] epoch=0/micro_step=2980/global_step=2980, RunningAvgSamplesPerSec=106.19269914626885, CurrSamplesPerSec=101.53371919119633, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:37:26,278] [INFO] [logging.py:96:log_dist] [Rank 0] step=2980, skipped=46, lr=[3.6538023311166273e-06, 3.6538023311166273e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2979|ppo_ep: 1|act_loss: -0.02227783203125|cri_loss: -0.0102081298828125|unsuper_loss: 0.0
-average reward score: 5.7890625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.30%) |Training time=0.48s (22.14%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2980|ppo_ep: 1|act_loss: -0.004032135009765625|cri_loss: -0.0018053054809570312|unsuper_loss: 0.0
-average reward score: 5.68359375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.11%) |Training time=0.44s (20.34%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2981|ppo_ep: 1|act_loss: -0.040283203125|cri_loss: -0.018829345703125|unsuper_loss: 0.0
-average reward score: 5.66015625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.08%) |Training time=0.46s (21.36%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2982|ppo_ep: 1|act_loss: -0.0193023681640625|cri_loss: -0.00920867919921875|unsuper_loss: 0.0
-average reward score: 5.58984375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.84%) |Training time=0.47s (21.62%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2983|ppo_ep: 1|act_loss: -0.01230621337890625|cri_loss: -0.00579071044921875|unsuper_loss: 0.0
-average reward score: 5.50390625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.05%) |Training time=0.44s (20.41%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2984|ppo_ep: 1|act_loss: 0.021392822265625|cri_loss: 0.01100921630859375|unsuper_loss: 0.0
-average reward score: 5.4140625
--------------------------------------------------------------------------------------
-|E2E latency=2.57s |Gather latency=0.00s (0.00%) |Generate time=1.62s (63.31%) |Training time=0.44s (17.34%) |Others=0.50 (19.34%)|CurSamplesPerSec=12.48 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2985|ppo_ep: 1|act_loss: 0.02447509765625|cri_loss: 0.01253509521484375|unsuper_loss: 0.0
-average reward score: 6.1328125
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.63s (73.84%) |Training time=0.46s (20.90%) |Others=0.12 (5.27%)|CurSamplesPerSec=14.49 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2986|ppo_ep: 1|act_loss: 0.01194000244140625|cri_loss: 0.007659912109375|unsuper_loss: 0.0
-average reward score: 4.7265625
--------------------------------------------------------------------------------------
-|E2E latency=2.39s |Gather latency=0.00s (0.00%) |Generate time=1.84s (76.86%) |Training time=0.45s (19.00%) |Others=0.10 (4.14%)|CurSamplesPerSec=13.40 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2987|ppo_ep: 1|act_loss: 0.0084075927734375|cri_loss: 0.0046234130859375|unsuper_loss: 0.0
-average reward score: 5.3203125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.16%) |Training time=0.46s (21.27%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2988|ppo_ep: 1|act_loss: 0.04998779296875|cri_loss: 0.026336669921875|unsuper_loss: 0.0
-average reward score: 5.23046875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.96%) |Training time=0.46s (21.48%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45
-[2023-04-14 10:37:48,439] [INFO] [logging.py:96:log_dist] [Rank 0] step=2990, skipped=40, lr=[7.025429220810784e-06, 7.025429220810784e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:37:48,458] [INFO] [timer.py:199:stop] epoch=0/micro_step=2990/global_step=2990, RunningAvgSamplesPerSec=106.20281446283651, CurrSamplesPerSec=107.48080335693007, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:37:48,550] [INFO] [logging.py:96:log_dist] [Rank 0] step=2990, skipped=46, lr=[3.6452552014134834e-06, 3.6452552014134834e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2989|ppo_ep: 1|act_loss: 0.017669677734375|cri_loss: 0.00970458984375|unsuper_loss: 0.0
-average reward score: 6.1328125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.09%) |Training time=0.46s (21.36%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2990|ppo_ep: 1|act_loss: -0.0224609375|cri_loss: -0.01080322265625|unsuper_loss: 0.0
-average reward score: 6.0859375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.96%) |Training time=0.46s (21.48%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2991|ppo_ep: 1|act_loss: -0.01204681396484375|cri_loss: -0.00508880615234375|unsuper_loss: 0.0
-average reward score: 5.8046875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.02%) |Training time=0.46s (21.41%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2992|ppo_ep: 1|act_loss: 0.0279541015625|cri_loss: 0.01557159423828125|unsuper_loss: 0.0
-average reward score: 5.4921875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.92%) |Training time=0.47s (21.52%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2993|ppo_ep: 1|act_loss: 0.0250701904296875|cri_loss: 0.015655517578125|unsuper_loss: 0.0
-average reward score: 5.125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.96%) |Training time=0.46s (21.49%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2994|ppo_ep: 1|act_loss: -0.00518035888671875|cri_loss: -0.00228118896484375|unsuper_loss: 0.0
-average reward score: 5.3046875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.00%) |Training time=0.46s (21.44%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2995|ppo_ep: 1|act_loss: -0.0118560791015625|cri_loss: -0.00574493408203125|unsuper_loss: 0.0
-average reward score: 5.39453125
--------------------------------------------------------------------------------------
-|E2E latency=2.08s |Gather latency=0.00s (0.00%) |Generate time=1.60s (76.67%) |Training time=0.39s (18.57%) |Others=0.10 (4.76%)|CurSamplesPerSec=15.37 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2996|ppo_ep: 1|act_loss: 0.04974365234375|cri_loss: 0.03350830078125|unsuper_loss: 0.0
-average reward score: 5.8125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.43%) |Training time=0.45s (21.01%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2997|ppo_ep: 1|act_loss: 0.0386962890625|cri_loss: 0.020751953125|unsuper_loss: 0.0
-average reward score: 5.3125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.68%) |Training time=0.45s (20.76%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.45
-epoch: 0|step: 2998|ppo_ep: 1|act_loss: -0.0017347335815429688|cri_loss: -0.000568389892578125|unsuper_loss: 0.0
-average reward score: 5.55078125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.26%) |Training time=0.46s (21.17%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.45
-[2023-04-14 10:38:10,330] [INFO] [logging.py:96:log_dist] [Rank 0] step=3000, skipped=40, lr=[7.008880957876916e-06, 7.008880957876916e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:38:10,349] [INFO] [timer.py:199:stop] epoch=0/micro_step=3000/global_step=3000, RunningAvgSamplesPerSec=106.215874634767, CurrSamplesPerSec=105.73411932827526, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:38:10,442] [INFO] [logging.py:96:log_dist] [Rank 0] step=3000, skipped=46, lr=[3.6366910962654785e-06, 3.6366910962654785e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 2999|ppo_ep: 1|act_loss: -0.0080108642578125|cri_loss: -0.0035762786865234375|unsuper_loss: 0.0
-average reward score: 5.44921875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.05%) |Training time=0.47s (21.43%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3000|ppo_ep: 1|act_loss: 0.01529693603515625|cri_loss: 0.009674072265625|unsuper_loss: 0.0
-average reward score: 5.953125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.07%) |Training time=0.46s (21.29%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3001|ppo_ep: 1|act_loss: 0.02740478515625|cri_loss: 0.01422882080078125|unsuper_loss: 0.0
-average reward score: 5.08203125
--------------------------------------------------------------------------------------
-|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.77s (76.25%) |Training time=0.45s (19.50%) |Others=0.10 (4.25%)|CurSamplesPerSec=13.82 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3002|ppo_ep: 1|act_loss: 0.023345947265625|cri_loss: 0.01261138916015625|unsuper_loss: 0.0
-average reward score: 5.57421875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.16%) |Training time=0.46s (21.28%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3003|ppo_ep: 1|act_loss: 0.005279541015625|cri_loss: 0.00919342041015625|unsuper_loss: 0.0
-average reward score: 5.19921875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.88%) |Training time=0.46s (21.55%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3004|ppo_ep: 1|act_loss: -0.025146484375|cri_loss: -0.01141357421875|unsuper_loss: 0.0
-average reward score: 5.26953125
--------------------------------------------------------------------------------------
-|E2E latency=2.81s |Gather latency=0.00s (0.00%) |Generate time=1.58s (56.22%) |Training time=0.47s (16.82%) |Others=0.76 (26.95%)|CurSamplesPerSec=11.40 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3005|ppo_ep: 1|act_loss: 0.056640625|cri_loss: 0.031097412109375|unsuper_loss: 0.0
-average reward score: 5.23046875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.57%) |Training time=0.47s (21.85%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3006|ppo_ep: 1|act_loss: -0.0225372314453125|cri_loss: -0.0108642578125|unsuper_loss: 0.0
-average reward score: 5.3984375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.43%) |Training time=0.47s (22.00%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.45
-[2023-04-14 10:38:28,354] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, reducing to 16384
-[2023-04-14 10:38:28,440] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, reducing to 16384
-epoch: 0|step: 3007|ppo_ep: 1|act_loss: 0.048126220703125|cri_loss: 0.026763916015625|unsuper_loss: 0.0
-average reward score: 5.30859375
--------------------------------------------------------------------------------------
-|E2E latency=2.11s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.66%) |Training time=0.45s (21.13%) |Others=0.09 (4.22%)|CurSamplesPerSec=15.13 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3008|ppo_ep: 1|act_loss: -0.053314208984375|cri_loss: -0.0260467529296875|unsuper_loss: 0.0
-average reward score: 5.1328125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.76%) |Training time=0.46s (21.67%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.45
-[2023-04-14 10:38:32,622] [INFO] [logging.py:96:log_dist] [Rank 0] step=3010, skipped=41, lr=[6.993959837557451e-06, 6.993959837557451e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:38:32,640] [INFO] [timer.py:199:stop] epoch=0/micro_step=3010/global_step=3010, RunningAvgSamplesPerSec=106.2159635052104, CurrSamplesPerSec=102.30953312005147, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:38:32,733] [INFO] [logging.py:96:log_dist] [Rank 0] step=3010, skipped=47, lr=[3.628968992538233e-06, 3.628968992538233e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3009|ppo_ep: 1|act_loss: 0.02825927734375|cri_loss: 0.01508331298828125|unsuper_loss: 0.0
-average reward score: 4.7734375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.34%) |Training time=0.48s (22.10%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3010|ppo_ep: 1|act_loss: -0.006519317626953125|cri_loss: -0.001796722412109375|unsuper_loss: 0.0
-average reward score: 5.40234375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.35%) |Training time=0.48s (22.07%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3011|ppo_ep: 1|act_loss: -0.007564544677734375|cri_loss: -0.0034637451171875|unsuper_loss: 0.0
-average reward score: 5.6328125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.03%) |Training time=0.46s (21.39%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3012|ppo_ep: 1|act_loss: 0.0518798828125|cri_loss: 0.0274505615234375|unsuper_loss: 0.0
-average reward score: 4.03515625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.47%) |Training time=0.47s (21.96%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3013|ppo_ep: 1|act_loss: -0.00540924072265625|cri_loss: 0.00273895263671875|unsuper_loss: 0.0
-average reward score: 4.515625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.14%) |Training time=0.46s (21.28%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3014|ppo_ep: 1|act_loss: -0.0091400146484375|cri_loss: -0.003551483154296875|unsuper_loss: 0.0
-average reward score: 5.33203125
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.69%) |Training time=0.46s (20.81%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3015|ppo_ep: 1|act_loss: 0.028167724609375|cri_loss: 0.015838623046875|unsuper_loss: 0.0
-average reward score: 4.796875
--------------------------------------------------------------------------------------
-|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.69s (75.25%) |Training time=0.46s (20.36%) |Others=0.10 (4.39%)|CurSamplesPerSec=14.28 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3016|ppo_ep: 1|act_loss: 0.06402587890625|cri_loss: 0.035064697265625|unsuper_loss: 0.0
-average reward score: 4.43359375
--------------------------------------------------------------------------------------
-|E2E latency=2.45s |Gather latency=0.00s (0.00%) |Generate time=1.75s (71.39%) |Training time=0.47s (19.05%) |Others=0.23 (9.56%)|CurSamplesPerSec=13.05 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3017|ppo_ep: 1|act_loss: -0.022430419921875|cri_loss: -0.00969696044921875|unsuper_loss: 0.0
-average reward score: 4.5703125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.60%) |Training time=0.47s (21.83%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3018|ppo_ep: 1|act_loss: 0.001911163330078125|cri_loss: 0.0020999908447265625|unsuper_loss: 0.0
-average reward score: 5.29296875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.73%) |Training time=0.47s (21.69%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.45
-[2023-04-14 10:38:54,564] [INFO] [logging.py:96:log_dist] [Rank 0] step=3020, skipped=41, lr=[6.977350281009214e-06, 6.977350281009214e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:38:54,583] [INFO] [timer.py:199:stop] epoch=0/micro_step=3020/global_step=3020, RunningAvgSamplesPerSec=106.21456349820467, CurrSamplesPerSec=103.59215705645721, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:38:54,675] [INFO] [logging.py:96:log_dist] [Rank 0] step=3020, skipped=47, lr=[3.6203729839730567e-06, 3.6203729839730567e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3019|ppo_ep: 1|act_loss: 0.04150390625|cri_loss: 0.02349853515625|unsuper_loss: 0.0
-average reward score: 5.0703125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.55%) |Training time=0.47s (21.90%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3020|ppo_ep: 1|act_loss: 0.0745849609375|cri_loss: 0.041107177734375|unsuper_loss: 0.0
-average reward score: 4.56640625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.70%) |Training time=0.47s (21.73%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3021|ppo_ep: 1|act_loss: 0.0186767578125|cri_loss: 0.0098724365234375|unsuper_loss: 0.0
-average reward score: 5.328125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.64%) |Training time=0.47s (21.79%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3022|ppo_ep: 1|act_loss: -0.0171051025390625|cri_loss: -0.00785064697265625|unsuper_loss: 0.0
-average reward score: 5.6640625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.46%) |Training time=0.47s (21.97%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3023|ppo_ep: 1|act_loss: 0.01088714599609375|cri_loss: 0.006252288818359375|unsuper_loss: 0.0
-average reward score: 5.78125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.61%) |Training time=0.47s (21.82%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3024|ppo_ep: 1|act_loss: 0.0006403923034667969|cri_loss: 0.0006952285766601562|unsuper_loss: 0.0
-average reward score: 4.79296875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.54%) |Training time=0.47s (21.89%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3025|ppo_ep: 1|act_loss: -0.03167724609375|cri_loss: -0.01506805419921875|unsuper_loss: 0.0
-average reward score: 5.296875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.80%) |Training time=0.47s (21.64%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3026|ppo_ep: 1|act_loss: -0.0455322265625|cri_loss: -0.021087646484375|unsuper_loss: 0.0
-average reward score: 5.57421875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.74%) |Training time=0.47s (21.68%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3027|ppo_ep: 1|act_loss: -0.01092529296875|cri_loss: -0.00153350830078125|unsuper_loss: 0.0
-average reward score: 5.2890625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.34%) |Training time=0.48s (22.09%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3028|ppo_ep: 1|act_loss: -0.04278564453125|cri_loss: -0.0206298828125|unsuper_loss: 0.0
-average reward score: 5.33203125
--------------------------------------------------------------------------------------
-|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.58s (66.99%) |Training time=0.47s (19.91%) |Others=0.31 (13.10%)|CurSamplesPerSec=13.54 |AvgSamplesPerSec=14.45
-[2023-04-14 10:39:16,295] [INFO] [logging.py:96:log_dist] [Rank 0] step=3030, skipped=41, lr=[6.96070882143755e-06, 6.96070882143755e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:39:16,313] [INFO] [timer.py:199:stop] epoch=0/micro_step=3030/global_step=3030, RunningAvgSamplesPerSec=106.2086986497776, CurrSamplesPerSec=107.58660138577608, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:39:16,406] [INFO] [logging.py:96:log_dist] [Rank 0] step=3030, skipped=47, lr=[3.6117603687775048e-06, 3.6117603687775048e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3029|ppo_ep: 1|act_loss: 0.015106201171875|cri_loss: 0.007755279541015625|unsuper_loss: 0.0
-average reward score: 4.8203125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.18%) |Training time=0.46s (21.27%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3030|ppo_ep: 1|act_loss: 0.0186767578125|cri_loss: 0.0110931396484375|unsuper_loss: 0.0
-average reward score: 4.78125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.56%) |Training time=0.47s (21.87%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3031|ppo_ep: 1|act_loss: 0.0296630859375|cri_loss: 0.016082763671875|unsuper_loss: 0.0
-average reward score: 4.80078125
--------------------------------------------------------------------------------------
-|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.74s (75.28%) |Training time=0.47s (20.47%) |Others=0.10 (4.25%)|CurSamplesPerSec=13.83 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3032|ppo_ep: 1|act_loss: 0.04290771484375|cri_loss: 0.0226593017578125|unsuper_loss: 0.0
-average reward score: 5.2421875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.62%) |Training time=0.47s (21.81%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3033|ppo_ep: 1|act_loss: 0.0767822265625|cri_loss: 0.041412353515625|unsuper_loss: 0.0
-average reward score: 5.0078125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.86%) |Training time=0.46s (21.58%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3034|ppo_ep: 1|act_loss: 0.033294677734375|cri_loss: 0.0171356201171875|unsuper_loss: 0.0
-average reward score: 6.3203125
--------------------------------------------------------------------------------------
-|E2E latency=2.44s |Gather latency=0.00s (0.00%) |Generate time=1.58s (64.85%) |Training time=0.47s (19.34%) |Others=0.39 (15.81%)|CurSamplesPerSec=13.13 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3035|ppo_ep: 1|act_loss: 0.04876708984375|cri_loss: 0.027008056640625|unsuper_loss: 0.0
-average reward score: 5.0625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.50%) |Training time=0.47s (21.93%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3036|ppo_ep: 1|act_loss: -0.03179931640625|cri_loss: -0.015411376953125|unsuper_loss: 0.0
-average reward score: 5.8828125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.41%) |Training time=0.47s (21.87%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3037|ppo_ep: 1|act_loss: -0.0127716064453125|cri_loss: -0.00620269775390625|unsuper_loss: 0.0
-average reward score: 5.5078125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.35%) |Training time=0.47s (22.07%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3038|ppo_ep: 1|act_loss: 0.031005859375|cri_loss: 0.0171051025390625|unsuper_loss: 0.0
-average reward score: 4.421875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.24%) |Training time=0.46s (21.21%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.45
-[2023-04-14 10:39:38,258] [INFO] [logging.py:96:log_dist] [Rank 0] step=3040, skipped=41, lr=[6.944035705509035e-06, 6.944035705509035e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:39:38,277] [INFO] [timer.py:199:stop] epoch=0/micro_step=3040/global_step=3040, RunningAvgSamplesPerSec=106.20304657307675, CurrSamplesPerSec=106.6089857439244, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:39:38,455] [INFO] [logging.py:96:log_dist] [Rank 0] step=3040, skipped=47, lr=[3.603131274611312e-06, 3.603131274611312e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3039|ppo_ep: 1|act_loss: -0.053680419921875|cri_loss: -0.0252685546875|unsuper_loss: 0.0
-average reward score: 5.46484375
--------------------------------------------------------------------------------------
-|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.59s (71.13%) |Training time=0.55s (24.52%) |Others=0.10 (4.35%)|CurSamplesPerSec=14.29 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3040|ppo_ep: 1|act_loss: -0.044677734375|cri_loss: -0.021453857421875|unsuper_loss: 0.0
-average reward score: 5.109375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.47%) |Training time=0.47s (21.84%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3041|ppo_ep: 1|act_loss: 0.014801025390625|cri_loss: 0.0083770751953125|unsuper_loss: 0.0
-average reward score: 4.95703125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.75%) |Training time=0.47s (21.67%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3042|ppo_ep: 1|act_loss: -0.0051422119140625|cri_loss: -0.001735687255859375|unsuper_loss: 0.0
-average reward score: 4.796875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.45%) |Training time=0.47s (22.00%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3043|ppo_ep: 1|act_loss: 0.008453369140625|cri_loss: 0.00479888916015625|unsuper_loss: 0.0
-average reward score: 5.5546875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.49%) |Training time=0.48s (22.00%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3044|ppo_ep: 1|act_loss: -0.00157928466796875|cri_loss: 4.9591064453125e-05|unsuper_loss: 0.0
-average reward score: 5.6328125
--------------------------------------------------------------------------------------
-|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.71s (75.42%) |Training time=0.46s (20.21%) |Others=0.10 (4.37%)|CurSamplesPerSec=14.10 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3045|ppo_ep: 1|act_loss: 0.012237548828125|cri_loss: 0.00685882568359375|unsuper_loss: 0.0
-average reward score: 4.84765625
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.59s (71.93%) |Training time=0.52s (23.55%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.52 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3046|ppo_ep: 1|act_loss: 0.008270263671875|cri_loss: 0.004291534423828125|unsuper_loss: 0.0
-average reward score: 5.58984375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.56%) |Training time=0.47s (21.86%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3047|ppo_ep: 1|act_loss: -3.62396240234375e-05|cri_loss: 0.0002925395965576172|unsuper_loss: 0.0
-average reward score: 4.546875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.50%) |Training time=0.47s (21.94%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3048|ppo_ep: 1|act_loss: 0.0095367431640625|cri_loss: 0.005397796630859375|unsuper_loss: 0.0
-average reward score: 5.140625
--------------------------------------------------------------------------------------
-|E2E latency=2.68s |Gather latency=0.00s (0.00%) |Generate time=1.58s (58.98%) |Training time=0.47s (17.46%) |Others=0.63 (23.56%)|CurSamplesPerSec=11.93 |AvgSamplesPerSec=14.45
-[2023-04-14 10:40:00,590] [INFO] [logging.py:96:log_dist] [Rank 0] step=3050, skipped=41, lr=[6.92733118035947e-06, 6.92733118035947e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:40:00,608] [INFO] [timer.py:199:stop] epoch=0/micro_step=3050/global_step=3050, RunningAvgSamplesPerSec=106.19061216110067, CurrSamplesPerSec=103.65696085806124, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:40:00,701] [INFO] [logging.py:96:log_dist] [Rank 0] step=3050, skipped=47, lr=[3.59448582937847e-06, 3.59448582937847e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3049|ppo_ep: 1|act_loss: 0.01837158203125|cri_loss: 0.0163421630859375|unsuper_loss: 0.0
-average reward score: 4.5546875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.53%) |Training time=0.47s (21.92%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3050|ppo_ep: 1|act_loss: 0.0167694091796875|cri_loss: 0.00876617431640625|unsuper_loss: 0.0
-average reward score: 5.53125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.44%) |Training time=0.47s (22.00%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3051|ppo_ep: 1|act_loss: 0.0504150390625|cri_loss: 0.0277099609375|unsuper_loss: 0.0
-average reward score: 4.2109375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.44%) |Training time=0.47s (21.99%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3052|ppo_ep: 1|act_loss: 0.044677734375|cri_loss: 0.0234375|unsuper_loss: 0.0
-average reward score: 5.44921875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.37%) |Training time=0.47s (22.07%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3053|ppo_ep: 1|act_loss: 0.0081939697265625|cri_loss: 0.0041656494140625|unsuper_loss: 0.0
-average reward score: 5.09765625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.99%) |Training time=0.46s (21.44%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3054|ppo_ep: 1|act_loss: 0.019744873046875|cri_loss: 0.01055145263671875|unsuper_loss: 0.0
-average reward score: 5.13671875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.52%) |Training time=0.47s (21.90%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3055|ppo_ep: 1|act_loss: 0.00218963623046875|cri_loss: 0.001293182373046875|unsuper_loss: 0.0
-average reward score: 5.0
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.11%) |Training time=0.48s (22.35%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3056|ppo_ep: 1|act_loss: -0.0211181640625|cri_loss: -0.01013946533203125|unsuper_loss: 0.0
-average reward score: 5.37109375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.38%) |Training time=0.47s (22.05%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3057|ppo_ep: 1|act_loss: 0.020233154296875|cri_loss: 0.0114288330078125|unsuper_loss: 0.0
-average reward score: 4.92578125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.35%) |Training time=0.48s (22.09%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3058|ppo_ep: 1|act_loss: 0.026153564453125|cri_loss: 0.01445770263671875|unsuper_loss: 0.0
-average reward score: 4.8203125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.14%) |Training time=0.46s (21.32%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.45
-[2023-04-14 10:40:22,136] [INFO] [logging.py:96:log_dist] [Rank 0] step=3060, skipped=41, lr=[6.910595493590213e-06, 6.910595493590213e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:40:22,155] [INFO] [timer.py:199:stop] epoch=0/micro_step=3060/global_step=3060, RunningAvgSamplesPerSec=106.18132775281478, CurrSamplesPerSec=103.46374930236718, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:40:22,248] [INFO] [logging.py:96:log_dist] [Rank 0] step=3060, skipped=47, lr=[3.585824161225334e-06, 3.585824161225334e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3059|ppo_ep: 1|act_loss: -0.00453948974609375|cri_loss: -0.0020771026611328125|unsuper_loss: 0.0
-average reward score: 5.078125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.72%) |Training time=0.47s (21.74%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3060|ppo_ep: 1|act_loss: -0.017425537109375|cri_loss: -0.00827789306640625|unsuper_loss: 0.0
-average reward score: 5.203125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.45%) |Training time=0.47s (21.98%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3061|ppo_ep: 1|act_loss: 0.00922393798828125|cri_loss: 0.004856109619140625|unsuper_loss: 0.0
-average reward score: 5.1015625
--------------------------------------------------------------------------------------
-|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.33%) |Training time=0.47s (20.44%) |Others=0.10 (4.23%)|CurSamplesPerSec=13.80 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3062|ppo_ep: 1|act_loss: -0.030548095703125|cri_loss: -0.0146484375|unsuper_loss: 0.0
-average reward score: 4.22265625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.42%) |Training time=0.47s (22.02%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3063|ppo_ep: 1|act_loss: 0.0113067626953125|cri_loss: 0.00652313232421875|unsuper_loss: 0.0
-average reward score: 4.578125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.57%) |Training time=0.47s (21.87%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3064|ppo_ep: 1|act_loss: 0.04510498046875|cri_loss: 0.024749755859375|unsuper_loss: 0.0
-average reward score: 4.4140625
--------------------------------------------------------------------------------------
-|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.58s (69.11%) |Training time=0.48s (20.89%) |Others=0.23 (10.00%)|CurSamplesPerSec=14.03 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3065|ppo_ep: 1|act_loss: -0.027740478515625|cri_loss: -0.0132293701171875|unsuper_loss: 0.0
-average reward score: 4.984375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.49%) |Training time=0.48s (21.99%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3066|ppo_ep: 1|act_loss: 0.017669677734375|cri_loss: 0.0101318359375|unsuper_loss: 0.0
-average reward score: 4.578125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.86%) |Training time=0.46s (21.56%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3067|ppo_ep: 1|act_loss: -0.018463134765625|cri_loss: -0.009002685546875|unsuper_loss: 0.0
-average reward score: 5.54296875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.52%) |Training time=0.47s (21.92%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3068|ppo_ep: 1|act_loss: -0.0201416015625|cri_loss: -0.009796142578125|unsuper_loss: 0.0
-average reward score: 5.29296875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.68%) |Training time=0.47s (21.75%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.45
-[2023-04-14 10:40:43,959] [INFO] [logging.py:96:log_dist] [Rank 0] step=3070, skipped=41, lr=[6.893828893264516e-06, 6.893828893264516e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:40:43,978] [INFO] [timer.py:199:stop] epoch=0/micro_step=3070/global_step=3070, RunningAvgSamplesPerSec=106.17196197617312, CurrSamplesPerSec=103.21756958575386, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:40:44,070] [INFO] [logging.py:96:log_dist] [Rank 0] step=3070, skipped=47, lr=[3.57714639853872e-06, 3.57714639853872e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3069|ppo_ep: 1|act_loss: 0.0130615234375|cri_loss: 0.00708770751953125|unsuper_loss: 0.0
-average reward score: 5.18359375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.58%) |Training time=0.47s (21.90%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3070|ppo_ep: 1|act_loss: -0.003082275390625|cri_loss: -0.0013217926025390625|unsuper_loss: 0.0
-average reward score: 4.98046875
--------------------------------------------------------------------------------------
-|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.58s (69.34%) |Training time=0.47s (20.42%) |Others=0.23 (10.24%)|CurSamplesPerSec=14.02 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3071|ppo_ep: 1|act_loss: 0.0140533447265625|cri_loss: 0.007343292236328125|unsuper_loss: 0.0
-average reward score: 4.70703125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.78%) |Training time=0.46s (21.64%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3072|ppo_ep: 1|act_loss: 0.0193023681640625|cri_loss: 0.01043701171875|unsuper_loss: 0.0
-average reward score: 4.890625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.92%) |Training time=0.46s (21.54%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3073|ppo_ep: 1|act_loss: 0.00827789306640625|cri_loss: 0.00452423095703125|unsuper_loss: 0.0
-average reward score: 5.23828125
--------------------------------------------------------------------------------------
-|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.62s (72.16%) |Training time=0.47s (20.93%) |Others=0.15 (6.90%)|CurSamplesPerSec=14.25 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3074|ppo_ep: 1|act_loss: -0.0176544189453125|cri_loss: -0.00861358642578125|unsuper_loss: 0.0
-average reward score: 4.89453125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.30%) |Training time=0.46s (21.10%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3075|ppo_ep: 1|act_loss: -0.0271453857421875|cri_loss: -0.013336181640625|unsuper_loss: 0.0
-average reward score: 5.03125
--------------------------------------------------------------------------------------
-|E2E latency=2.86s |Gather latency=0.00s (0.00%) |Generate time=1.76s (61.70%) |Training time=0.47s (16.32%) |Others=0.63 (21.97%)|CurSamplesPerSec=11.20 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3076|ppo_ep: 1|act_loss: 0.005237579345703125|cri_loss: 0.003314971923828125|unsuper_loss: 0.0
-average reward score: 5.33203125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.45%) |Training time=0.47s (21.97%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3077|ppo_ep: 1|act_loss: -0.03656005859375|cri_loss: -0.0175323486328125|unsuper_loss: 0.0
-average reward score: 4.71875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.32%) |Training time=0.48s (22.13%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3078|ppo_ep: 1|act_loss: 0.0016117095947265625|cri_loss: 0.00209808349609375|unsuper_loss: 0.0
-average reward score: 4.59375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.75%) |Training time=0.47s (21.68%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.45
-[2023-04-14 10:41:06,415] [INFO] [logging.py:96:log_dist] [Rank 0] step=3080, skipped=41, lr=[6.8770316279038435e-06, 6.8770316279038435e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:41:06,433] [INFO] [timer.py:199:stop] epoch=0/micro_step=3080/global_step=3080, RunningAvgSamplesPerSec=106.16864078143239, CurrSamplesPerSec=106.1309223235332, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:41:06,525] [INFO] [logging.py:96:log_dist] [Rank 0] step=3080, skipped=47, lr=[3.568452669944006e-06, 3.568452669944006e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3079|ppo_ep: 1|act_loss: 0.00951385498046875|cri_loss: 0.01016998291015625|unsuper_loss: 0.0
-average reward score: 4.57421875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.78%) |Training time=0.47s (21.68%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3080|ppo_ep: 1|act_loss: 0.019134521484375|cri_loss: 0.00991058349609375|unsuper_loss: 0.0
-average reward score: 4.171875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.53%) |Training time=0.47s (21.90%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3081|ppo_ep: 1|act_loss: 0.0150146484375|cri_loss: 0.0078277587890625|unsuper_loss: 0.0
-average reward score: 4.7734375
--------------------------------------------------------------------------------------
-|E2E latency=2.53s |Gather latency=0.00s (0.00%) |Generate time=1.58s (62.59%) |Training time=0.46s (18.38%) |Others=0.48 (19.03%)|CurSamplesPerSec=12.66 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3082|ppo_ep: 1|act_loss: 0.027618408203125|cri_loss: 0.01419830322265625|unsuper_loss: 0.0
-average reward score: 5.3828125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.46%) |Training time=0.47s (21.97%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3083|ppo_ep: 1|act_loss: -0.0113525390625|cri_loss: -0.00547027587890625|unsuper_loss: 0.0
-average reward score: 5.3828125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.46%) |Training time=0.47s (21.98%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3084|ppo_ep: 1|act_loss: -0.04022216796875|cri_loss: -0.019439697265625|unsuper_loss: 0.0
-average reward score: 4.796875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.42%) |Training time=0.47s (22.03%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3085|ppo_ep: 1|act_loss: 0.004192352294921875|cri_loss: 0.0023860931396484375|unsuper_loss: 0.0
-average reward score: 4.58984375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.70%) |Training time=0.47s (21.74%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3086|ppo_ep: 1|act_loss: -0.01071929931640625|cri_loss: -0.00405120849609375|unsuper_loss: 0.0
-average reward score: 5.36328125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.34%) |Training time=0.47s (22.09%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3087|ppo_ep: 1|act_loss: -0.0206756591796875|cri_loss: -0.010009765625|unsuper_loss: 0.0
-average reward score: 4.9453125
--------------------------------------------------------------------------------------
-|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.58s (70.70%) |Training time=0.48s (21.37%) |Others=0.18 (7.92%)|CurSamplesPerSec=14.35 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3088|ppo_ep: 1|act_loss: -0.0251617431640625|cri_loss: -0.01202392578125|unsuper_loss: 0.0
-average reward score: 4.421875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.32%) |Training time=0.48s (22.10%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.45
-[2023-04-14 10:41:28,381] [INFO] [logging.py:96:log_dist] [Rank 0] step=3090, skipped=41, lr=[6.860203946484189e-06, 6.860203946484189e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:41:28,399] [INFO] [timer.py:199:stop] epoch=0/micro_step=3090/global_step=3090, RunningAvgSamplesPerSec=106.15902057491887, CurrSamplesPerSec=102.21190862761702, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:41:28,492] [INFO] [logging.py:96:log_dist] [Rank 0] step=3090, skipped=47, lr=[3.5597431043032205e-06, 3.5597431043032205e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3089|ppo_ep: 1|act_loss: -0.025787353515625|cri_loss: -0.01262664794921875|unsuper_loss: 0.0
-average reward score: 5.16796875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.50%) |Training time=0.48s (21.97%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3090|ppo_ep: 1|act_loss: -0.0203094482421875|cri_loss: -0.009735107421875|unsuper_loss: 0.0
-average reward score: 4.5546875
--------------------------------------------------------------------------------------
-|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.37%) |Training time=0.47s (20.41%) |Others=0.10 (4.22%)|CurSamplesPerSec=13.77 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3091|ppo_ep: 1|act_loss: 0.02978515625|cri_loss: 0.0167083740234375|unsuper_loss: 0.0
-average reward score: 4.8125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.56%) |Training time=0.47s (21.88%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3092|ppo_ep: 1|act_loss: -0.0156097412109375|cri_loss: -0.007183074951171875|unsuper_loss: 0.0
-average reward score: 4.328125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.52%) |Training time=0.47s (21.90%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3093|ppo_ep: 1|act_loss: -0.031402587890625|cri_loss: -0.01488494873046875|unsuper_loss: 0.0
-average reward score: 4.7109375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.29%) |Training time=0.46s (21.17%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3094|ppo_ep: 1|act_loss: -0.032623291015625|cri_loss: -0.0154571533203125|unsuper_loss: 0.0
-average reward score: 5.45703125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.54%) |Training time=0.47s (21.90%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3095|ppo_ep: 1|act_loss: -0.0212554931640625|cri_loss: -0.01013946533203125|unsuper_loss: 0.0
-average reward score: 4.9296875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.71%) |Training time=0.47s (21.73%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3096|ppo_ep: 1|act_loss: -0.03173828125|cri_loss: -0.0146636962890625|unsuper_loss: 0.0
-average reward score: 4.67578125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.74%) |Training time=0.47s (21.70%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3097|ppo_ep: 1|act_loss: 0.00027441978454589844|cri_loss: 0.0005178451538085938|unsuper_loss: 0.0
-average reward score: 5.140625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.63%) |Training time=0.47s (21.81%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3098|ppo_ep: 1|act_loss: 0.000667572021484375|cri_loss: 0.0013866424560546875|unsuper_loss: 0.0
-average reward score: 4.4296875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.72%) |Training time=0.47s (21.71%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.45
-[2023-04-14 10:41:50,068] [INFO] [logging.py:96:log_dist] [Rank 0] step=3100, skipped=41, lr=[6.843346098432385e-06, 6.843346098432385e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:41:50,371] [INFO] [timer.py:199:stop] epoch=0/micro_step=3100/global_step=3100, RunningAvgSamplesPerSec=106.1222394732359, CurrSamplesPerSec=54.794599648087136, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:41:50,464] [INFO] [logging.py:96:log_dist] [Rank 0] step=3100, skipped=47, lr=[3.551017830713139e-06, 3.551017830713139e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3099|ppo_ep: 1|act_loss: 0.014007568359375|cri_loss: 0.007511138916015625|unsuper_loss: 0.0
-average reward score: 4.71875
--------------------------------------------------------------------------------------
-|E2E latency=2.44s |Gather latency=0.00s (0.00%) |Generate time=1.59s (65.33%) |Training time=0.75s (30.65%) |Others=0.10 (4.02%)|CurSamplesPerSec=13.13 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3100|ppo_ep: 1|act_loss: 0.05084228515625|cri_loss: 0.0269622802734375|unsuper_loss: 0.0
-average reward score: 4.69140625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.57%) |Training time=0.47s (21.86%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3101|ppo_ep: 1|act_loss: 0.000579833984375|cri_loss: 0.0024852752685546875|unsuper_loss: 0.0
-average reward score: 4.7578125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.62%) |Training time=0.47s (21.85%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3102|ppo_ep: 1|act_loss: 0.0220184326171875|cri_loss: 0.0114593505859375|unsuper_loss: 0.0
-average reward score: 4.90625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.90%) |Training time=0.47s (21.60%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3103|ppo_ep: 1|act_loss: 0.01221466064453125|cri_loss: 0.006389617919921875|unsuper_loss: 0.0
-average reward score: 4.40625
--------------------------------------------------------------------------------------
-|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.69s (75.06%) |Training time=0.46s (20.58%) |Others=0.10 (4.36%)|CurSamplesPerSec=14.19 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3104|ppo_ep: 1|act_loss: -0.019287109375|cri_loss: -0.00951385498046875|unsuper_loss: 0.0
-average reward score: 5.40625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.33%) |Training time=0.46s (21.12%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3105|ppo_ep: 1|act_loss: -0.0160675048828125|cri_loss: -0.007110595703125|unsuper_loss: 0.0
-average reward score: 5.3671875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.57%) |Training time=0.47s (21.87%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3106|ppo_ep: 1|act_loss: -0.027008056640625|cri_loss: -0.01226806640625|unsuper_loss: 0.0
-average reward score: 4.2734375
--------------------------------------------------------------------------------------
-|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.41%) |Training time=0.47s (20.34%) |Others=0.10 (4.26%)|CurSamplesPerSec=13.77 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3107|ppo_ep: 1|act_loss: -0.032196044921875|cri_loss: -0.0145111083984375|unsuper_loss: 0.0
-average reward score: 4.69921875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.48%) |Training time=0.47s (21.97%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3108|ppo_ep: 1|act_loss: -0.005462646484375|cri_loss: -0.002117156982421875|unsuper_loss: 0.0
-average reward score: 4.90234375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.17%) |Training time=0.48s (22.26%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45
-[2023-04-14 10:42:12,205] [INFO] [logging.py:96:log_dist] [Rank 0] step=3110, skipped=41, lr=[6.826458333622407e-06, 6.826458333622407e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:42:12,224] [INFO] [timer.py:199:stop] epoch=0/micro_step=3110/global_step=3110, RunningAvgSamplesPerSec=106.113810899425, CurrSamplesPerSec=99.49490248637686, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:42:12,316] [INFO] [logging.py:96:log_dist] [Rank 0] step=3110, skipped=47, lr=[3.542276978503364e-06, 3.542276978503364e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3109|ppo_ep: 1|act_loss: 0.0574951171875|cri_loss: 0.030975341796875|unsuper_loss: 0.0
-average reward score: 4.80078125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.10%) |Training time=0.48s (22.38%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3110|ppo_ep: 1|act_loss: 0.02020263671875|cri_loss: 0.01190948486328125|unsuper_loss: 0.0
-average reward score: 4.9296875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.20%) |Training time=0.48s (22.23%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3111|ppo_ep: 1|act_loss: 0.019256591796875|cri_loss: 0.0103302001953125|unsuper_loss: 0.0
-average reward score: 4.6953125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.07%) |Training time=0.46s (21.39%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3112|ppo_ep: 1|act_loss: -0.05108642578125|cri_loss: -0.0231170654296875|unsuper_loss: 0.0
-average reward score: 4.80078125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.23%) |Training time=0.48s (22.22%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3113|ppo_ep: 1|act_loss: -0.0019350051879882812|cri_loss: -0.0007886886596679688|unsuper_loss: 0.0
-average reward score: 5.60546875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.28%) |Training time=0.48s (22.17%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3114|ppo_ep: 1|act_loss: 0.04736328125|cri_loss: 0.024871826171875|unsuper_loss: 0.0
-average reward score: 4.69140625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.19%) |Training time=0.48s (22.26%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3115|ppo_ep: 1|act_loss: 0.0191650390625|cri_loss: 0.01056671142578125|unsuper_loss: 0.0
-average reward score: 5.125
--------------------------------------------------------------------------------------
-|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.58s (70.34%) |Training time=0.47s (21.09%) |Others=0.19 (8.57%)|CurSamplesPerSec=14.22 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3116|ppo_ep: 1|act_loss: 0.0114898681640625|cri_loss: 0.006500244140625|unsuper_loss: 0.0
-average reward score: 5.0625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.20%) |Training time=0.48s (22.24%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3117|ppo_ep: 1|act_loss: -0.003936767578125|cri_loss: -0.001834869384765625|unsuper_loss: 0.0
-average reward score: 5.2421875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.15%) |Training time=0.48s (22.30%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3118|ppo_ep: 1|act_loss: -0.01453399658203125|cri_loss: -0.0068359375|unsuper_loss: 0.0
-average reward score: 4.7734375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.48%) |Training time=0.47s (21.96%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46
-[2023-04-14 10:42:33,852] [INFO] [logging.py:96:log_dist] [Rank 0] step=3120, skipped=41, lr=[6.80954090237167e-06, 6.80954090237167e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:42:33,870] [INFO] [timer.py:199:stop] epoch=0/micro_step=3120/global_step=3120, RunningAvgSamplesPerSec=106.10118858785661, CurrSamplesPerSec=105.24239249756924, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:42:33,963] [INFO] [logging.py:96:log_dist] [Rank 0] step=3120, skipped=47, lr=[3.533520677234411e-06, 3.533520677234411e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3119|ppo_ep: 1|act_loss: 0.0082855224609375|cri_loss: 0.004329681396484375|unsuper_loss: 0.0
-average reward score: 6.32421875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.82%) |Training time=0.47s (21.63%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3120|ppo_ep: 1|act_loss: 0.00994873046875|cri_loss: 0.005306243896484375|unsuper_loss: 0.0
-average reward score: 5.265625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.72%) |Training time=0.47s (21.73%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3121|ppo_ep: 1|act_loss: 0.0122833251953125|cri_loss: 0.00635528564453125|unsuper_loss: 0.0
-average reward score: 4.46875
--------------------------------------------------------------------------------------
-|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.44%) |Training time=0.47s (20.31%) |Others=0.10 (4.25%)|CurSamplesPerSec=13.81 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3122|ppo_ep: 1|act_loss: 0.018890380859375|cri_loss: 0.00965118408203125|unsuper_loss: 0.0
-average reward score: 5.16015625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.70%) |Training time=0.47s (21.72%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3123|ppo_ep: 1|act_loss: 0.012359619140625|cri_loss: 0.00652313232421875|unsuper_loss: 0.0
-average reward score: 4.65234375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.64%) |Training time=0.47s (21.79%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3124|ppo_ep: 1|act_loss: 0.0006108283996582031|cri_loss: 0.0007014274597167969|unsuper_loss: 0.0
-average reward score: 4.7265625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.50%) |Training time=0.47s (21.95%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3125|ppo_ep: 1|act_loss: -0.0137481689453125|cri_loss: -0.006610870361328125|unsuper_loss: 0.0
-average reward score: 5.5078125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.45%) |Training time=0.47s (22.00%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3126|ppo_ep: 1|act_loss: 0.0255279541015625|cri_loss: 0.015472412109375|unsuper_loss: 0.0
-average reward score: 5.25390625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.55%) |Training time=0.47s (21.89%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3127|ppo_ep: 1|act_loss: 0.00579833984375|cri_loss: 0.0034637451171875|unsuper_loss: 0.0
-average reward score: 5.83203125
--------------------------------------------------------------------------------------
-|E2E latency=2.69s |Gather latency=0.00s (0.00%) |Generate time=1.58s (58.80%) |Training time=0.47s (17.47%) |Others=0.64 (23.74%)|CurSamplesPerSec=11.88 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3128|ppo_ep: 1|act_loss: -0.0211181640625|cri_loss: -0.0097198486328125|unsuper_loss: 0.0
-average reward score: 5.33984375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.38%) |Training time=0.47s (22.04%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46
-[2023-04-14 10:42:56,080] [INFO] [logging.py:96:log_dist] [Rank 0] step=3130, skipped=41, lr=[6.792594055437316e-06, 6.792594055437316e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:42:56,098] [INFO] [timer.py:199:stop] epoch=0/micro_step=3130/global_step=3130, RunningAvgSamplesPerSec=106.09292350573443, CurrSamplesPerSec=101.92254909405708, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:42:56,191] [INFO] [logging.py:96:log_dist] [Rank 0] step=3130, skipped=47, lr=[3.524749056695789e-06, 3.524749056695789e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3129|ppo_ep: 1|act_loss: -0.0255126953125|cri_loss: -0.01183319091796875|unsuper_loss: 0.0
-average reward score: 4.42578125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.38%) |Training time=0.48s (22.08%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3130|ppo_ep: 1|act_loss: -0.01776123046875|cri_loss: -0.00868988037109375|unsuper_loss: 0.0
-average reward score: 5.953125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.42%) |Training time=0.48s (22.03%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3131|ppo_ep: 1|act_loss: -0.011505126953125|cri_loss: -0.0054473876953125|unsuper_loss: 0.0
-average reward score: 5.2265625
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.16%) |Training time=0.49s (22.35%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3132|ppo_ep: 1|act_loss: 0.0022106170654296875|cri_loss: 0.001453399658203125|unsuper_loss: 0.0
-average reward score: 4.6875
--------------------------------------------------------------------------------------
-|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.68s (74.39%) |Training time=0.48s (21.27%) |Others=0.10 (4.34%)|CurSamplesPerSec=14.16 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3133|ppo_ep: 1|act_loss: 0.0005121231079101562|cri_loss: 0.0004792213439941406|unsuper_loss: 0.0
-average reward score: 4.72265625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.54%) |Training time=0.47s (21.89%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3134|ppo_ep: 1|act_loss: 0.01171875|cri_loss: 0.006145477294921875|unsuper_loss: 0.0
-average reward score: 4.5390625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.41%) |Training time=0.48s (22.04%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3135|ppo_ep: 1|act_loss: -0.0181884765625|cri_loss: -0.0087127685546875|unsuper_loss: 0.0
-average reward score: 4.84375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.75%) |Training time=0.47s (21.68%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3136|ppo_ep: 1|act_loss: 0.018280029296875|cri_loss: 0.0098419189453125|unsuper_loss: 0.0
-average reward score: 4.9609375
--------------------------------------------------------------------------------------
-|E2E latency=2.59s |Gather latency=0.00s (0.00%) |Generate time=1.75s (67.58%) |Training time=0.47s (18.03%) |Others=0.37 (14.39%)|CurSamplesPerSec=12.34 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3137|ppo_ep: 1|act_loss: 0.006168365478515625|cri_loss: 0.0038909912109375|unsuper_loss: 0.0
-average reward score: 5.43359375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.58%) |Training time=0.47s (21.86%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3138|ppo_ep: 1|act_loss: -0.0243682861328125|cri_loss: -0.01153564453125|unsuper_loss: 0.0
-average reward score: 4.7734375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.63%) |Training time=0.47s (21.83%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
-[2023-04-14 10:43:18,216] [INFO] [logging.py:96:log_dist] [Rank 0] step=3140, skipped=41, lr=[6.775618044012496e-06, 6.775618044012496e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:43:18,234] [INFO] [timer.py:199:stop] epoch=0/micro_step=3140/global_step=3140, RunningAvgSamplesPerSec=106.08196738154236, CurrSamplesPerSec=103.5805649257089, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:43:18,327] [INFO] [logging.py:96:log_dist] [Rank 0] step=3140, skipped=47, lr=[3.5159622469040734e-06, 3.5159622469040734e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3139|ppo_ep: 1|act_loss: -0.0006122589111328125|cri_loss: 0.000782012939453125|unsuper_loss: 0.0
-average reward score: 4.59765625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.61%) |Training time=0.47s (21.86%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3140|ppo_ep: 1|act_loss: -0.0027523040771484375|cri_loss: -0.000537872314453125|unsuper_loss: 0.0
-average reward score: 4.63671875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.81%) |Training time=0.47s (21.63%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3141|ppo_ep: 1|act_loss: 0.009124755859375|cri_loss: 0.0054473876953125|unsuper_loss: 0.0
-average reward score: 4.98046875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.74%) |Training time=0.47s (21.65%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3142|ppo_ep: 1|act_loss: 0.027740478515625|cri_loss: 0.01450347900390625|unsuper_loss: 0.0
-average reward score: 4.7109375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.69%) |Training time=0.47s (21.75%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3143|ppo_ep: 1|act_loss: 0.0130767822265625|cri_loss: 0.00711822509765625|unsuper_loss: 0.0
-average reward score: 4.8203125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.88%) |Training time=0.46s (21.56%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3144|ppo_ep: 1|act_loss: 0.0394287109375|cri_loss: 0.0209197998046875|unsuper_loss: 0.0
-average reward score: 5.16796875
--------------------------------------------------------------------------------------
-|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.58s (71.07%) |Training time=0.53s (23.64%) |Others=0.12 (5.28%)|CurSamplesPerSec=14.40 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3145|ppo_ep: 1|act_loss: -0.09375|cri_loss: -0.044036865234375|unsuper_loss: 0.0
-average reward score: 4.8515625
--------------------------------------------------------------------------------------
-|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.58s (70.92%) |Training time=0.55s (24.58%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.33 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3146|ppo_ep: 1|act_loss: -0.1318359375|cri_loss: -0.0621337890625|unsuper_loss: 0.0
-average reward score: 4.7890625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.49%) |Training time=0.46s (20.99%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3147|ppo_ep: 1|act_loss: 0.0472412109375|cri_loss: 0.02520751953125|unsuper_loss: 0.0
-average reward score: 5.4140625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.01%) |Training time=0.48s (22.15%) |Others=0.10 (4.84%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3148|ppo_ep: 1|act_loss: -0.037689208984375|cri_loss: -0.017608642578125|unsuper_loss: 0.0
-average reward score: 5.6015625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.90%) |Training time=0.46s (21.37%) |Others=0.10 (4.73%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46
-[2023-04-14 10:43:39,941] [INFO] [logging.py:96:log_dist] [Rank 0] step=3150, skipped=41, lr=[6.758613119722655e-06, 6.758613119722655e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:43:39,958] [INFO] [timer.py:199:stop] epoch=0/micro_step=3150/global_step=3150, RunningAvgSamplesPerSec=106.06281589531704, CurrSamplesPerSec=101.26017409621011, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:43:40,051] [INFO] [logging.py:96:log_dist] [Rank 0] step=3150, skipped=47, lr=[3.5071603781009834e-06, 3.5071603781009834e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3149|ppo_ep: 1|act_loss: -0.029327392578125|cri_loss: -0.01125335693359375|unsuper_loss: 0.0
-average reward score: 4.6484375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.43%) |Training time=0.48s (22.04%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3150|ppo_ep: 1|act_loss: 0.031280517578125|cri_loss: 0.017364501953125|unsuper_loss: 0.0
-average reward score: 4.5
--------------------------------------------------------------------------------------
-|E2E latency=2.42s |Gather latency=0.00s (0.00%) |Generate time=1.60s (66.03%) |Training time=0.49s (20.17%) |Others=0.33 (13.79%)|CurSamplesPerSec=13.24 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3151|ppo_ep: 1|act_loss: 0.063720703125|cri_loss: 0.034332275390625|unsuper_loss: 0.0
-average reward score: 5.2265625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.53%) |Training time=0.43s (19.75%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3152|ppo_ep: 1|act_loss: 0.0151519775390625|cri_loss: 0.00836944580078125|unsuper_loss: 0.0
-average reward score: 4.9765625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.40%) |Training time=0.44s (20.03%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3153|ppo_ep: 1|act_loss: 0.010284423828125|cri_loss: 0.0057373046875|unsuper_loss: 0.0
-average reward score: 6.6640625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.87%) |Training time=0.45s (20.58%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3154|ppo_ep: 1|act_loss: 0.01165771484375|cri_loss: 0.006153106689453125|unsuper_loss: 0.0
-average reward score: 5.12109375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.37%) |Training time=0.46s (21.10%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3155|ppo_ep: 1|act_loss: -0.015777587890625|cri_loss: -0.00711822509765625|unsuper_loss: 0.0
-average reward score: 4.53515625
--------------------------------------------------------------------------------------
-|E2E latency=2.52s |Gather latency=0.00s (0.00%) |Generate time=1.61s (63.95%) |Training time=0.45s (18.02%) |Others=0.45 (18.03%)|CurSamplesPerSec=12.71 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3156|ppo_ep: 1|act_loss: -0.019256591796875|cri_loss: -0.00878143310546875|unsuper_loss: 0.0
-average reward score: 4.68359375
--------------------------------------------------------------------------------------
-|E2E latency=2.12s |Gather latency=0.00s (0.00%) |Generate time=1.63s (77.07%) |Training time=0.39s (18.27%) |Others=0.10 (4.66%)|CurSamplesPerSec=15.10 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3157|ppo_ep: 1|act_loss: -0.0109405517578125|cri_loss: -0.004871368408203125|unsuper_loss: 0.0
-average reward score: 4.33203125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.08%) |Training time=0.44s (20.40%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3158|ppo_ep: 1|act_loss: -0.0211181640625|cri_loss: -0.01001739501953125|unsuper_loss: 0.0
-average reward score: 5.1953125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.34%) |Training time=0.46s (21.08%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
-[2023-04-14 10:44:02,182] [INFO] [logging.py:96:log_dist] [Rank 0] step=3160, skipped=41, lr=[6.741579534621794e-06, 6.741579534621794e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:44:02,201] [INFO] [timer.py:199:stop] epoch=0/micro_step=3160/global_step=3160, RunningAvgSamplesPerSec=106.08499964575394, CurrSamplesPerSec=108.97365410549887, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:44:02,293] [INFO] [logging.py:96:log_dist] [Rank 0] step=3160, skipped=47, lr=[3.4983435807514484e-06, 3.4983435807514484e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3159|ppo_ep: 1|act_loss: -0.02764892578125|cri_loss: -0.0126495361328125|unsuper_loss: 0.0
-average reward score: 4.7265625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.34%) |Training time=0.46s (21.14%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3160|ppo_ep: 1|act_loss: -0.0206298828125|cri_loss: -0.009796142578125|unsuper_loss: 0.0
-average reward score: 4.6328125
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.63s (73.77%) |Training time=0.47s (21.12%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.51 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3161|ppo_ep: 1|act_loss: 0.0300445556640625|cri_loss: 0.015899658203125|unsuper_loss: 0.0
-average reward score: 4.94921875
--------------------------------------------------------------------------------------
-|E2E latency=2.30s |Gather latency=0.00s (0.00%) |Generate time=1.71s (74.42%) |Training time=0.49s (21.31%) |Others=0.10 (4.27%)|CurSamplesPerSec=13.91 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3162|ppo_ep: 1|act_loss: 0.069091796875|cri_loss: 0.036834716796875|unsuper_loss: 0.0
-average reward score: 5.2734375
--------------------------------------------------------------------------------------
-|E2E latency=2.44s |Gather latency=0.00s (0.00%) |Generate time=1.62s (66.42%) |Training time=0.45s (18.26%) |Others=0.37 (15.33%)|CurSamplesPerSec=13.09 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3163|ppo_ep: 1|act_loss: -0.01104736328125|cri_loss: -0.00527191162109375|unsuper_loss: 0.0
-average reward score: 5.25
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.37%) |Training time=0.46s (21.08%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3164|ppo_ep: 1|act_loss: -0.017486572265625|cri_loss: -0.0081787109375|unsuper_loss: 0.0
-average reward score: 4.87890625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.48%) |Training time=0.46s (21.01%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3165|ppo_ep: 1|act_loss: -0.029510498046875|cri_loss: -0.0138702392578125|unsuper_loss: 0.0
-average reward score: 5.1640625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.82%) |Training time=0.47s (21.66%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3166|ppo_ep: 1|act_loss: -0.00011587142944335938|cri_loss: 0.00028586387634277344|unsuper_loss: 0.0
-average reward score: 4.5859375
--------------------------------------------------------------------------------------
-|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.62s (71.38%) |Training time=0.55s (24.28%) |Others=0.10 (4.34%)|CurSamplesPerSec=14.08 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3167|ppo_ep: 1|act_loss: 0.0069122314453125|cri_loss: 0.0042724609375|unsuper_loss: 0.0
-average reward score: 4.91015625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.39%) |Training time=0.48s (22.07%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3168|ppo_ep: 1|act_loss: 0.004741668701171875|cri_loss: 0.0040435791015625|unsuper_loss: 0.0
-average reward score: 5.3828125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.85%) |Training time=0.47s (21.62%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
-[2023-04-14 10:44:24,441] [INFO] [logging.py:96:log_dist] [Rank 0] step=3170, skipped=41, lr=[6.724517541188731e-06, 6.724517541188731e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:44:24,509] [INFO] [timer.py:199:stop] epoch=0/micro_step=3170/global_step=3170, RunningAvgSamplesPerSec=106.07379913337502, CurrSamplesPerSec=87.51397169668613, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:44:24,602] [INFO] [logging.py:96:log_dist] [Rank 0] step=3170, skipped=47, lr=[3.4895119855416737e-06, 3.4895119855416737e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3169|ppo_ep: 1|act_loss: 0.00868988037109375|cri_loss: 0.00487518310546875|unsuper_loss: 0.0
-average reward score: 5.484375
--------------------------------------------------------------------------------------
-|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.60s (71.85%) |Training time=0.53s (23.74%) |Others=0.10 (4.41%)|CurSamplesPerSec=14.39 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3170|ppo_ep: 1|act_loss: 0.0268707275390625|cri_loss: 0.014068603515625|unsuper_loss: 0.0
-average reward score: 4.93359375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.05%) |Training time=0.49s (22.44%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3171|ppo_ep: 1|act_loss: 0.0170440673828125|cri_loss: 0.009185791015625|unsuper_loss: 0.0
-average reward score: 5.8515625
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.46%) |Training time=0.50s (23.05%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3172|ppo_ep: 1|act_loss: 0.01983642578125|cri_loss: 0.0110321044921875|unsuper_loss: 0.0
-average reward score: 4.79296875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.99%) |Training time=0.49s (22.50%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3173|ppo_ep: 1|act_loss: 0.046783447265625|cri_loss: 0.02850341796875|unsuper_loss: 0.0
-average reward score: 4.71484375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.29%) |Training time=0.48s (22.17%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3174|ppo_ep: 1|act_loss: 0.0023670196533203125|cri_loss: 0.001544952392578125|unsuper_loss: 0.0
-average reward score: 5.47265625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.32%) |Training time=0.48s (22.14%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3175|ppo_ep: 1|act_loss: -0.0003724098205566406|cri_loss: 0.00018739700317382812|unsuper_loss: 0.0
-average reward score: 5.140625
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.46%) |Training time=0.48s (22.04%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3176|ppo_ep: 1|act_loss: -0.0162811279296875|cri_loss: -0.0079498291015625|unsuper_loss: 0.0
-average reward score: 5.13671875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.35%) |Training time=0.48s (22.11%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3177|ppo_ep: 1|act_loss: 0.011505126953125|cri_loss: 0.00592803955078125|unsuper_loss: 0.0
-average reward score: 4.97265625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.37%) |Training time=0.48s (22.10%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3178|ppo_ep: 1|act_loss: 0.051971435546875|cri_loss: 0.02734375|unsuper_loss: 0.0
-average reward score: 4.78515625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.25%) |Training time=0.48s (22.14%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
-[2023-04-14 10:44:46,203] [INFO] [logging.py:96:log_dist] [Rank 0] step=3180, skipped=41, lr=[6.707427392323372e-06, 6.707427392323372e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:44:46,221] [INFO] [timer.py:199:stop] epoch=0/micro_step=3180/global_step=3180, RunningAvgSamplesPerSec=106.05239888050805, CurrSamplesPerSec=101.55715126687541, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:44:46,314] [INFO] [logging.py:96:log_dist] [Rank 0] step=3180, skipped=47, lr=[3.480665723377205e-06, 3.480665723377205e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3179|ppo_ep: 1|act_loss: 0.07061767578125|cri_loss: 0.0386962890625|unsuper_loss: 0.0
-average reward score: 4.91796875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.49%) |Training time=0.48s (21.99%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3180|ppo_ep: 1|act_loss: 0.0008573532104492188|cri_loss: 0.0007100105285644531|unsuper_loss: 0.0
-average reward score: 5.58984375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.27%) |Training time=0.45s (21.13%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3181|ppo_ep: 1|act_loss: -0.035064697265625|cri_loss: -0.01690673828125|unsuper_loss: 0.0
-average reward score: 5.3203125
--------------------------------------------------------------------------------------
-|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.40%) |Training time=0.48s (20.39%) |Others=0.10 (4.21%)|CurSamplesPerSec=13.72 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3182|ppo_ep: 1|act_loss: 0.032012939453125|cri_loss: 0.0175628662109375|unsuper_loss: 0.0
-average reward score: 5.26953125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.42%) |Training time=0.45s (20.98%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3183|ppo_ep: 1|act_loss: 0.0159912109375|cri_loss: 0.01290130615234375|unsuper_loss: 0.0
-average reward score: 5.2421875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.66%) |Training time=0.44s (20.74%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3184|ppo_ep: 1|act_loss: -0.00478363037109375|cri_loss: -0.0021953582763671875|unsuper_loss: 0.0
-average reward score: 5.68359375
--------------------------------------------------------------------------------------
-|E2E latency=2.30s |Gather latency=0.00s (0.00%) |Generate time=1.60s (69.27%) |Training time=0.45s (19.35%) |Others=0.26 (11.38%)|CurSamplesPerSec=13.89 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3185|ppo_ep: 1|act_loss: -0.005828857421875|cri_loss: -0.0022563934326171875|unsuper_loss: 0.0
-average reward score: 5.35546875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.65%) |Training time=0.44s (20.76%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3186|ppo_ep: 1|act_loss: 0.0272064208984375|cri_loss: 0.0141143798828125|unsuper_loss: 0.0
-average reward score: 6.0390625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.71%) |Training time=0.44s (20.61%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3187|ppo_ep: 1|act_loss: 0.0460205078125|cri_loss: 0.0245819091796875|unsuper_loss: 0.0
-average reward score: 5.48828125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.37%) |Training time=0.45s (21.04%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3188|ppo_ep: 1|act_loss: 0.00832366943359375|cri_loss: 0.0048675537109375|unsuper_loss: 0.0
-average reward score: 5.15625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.29%) |Training time=0.45s (21.13%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46
-[2023-04-14 10:45:07,978] [INFO] [logging.py:96:log_dist] [Rank 0] step=3190, skipped=41, lr=[6.690309341342949e-06, 6.690309341342949e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:45:07,996] [INFO] [timer.py:199:stop] epoch=0/micro_step=3190/global_step=3190, RunningAvgSamplesPerSec=106.06791101364033, CurrSamplesPerSec=112.8028891206655, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:45:08,088] [INFO] [logging.py:96:log_dist] [Rank 0] step=3190, skipped=47, lr=[3.4718049253809894e-06, 3.4718049253809894e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3189|ppo_ep: 1|act_loss: 0.0041351318359375|cri_loss: 0.0023345947265625|unsuper_loss: 0.0
-average reward score: 5.49609375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.76%) |Training time=0.45s (20.69%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3190|ppo_ep: 1|act_loss: 0.01507568359375|cri_loss: 0.0077056884765625|unsuper_loss: 0.0
-average reward score: 6.13671875
--------------------------------------------------------------------------------------
-|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.63s (72.68%) |Training time=0.46s (20.58%) |Others=0.15 (6.74%)|CurSamplesPerSec=14.25 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3191|ppo_ep: 1|act_loss: -0.01251220703125|cri_loss: -0.004730224609375|unsuper_loss: 0.0
-average reward score: 4.625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.53%) |Training time=0.48s (21.94%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3192|ppo_ep: 1|act_loss: 0.019500732421875|cri_loss: 0.01045989990234375|unsuper_loss: 0.0
-average reward score: 4.90234375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.90%) |Training time=0.47s (21.55%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3193|ppo_ep: 1|act_loss: -0.024658203125|cri_loss: -0.0119476318359375|unsuper_loss: 0.0
-average reward score: 5.515625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.71%) |Training time=0.47s (21.73%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3194|ppo_ep: 1|act_loss: -0.0204010009765625|cri_loss: -0.00936126708984375|unsuper_loss: 0.0
-average reward score: 4.765625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.45%) |Training time=0.45s (20.91%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3195|ppo_ep: 1|act_loss: -0.005889892578125|cri_loss: -0.0024585723876953125|unsuper_loss: 0.0
-average reward score: 4.60546875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.41%) |Training time=0.45s (21.04%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3196|ppo_ep: 1|act_loss: 0.000583648681640625|cri_loss: 0.0017080307006835938|unsuper_loss: 0.0
-average reward score: 5.15234375
--------------------------------------------------------------------------------------
-|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.77s (76.04%) |Training time=0.46s (19.73%) |Others=0.10 (4.23%)|CurSamplesPerSec=13.76 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3197|ppo_ep: 1|act_loss: -0.024444580078125|cri_loss: -0.01142120361328125|unsuper_loss: 0.0
-average reward score: 4.7265625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.28%) |Training time=0.46s (21.16%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3198|ppo_ep: 1|act_loss: -0.00677490234375|cri_loss: -0.0029048919677734375|unsuper_loss: 0.0
-average reward score: 4.5390625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.20%) |Training time=0.46s (21.24%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
-[2023-04-14 10:45:29,972] [INFO] [logging.py:96:log_dist] [Rank 0] step=3200, skipped=41, lr=[6.673163641978276e-06, 6.673163641978276e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:45:30,562] [INFO] [timer.py:199:stop] epoch=0/micro_step=3200/global_step=3200, RunningAvgSamplesPerSec=105.9945701207933, CurrSamplesPerSec=32.19384756571583, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:45:30,656] [INFO] [logging.py:96:log_dist] [Rank 0] step=3200, skipped=47, lr=[3.462929722891427e-06, 3.462929722891427e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3199|ppo_ep: 1|act_loss: -0.01418304443359375|cri_loss: -0.00630950927734375|unsuper_loss: 0.0
-average reward score: 5.1328125
--------------------------------------------------------------------------------------
-|E2E latency=2.87s |Gather latency=0.00s (0.00%) |Generate time=1.60s (55.75%) |Training time=1.16s (40.33%) |Others=0.11 (3.92%)|CurSamplesPerSec=11.16 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3200|ppo_ep: 1|act_loss: 0.036529541015625|cri_loss: 0.0186614990234375|unsuper_loss: 0.0
-average reward score: 5.20703125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.45%) |Training time=0.45s (20.99%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3201|ppo_ep: 1|act_loss: 0.0728759765625|cri_loss: 0.04498291015625|unsuper_loss: 0.0
-average reward score: 5.2890625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.39%) |Training time=0.48s (22.06%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3202|ppo_ep: 1|act_loss: 0.003406524658203125|cri_loss: 0.00264739990234375|unsuper_loss: 0.0
-average reward score: 5.0546875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.92%) |Training time=0.47s (21.54%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3203|ppo_ep: 1|act_loss: 0.021942138671875|cri_loss: 0.01192474365234375|unsuper_loss: 0.0
-average reward score: 5.671875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.40%) |Training time=0.48s (22.06%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3204|ppo_ep: 1|act_loss: -0.02435302734375|cri_loss: -0.01165771484375|unsuper_loss: 0.0
-average reward score: 5.75
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.55%) |Training time=0.47s (21.88%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3205|ppo_ep: 1|act_loss: 0.027679443359375|cri_loss: 0.0162811279296875|unsuper_loss: 0.0
-average reward score: 5.1953125
--------------------------------------------------------------------------------------
-|E2E latency=2.76s |Gather latency=0.00s (0.00%) |Generate time=1.58s (57.30%) |Training time=0.47s (16.98%) |Others=0.71 (25.72%)|CurSamplesPerSec=11.58 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3206|ppo_ep: 1|act_loss: -0.04412841796875|cri_loss: -0.0216217041015625|unsuper_loss: 0.0
-average reward score: 5.85546875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.46%) |Training time=0.47s (21.98%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3207|ppo_ep: 1|act_loss: -0.0545654296875|cri_loss: -0.0265045166015625|unsuper_loss: 0.0
-average reward score: 4.78515625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.45%) |Training time=0.47s (21.99%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
-[2023-04-14 10:45:50,602] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
-[2023-04-14 10:45:50,686] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
-epoch: 0|step: 3208|ppo_ep: 1|act_loss: -0.046234130859375|cri_loss: -0.02215576171875|unsuper_loss: 0.0
-average reward score: 5.7421875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.13%) |Training time=0.47s (21.79%) |Others=0.09 (4.08%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46
-[2023-04-14 10:45:52,725] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
-[2023-04-14 10:45:52,725] [INFO] [logging.py:96:log_dist] [Rank 0] step=3210, skipped=43, lr=[6.659427346418702e-06, 6.659427346418702e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:45:52,725] [INFO] [timer.py:199:stop] epoch=0/micro_step=3210/global_step=3210, RunningAvgSamplesPerSec=105.99191837382371, CurrSamplesPerSec=116.19224419656577, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:45:52,808] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
-[2023-04-14 10:45:52,809] [INFO] [logging.py:96:log_dist] [Rank 0] step=3210, skipped=49, lr=[3.4558192780603294e-06, 3.4558192780603294e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3209|ppo_ep: 1|act_loss: -0.0201263427734375|cri_loss: -0.008544921875|unsuper_loss: 0.0
-average reward score: 5.08203125
--------------------------------------------------------------------------------------
-|E2E latency=2.12s |Gather latency=0.00s (0.00%) |Generate time=1.60s (75.27%) |Training time=0.44s (20.60%) |Others=0.09 (4.12%)|CurSamplesPerSec=15.08 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3210|ppo_ep: 1|act_loss: -0.02313232421875|cri_loss: -0.01012420654296875|unsuper_loss: 0.0
-average reward score: 5.01953125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.70%) |Training time=0.47s (21.73%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3211|ppo_ep: 1|act_loss: -0.0171661376953125|cri_loss: -0.008209228515625|unsuper_loss: 0.0
-average reward score: 5.80078125
--------------------------------------------------------------------------------------
-|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.76s (74.25%) |Training time=0.51s (21.60%) |Others=0.10 (4.15%)|CurSamplesPerSec=13.51 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3212|ppo_ep: 1|act_loss: 0.002307891845703125|cri_loss: 0.0018768310546875|unsuper_loss: 0.0
-average reward score: 5.2890625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.54%) |Training time=0.47s (21.93%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3213|ppo_ep: 1|act_loss: 0.046966552734375|cri_loss: 0.024261474609375|unsuper_loss: 0.0
-average reward score: 5.5625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.58%) |Training time=0.47s (21.90%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3214|ppo_ep: 1|act_loss: 0.0421142578125|cri_loss: 0.02203369140625|unsuper_loss: 0.0
-average reward score: 6.125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.39%) |Training time=0.48s (21.96%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3215|ppo_ep: 1|act_loss: 0.04229736328125|cri_loss: 0.02191162109375|unsuper_loss: 0.0
-average reward score: 5.578125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.41%) |Training time=0.48s (22.06%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3216|ppo_ep: 1|act_loss: 0.041778564453125|cri_loss: 0.0214996337890625|unsuper_loss: 0.0
-average reward score: 5.33203125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.26%) |Training time=0.48s (22.12%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3217|ppo_ep: 1|act_loss: -0.0154876708984375|cri_loss: -0.00630950927734375|unsuper_loss: 0.0
-average reward score: 5.609375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.42%) |Training time=0.48s (22.05%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3218|ppo_ep: 1|act_loss: -0.0030689239501953125|cri_loss: -0.0011997222900390625|unsuper_loss: 0.0
-average reward score: 5.98046875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.57%) |Training time=0.48s (21.88%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.46
-[2023-04-14 10:46:14,630] [INFO] [logging.py:96:log_dist] [Rank 0] step=3220, skipped=43, lr=[6.642232520669742e-06, 6.642232520669742e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:46:14,648] [INFO] [timer.py:199:stop] epoch=0/micro_step=3220/global_step=3220, RunningAvgSamplesPerSec=105.97259385289864, CurrSamplesPerSec=94.68141865104559, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:46:14,741] [INFO] [logging.py:96:log_dist] [Rank 0] step=3220, skipped=49, lr=[3.4469184791357944e-06, 3.4469184791357944e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3219|ppo_ep: 1|act_loss: -0.0013322830200195312|cri_loss: 6.29425048828125e-05|unsuper_loss: 0.0
-average reward score: 5.74609375
--------------------------------------------------------------------------------------
-|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.63s (73.12%) |Training time=0.50s (22.47%) |Others=0.10 (4.41%)|CurSamplesPerSec=14.37 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3220|ppo_ep: 1|act_loss: -0.049072265625|cri_loss: -0.0238494873046875|unsuper_loss: 0.0
-average reward score: 5.3828125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.27%) |Training time=0.48s (22.17%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3221|ppo_ep: 1|act_loss: -0.05157470703125|cri_loss: -0.022247314453125|unsuper_loss: 0.0
-average reward score: 4.953125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.22%) |Training time=0.48s (22.11%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3222|ppo_ep: 1|act_loss: 0.00411224365234375|cri_loss: 0.003429412841796875|unsuper_loss: 0.0
-average reward score: 5.85546875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.54%) |Training time=0.47s (21.92%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3223|ppo_ep: 1|act_loss: -0.045166015625|cri_loss: -0.02154541015625|unsuper_loss: 0.0
-average reward score: 5.2421875
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.64%) |Training time=0.50s (22.80%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3224|ppo_ep: 1|act_loss: -0.01055908203125|cri_loss: -0.0046844482421875|unsuper_loss: 0.0
-average reward score: 5.9140625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.27%) |Training time=0.48s (22.21%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3225|ppo_ep: 1|act_loss: -0.03643798828125|cri_loss: -0.017333984375|unsuper_loss: 0.0
-average reward score: 4.9765625
--------------------------------------------------------------------------------------
-|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.59s (67.61%) |Training time=0.54s (22.98%) |Others=0.22 (9.41%)|CurSamplesPerSec=13.59 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3226|ppo_ep: 1|act_loss: 0.02813720703125|cri_loss: 0.014892578125|unsuper_loss: 0.0
-average reward score: 5.140625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.33%) |Training time=0.48s (22.14%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3227|ppo_ep: 1|act_loss: 0.068115234375|cri_loss: 0.035675048828125|unsuper_loss: 0.0
-average reward score: 4.8984375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.53%) |Training time=0.47s (21.92%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3228|ppo_ep: 1|act_loss: 0.10015869140625|cri_loss: 0.0543212890625|unsuper_loss: 0.0
-average reward score: 5.58203125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.24%) |Training time=0.48s (22.23%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
-[2023-04-14 10:46:36,491] [INFO] [logging.py:96:log_dist] [Rank 0] step=3230, skipped=43, lr=[6.625010759150993e-06, 6.625010759150993e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:46:36,510] [INFO] [timer.py:199:stop] epoch=0/micro_step=3230/global_step=3230, RunningAvgSamplesPerSec=105.94732779611219, CurrSamplesPerSec=99.21879437735863, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:46:36,602] [INFO] [logging.py:96:log_dist] [Rank 0] step=3230, skipped=49, lr=[3.4380036445950826e-06, 3.4380036445950826e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3229|ppo_ep: 1|act_loss: 0.0136871337890625|cri_loss: 0.00812530517578125|unsuper_loss: 0.0
-average reward score: 5.30078125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.10%) |Training time=0.48s (22.40%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3230|ppo_ep: 1|act_loss: 0.028350830078125|cri_loss: 0.01450347900390625|unsuper_loss: 0.0
-average reward score: 5.3515625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.09%) |Training time=0.48s (22.37%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3231|ppo_ep: 1|act_loss: -0.0118408203125|cri_loss: -0.00531768798828125|unsuper_loss: 0.0
-average reward score: 4.86328125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.16%) |Training time=0.48s (22.31%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3232|ppo_ep: 1|act_loss: -0.00424957275390625|cri_loss: -0.0014390945434570312|unsuper_loss: 0.0
-average reward score: 4.9375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.03%) |Training time=0.49s (22.45%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3233|ppo_ep: 1|act_loss: -0.02142333984375|cri_loss: -0.01001739501953125|unsuper_loss: 0.0
-average reward score: 5.3203125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.01%) |Training time=0.49s (22.46%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3234|ppo_ep: 1|act_loss: -0.0184478759765625|cri_loss: -0.00812530517578125|unsuper_loss: 0.0
-average reward score: 4.515625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.22%) |Training time=0.48s (22.24%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3235|ppo_ep: 1|act_loss: -0.0230255126953125|cri_loss: -0.01080322265625|unsuper_loss: 0.0
-average reward score: 4.6328125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.05%) |Training time=0.48s (22.31%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3236|ppo_ep: 1|act_loss: 0.011444091796875|cri_loss: 0.0060272216796875|unsuper_loss: 0.0
-average reward score: 4.8671875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.13%) |Training time=0.48s (22.34%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3237|ppo_ep: 1|act_loss: -0.037109375|cri_loss: -0.01812744140625|unsuper_loss: 0.0
-average reward score: 5.5859375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.36%) |Training time=0.48s (22.09%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3238|ppo_ep: 1|act_loss: 0.016021728515625|cri_loss: 0.00850677490234375|unsuper_loss: 0.0
-average reward score: 5.20703125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.76%) |Training time=0.49s (22.68%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.46
-[2023-04-14 10:46:58,161] [INFO] [logging.py:96:log_dist] [Rank 0] step=3240, skipped=43, lr=[6.6077623171305024e-06, 6.6077623171305024e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:46:58,179] [INFO] [timer.py:199:stop] epoch=0/micro_step=3240/global_step=3240, RunningAvgSamplesPerSec=105.92459917021735, CurrSamplesPerSec=96.84810997952171, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:46:58,274] [INFO] [logging.py:96:log_dist] [Rank 0] step=3240, skipped=49, lr=[3.4290749065775475e-06, 3.4290749065775475e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3239|ppo_ep: 1|act_loss: -0.0272216796875|cri_loss: -0.0132293701171875|unsuper_loss: 0.0
-average reward score: 5.76953125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.79%) |Training time=0.49s (22.62%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3240|ppo_ep: 1|act_loss: 0.01654052734375|cri_loss: 0.00868988037109375|unsuper_loss: 0.0
-average reward score: 5.5859375
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.59s (71.81%) |Training time=0.47s (21.39%) |Others=0.15 (6.80%)|CurSamplesPerSec=14.50 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3241|ppo_ep: 1|act_loss: 0.05535888671875|cri_loss: 0.02935791015625|unsuper_loss: 0.0
-average reward score: 5.2421875
--------------------------------------------------------------------------------------
-|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.58%) |Training time=0.48s (21.08%) |Others=0.10 (4.34%)|CurSamplesPerSec=14.10 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3242|ppo_ep: 1|act_loss: -0.00484466552734375|cri_loss: -0.002094268798828125|unsuper_loss: 0.0
-average reward score: 5.78125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.37%) |Training time=0.48s (22.07%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3243|ppo_ep: 1|act_loss: 0.03314208984375|cri_loss: 0.0167999267578125|unsuper_loss: 0.0
-average reward score: 5.23046875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.50%) |Training time=0.47s (21.94%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3244|ppo_ep: 1|act_loss: 0.009765625|cri_loss: 0.005191802978515625|unsuper_loss: 0.0
-average reward score: 5.6171875
--------------------------------------------------------------------------------------
-|E2E latency=2.65s |Gather latency=0.00s (0.00%) |Generate time=1.58s (59.59%) |Training time=0.48s (18.08%) |Others=0.59 (22.32%)|CurSamplesPerSec=12.06 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3245|ppo_ep: 1|act_loss: -0.00560760498046875|cri_loss: -0.002361297607421875|unsuper_loss: 0.0
-average reward score: 5.38671875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.28%) |Training time=0.48s (22.14%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3246|ppo_ep: 1|act_loss: -0.0221405029296875|cri_loss: -0.01041412353515625|unsuper_loss: 0.0
-average reward score: 5.17578125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.28%) |Training time=0.48s (22.18%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3247|ppo_ep: 1|act_loss: -0.023529052734375|cri_loss: -0.010528564453125|unsuper_loss: 0.0
-average reward score: 4.90625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.70%) |Training time=0.47s (21.74%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3248|ppo_ep: 1|act_loss: -0.033416748046875|cri_loss: -0.0162506103515625|unsuper_loss: 0.0
-average reward score: 4.53125
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.77%) |Training time=0.48s (21.75%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.46
-[2023-04-14 10:47:20,543] [INFO] [logging.py:96:log_dist] [Rank 0] step=3250, skipped=43, lr=[6.590487450271792e-06, 6.590487450271792e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:47:20,561] [INFO] [timer.py:199:stop] epoch=0/micro_step=3250/global_step=3250, RunningAvgSamplesPerSec=105.9117506771129, CurrSamplesPerSec=100.8401444032472, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:47:20,654] [INFO] [logging.py:96:log_dist] [Rank 0] step=3250, skipped=49, lr=[3.420132397428625e-06, 3.420132397428625e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3249|ppo_ep: 1|act_loss: -0.01910400390625|cri_loss: -0.00933837890625|unsuper_loss: 0.0
-average reward score: 5.56640625
--------------------------------------------------------------------------------------
-|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.51%) |Training time=0.48s (21.18%) |Others=0.10 (4.32%)|CurSamplesPerSec=14.13 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3250|ppo_ep: 1|act_loss: 0.024017333984375|cri_loss: 0.01232147216796875|unsuper_loss: 0.0
-average reward score: 5.7265625
--------------------------------------------------------------------------------------
-|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.58s (68.45%) |Training time=0.48s (20.75%) |Others=0.25 (10.80%)|CurSamplesPerSec=13.87 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3251|ppo_ep: 1|act_loss: 0.0186614990234375|cri_loss: 0.00963592529296875|unsuper_loss: 0.0
-average reward score: 5.00390625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.15%) |Training time=0.48s (22.31%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3252|ppo_ep: 1|act_loss: -0.0013141632080078125|cri_loss: -0.0002598762512207031|unsuper_loss: 0.0
-average reward score: 4.7265625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.42%) |Training time=0.48s (22.03%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3253|ppo_ep: 1|act_loss: 0.00670623779296875|cri_loss: 0.0039520263671875|unsuper_loss: 0.0
-average reward score: 5.0234375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.81%) |Training time=0.49s (22.64%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3254|ppo_ep: 1|act_loss: 0.005558013916015625|cri_loss: 0.00290679931640625|unsuper_loss: 0.0
-average reward score: 5.546875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.01%) |Training time=0.49s (22.46%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3255|ppo_ep: 1|act_loss: 0.01549530029296875|cri_loss: 0.0082550048828125|unsuper_loss: 0.0
-average reward score: 4.7421875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.56%) |Training time=0.47s (21.89%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3256|ppo_ep: 1|act_loss: -0.0132904052734375|cri_loss: -0.00640869140625|unsuper_loss: 0.0
-average reward score: 5.2109375
--------------------------------------------------------------------------------------
-|E2E latency=2.40s |Gather latency=0.00s (0.00%) |Generate time=1.58s (65.96%) |Training time=0.49s (20.36%) |Others=0.33 (13.68%)|CurSamplesPerSec=13.36 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3257|ppo_ep: 1|act_loss: 0.006717681884765625|cri_loss: 0.00504302978515625|unsuper_loss: 0.0
-average reward score: 5.13671875
--------------------------------------------------------------------------------------
-|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.76s (74.33%) |Training time=0.51s (21.53%) |Others=0.10 (4.15%)|CurSamplesPerSec=13.48 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3258|ppo_ep: 1|act_loss: -0.035125732421875|cri_loss: -0.01690673828125|unsuper_loss: 0.0
-average reward score: 5.609375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.15%) |Training time=0.48s (22.31%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
-[2023-04-14 10:47:42,752] [INFO] [logging.py:96:log_dist] [Rank 0] step=3260, skipped=43, lr=[6.573186414630062e-06, 6.573186414630062e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:47:42,770] [INFO] [timer.py:199:stop] epoch=0/micro_step=3260/global_step=3260, RunningAvgSamplesPerSec=105.8897419676038, CurrSamplesPerSec=100.19568274861223, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:47:42,863] [INFO] [logging.py:96:log_dist] [Rank 0] step=3260, skipped=49, lr=[3.4111762496978753e-06, 3.4111762496978753e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3259|ppo_ep: 1|act_loss: -0.01148223876953125|cri_loss: -0.005428314208984375|unsuper_loss: 0.0
-average reward score: 5.5234375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.21%) |Training time=0.48s (22.27%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3260|ppo_ep: 1|act_loss: 0.01776123046875|cri_loss: 0.00902557373046875|unsuper_loss: 0.0
-average reward score: 5.578125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.15%) |Training time=0.48s (22.31%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3261|ppo_ep: 1|act_loss: -0.0051727294921875|cri_loss: -0.0015697479248046875|unsuper_loss: 0.0
-average reward score: 4.79296875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.09%) |Training time=0.48s (22.38%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3262|ppo_ep: 1|act_loss: 0.0318603515625|cri_loss: 0.017333984375|unsuper_loss: 0.0
-average reward score: 4.53125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.25%) |Training time=0.48s (22.21%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3263|ppo_ep: 1|act_loss: 0.01312255859375|cri_loss: 0.006908416748046875|unsuper_loss: 0.0
-average reward score: 4.88671875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.91%) |Training time=0.49s (22.57%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3264|ppo_ep: 1|act_loss: 0.036163330078125|cri_loss: 0.0189971923828125|unsuper_loss: 0.0
-average reward score: 5.578125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.80%) |Training time=0.49s (22.68%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3265|ppo_ep: 1|act_loss: 0.01306915283203125|cri_loss: 0.006793975830078125|unsuper_loss: 0.0
-average reward score: 5.68359375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.34%) |Training time=0.48s (22.13%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3266|ppo_ep: 1|act_loss: -0.026123046875|cri_loss: -0.01198577880859375|unsuper_loss: 0.0
-average reward score: 5.3515625
--------------------------------------------------------------------------------------
-|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.58s (66.72%) |Training time=0.48s (20.25%) |Others=0.31 (13.03%)|CurSamplesPerSec=13.51 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3267|ppo_ep: 1|act_loss: -0.0018949508666992188|cri_loss: -0.0007877349853515625|unsuper_loss: 0.0
-average reward score: 5.26953125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.26%) |Training time=0.48s (22.16%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3268|ppo_ep: 1|act_loss: -0.002704620361328125|cri_loss: 0.0006694793701171875|unsuper_loss: 0.0
-average reward score: 5.3046875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.16%) |Training time=0.48s (22.24%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46
-[2023-04-14 10:48:04,610] [INFO] [logging.py:96:log_dist] [Rank 0] step=3270, skipped=43, lr=[6.555859466648397e-06, 6.555859466648397e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:48:04,628] [INFO] [timer.py:199:stop] epoch=0/micro_step=3270/global_step=3270, RunningAvgSamplesPerSec=105.86868857095156, CurrSamplesPerSec=97.09289883127744, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:48:04,721] [INFO] [logging.py:96:log_dist] [Rank 0] step=3270, skipped=49, lr=[3.4022065961370106e-06, 3.4022065961370106e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3269|ppo_ep: 1|act_loss: -0.00811004638671875|cri_loss: -0.003635406494140625|unsuper_loss: 0.0
-average reward score: 5.5078125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.86%) |Training time=0.49s (22.63%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3270|ppo_ep: 1|act_loss: 0.01363372802734375|cri_loss: 0.007175445556640625|unsuper_loss: 0.0
-average reward score: 5.62890625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.95%) |Training time=0.49s (22.52%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3271|ppo_ep: 1|act_loss: 0.05487060546875|cri_loss: 0.0282440185546875|unsuper_loss: 0.0
-average reward score: 5.11328125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.09%) |Training time=0.49s (22.40%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3272|ppo_ep: 1|act_loss: -0.015411376953125|cri_loss: -0.007541656494140625|unsuper_loss: 0.0
-average reward score: 5.40234375
--------------------------------------------------------------------------------------
-|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.58s (70.69%) |Training time=0.50s (22.16%) |Others=0.16 (7.16%)|CurSamplesPerSec=14.30 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3273|ppo_ep: 1|act_loss: 0.034271240234375|cri_loss: 0.0180206298828125|unsuper_loss: 0.0
-average reward score: 5.1171875
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.90%) |Training time=0.50s (22.62%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3274|ppo_ep: 1|act_loss: 0.0107421875|cri_loss: 0.006000518798828125|unsuper_loss: 0.0
-average reward score: 5.23046875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.15%) |Training time=0.48s (22.31%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3275|ppo_ep: 1|act_loss: 0.02215576171875|cri_loss: 0.011566162109375|unsuper_loss: 0.0
-average reward score: 5.30859375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.59%) |Training time=0.50s (22.81%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3276|ppo_ep: 1|act_loss: -0.01959228515625|cri_loss: -0.009613037109375|unsuper_loss: 0.0
-average reward score: 5.484375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.90%) |Training time=0.49s (22.55%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3277|ppo_ep: 1|act_loss: 0.0102691650390625|cri_loss: 0.00539398193359375|unsuper_loss: 0.0
-average reward score: 5.3984375
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.72%) |Training time=0.50s (22.81%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.53 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3278|ppo_ep: 1|act_loss: 0.01190185546875|cri_loss: 0.0086517333984375|unsuper_loss: 0.0
-average reward score: 4.77734375
--------------------------------------------------------------------------------------
-|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.68s (73.95%) |Training time=0.49s (21.68%) |Others=0.10 (4.38%)|CurSamplesPerSec=14.05 |AvgSamplesPerSec=14.46
-[2023-04-14 10:48:26,538] [INFO] [logging.py:96:log_dist] [Rank 0] step=3280, skipped=43, lr=[6.538506863153967e-06, 6.538506863153967e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:48:26,556] [INFO] [timer.py:199:stop] epoch=0/micro_step=3280/global_step=3280, RunningAvgSamplesPerSec=105.8395083408653, CurrSamplesPerSec=98.7977475322228, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:48:26,649] [INFO] [logging.py:96:log_dist] [Rank 0] step=3280, skipped=49, lr=[3.3932235696979367e-06, 3.3932235696979367e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3279|ppo_ep: 1|act_loss: 0.033966064453125|cri_loss: 0.01898193359375|unsuper_loss: 0.0
-average reward score: 5.12890625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.02%) |Training time=0.49s (22.47%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3280|ppo_ep: 1|act_loss: -0.03369140625|cri_loss: -0.0164642333984375|unsuper_loss: 0.0
-average reward score: 5.21484375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.85%) |Training time=0.49s (22.62%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3281|ppo_ep: 1|act_loss: -0.0006170272827148438|cri_loss: -0.00013208389282226562|unsuper_loss: 0.0
-average reward score: 5.1484375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.95%) |Training time=0.49s (22.39%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3282|ppo_ep: 1|act_loss: 3.743171691894531e-05|cri_loss: 0.00023567676544189453|unsuper_loss: 0.0
-average reward score: 5.03515625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.05%) |Training time=0.49s (22.42%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3283|ppo_ep: 1|act_loss: 0.01375579833984375|cri_loss: 0.007175445556640625|unsuper_loss: 0.0
-average reward score: 5.859375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.83%) |Training time=0.49s (22.68%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3284|ppo_ep: 1|act_loss: 0.027130126953125|cri_loss: 0.01393890380859375|unsuper_loss: 0.0
-average reward score: 5.32421875
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.32%) |Training time=0.49s (22.14%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.57 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3285|ppo_ep: 1|act_loss: 0.01788330078125|cri_loss: 0.0091400146484375|unsuper_loss: 0.0
-average reward score: 6.1328125
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.21%) |Training time=0.49s (22.29%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3286|ppo_ep: 1|act_loss: 0.0101470947265625|cri_loss: 0.005603790283203125|unsuper_loss: 0.0
-average reward score: 5.80078125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.10%) |Training time=0.46s (21.16%) |Others=0.10 (4.74%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3287|ppo_ep: 1|act_loss: -0.020721435546875|cri_loss: -0.008544921875|unsuper_loss: 0.0
-average reward score: 4.734375
--------------------------------------------------------------------------------------
-|E2E latency=2.43s |Gather latency=0.00s (0.00%) |Generate time=1.77s (72.74%) |Training time=0.50s (20.42%) |Others=0.17 (6.83%)|CurSamplesPerSec=13.15 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3288|ppo_ep: 1|act_loss: -0.0085296630859375|cri_loss: -0.00321197509765625|unsuper_loss: 0.0
-average reward score: 5.203125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.99%) |Training time=0.47s (21.47%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
-[2023-04-14 10:48:48,531] [INFO] [logging.py:96:log_dist] [Rank 0] step=3290, skipped=43, lr=[6.521128861354213e-06, 6.521128861354213e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:48:48,549] [INFO] [timer.py:199:stop] epoch=0/micro_step=3290/global_step=3290, RunningAvgSamplesPerSec=105.82224074120158, CurrSamplesPerSec=108.69055545522937, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:48:48,641] [INFO] [logging.py:96:log_dist] [Rank 0] step=3290, skipped=49, lr=[3.384227303530776e-06, 3.384227303530776e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3289|ppo_ep: 1|act_loss: -0.001251220703125|cri_loss: -0.00023698806762695312|unsuper_loss: 0.0
-average reward score: 4.7421875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.28%) |Training time=0.46s (21.19%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3290|ppo_ep: 1|act_loss: -0.016754150390625|cri_loss: -0.007965087890625|unsuper_loss: 0.0
-average reward score: 4.640625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.14%) |Training time=0.46s (21.31%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3291|ppo_ep: 1|act_loss: 0.060546875|cri_loss: 0.032806396484375|unsuper_loss: 0.0
-average reward score: 5.28515625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.13%) |Training time=0.46s (21.25%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3292|ppo_ep: 1|act_loss: 0.0125274658203125|cri_loss: 0.00705718994140625|unsuper_loss: 0.0
-average reward score: 5.5546875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.91%) |Training time=0.47s (21.55%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3293|ppo_ep: 1|act_loss: 0.06329345703125|cri_loss: 0.032257080078125|unsuper_loss: 0.0
-average reward score: 5.08203125
--------------------------------------------------------------------------------------
-|E2E latency=3.53s |Gather latency=0.00s (0.00%) |Generate time=1.60s (45.37%) |Training time=0.46s (13.04%) |Others=1.47 (41.59%)|CurSamplesPerSec=9.07 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3294|ppo_ep: 1|act_loss: 0.00348663330078125|cri_loss: 0.00228118896484375|unsuper_loss: 0.0
-average reward score: 4.83984375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.98%) |Training time=0.46s (21.46%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3295|ppo_ep: 1|act_loss: 0.033905029296875|cri_loss: 0.0184326171875|unsuper_loss: 0.0
-average reward score: 5.15234375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.28%) |Training time=0.46s (21.15%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3296|ppo_ep: 1|act_loss: -0.00469970703125|cri_loss: -0.0013952255249023438|unsuper_loss: 0.0
-average reward score: 5.265625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.18%) |Training time=0.46s (21.29%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3297|ppo_ep: 1|act_loss: 0.0008997917175292969|cri_loss: 0.0009350776672363281|unsuper_loss: 0.0
-average reward score: 4.5390625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.13%) |Training time=0.46s (21.32%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3298|ppo_ep: 1|act_loss: -0.03765869140625|cri_loss: -0.0171966552734375|unsuper_loss: 0.0
-average reward score: 4.66015625
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.64%) |Training time=0.48s (21.85%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.46
-[2023-04-14 10:49:11,545] [INFO] [logging.py:96:log_dist] [Rank 0] step=3300, skipped=43, lr=[6.503725718833046e-06, 6.503725718833046e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:49:11,564] [INFO] [timer.py:199:stop] epoch=0/micro_step=3300/global_step=3300, RunningAvgSamplesPerSec=105.82462227179246, CurrSamplesPerSec=108.39885509746532, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:49:11,656] [INFO] [logging.py:96:log_dist] [Rank 0] step=3300, skipped=49, lr=[3.375217930981894e-06, 3.375217930981894e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3299|ppo_ep: 1|act_loss: -0.0273895263671875|cri_loss: -0.013458251953125|unsuper_loss: 0.0
-average reward score: 5.53125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.31%) |Training time=0.46s (21.15%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3300|ppo_ep: 1|act_loss: 0.088134765625|cri_loss: 0.050750732421875|unsuper_loss: 0.0
-average reward score: 5.4609375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.04%) |Training time=0.46s (21.41%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3301|ppo_ep: 1|act_loss: 0.0094757080078125|cri_loss: 0.00507354736328125|unsuper_loss: 0.0
-average reward score: 5.82421875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.07%) |Training time=0.46s (21.40%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3302|ppo_ep: 1|act_loss: 0.05609130859375|cri_loss: 0.029510498046875|unsuper_loss: 0.0
-average reward score: 5.265625
--------------------------------------------------------------------------------------
-|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.78s (76.17%) |Training time=0.46s (19.63%) |Others=0.10 (4.20%)|CurSamplesPerSec=13.71 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3303|ppo_ep: 1|act_loss: 0.0234527587890625|cri_loss: 0.0122833251953125|unsuper_loss: 0.0
-average reward score: 4.52734375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.40%) |Training time=0.46s (21.08%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3304|ppo_ep: 1|act_loss: 0.040771484375|cri_loss: 0.0233306884765625|unsuper_loss: 0.0
-average reward score: 4.78515625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.89%) |Training time=0.47s (21.57%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3305|ppo_ep: 1|act_loss: -0.0093994140625|cri_loss: -0.003631591796875|unsuper_loss: 0.0
-average reward score: 4.43359375
--------------------------------------------------------------------------------------
-|E2E latency=3.12s |Gather latency=0.00s (0.00%) |Generate time=1.61s (51.71%) |Training time=0.46s (14.70%) |Others=1.05 (33.59%)|CurSamplesPerSec=10.25 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3306|ppo_ep: 1|act_loss: -0.03729248046875|cri_loss: -0.0177459716796875|unsuper_loss: 0.0
-average reward score: 4.79296875
--------------------------------------------------------------------------------------
-|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.68s (73.49%) |Training time=0.51s (22.17%) |Others=0.10 (4.34%)|CurSamplesPerSec=14.01 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3307|ppo_ep: 1|act_loss: -0.025238037109375|cri_loss: -0.0121307373046875|unsuper_loss: 0.0
-average reward score: 4.80078125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.03%) |Training time=0.46s (21.43%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3308|ppo_ep: 1|act_loss: -0.038482666015625|cri_loss: -0.018707275390625|unsuper_loss: 0.0
-average reward score: 4.92578125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.08%) |Training time=0.46s (21.37%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
-[2023-04-14 10:49:34,441] [INFO] [logging.py:96:log_dist] [Rank 0] step=3310, skipped=43, lr=[6.4862976935470215e-06, 6.4862976935470215e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:49:34,459] [INFO] [timer.py:199:stop] epoch=0/micro_step=3310/global_step=3310, RunningAvgSamplesPerSec=105.82343541265251, CurrSamplesPerSec=106.25190725481177, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:49:34,552] [INFO] [logging.py:96:log_dist] [Rank 0] step=3310, skipped=49, lr=[3.366195585591927e-06, 3.366195585591927e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3309|ppo_ep: 1|act_loss: -0.03485107421875|cri_loss: -0.0152740478515625|unsuper_loss: 0.0
-average reward score: 4.28125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.06%) |Training time=0.46s (21.42%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
-[2023-04-14 10:49:36,594] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
-[2023-04-14 10:49:36,677] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
-epoch: 0|step: 3310|ppo_ep: 1|act_loss: 0.00023245811462402344|cri_loss: 0.0003600120544433594|unsuper_loss: 0.0
-average reward score: 5.21875
--------------------------------------------------------------------------------------
-|E2E latency=2.12s |Gather latency=0.00s (0.00%) |Generate time=1.60s (75.50%) |Training time=0.43s (20.39%) |Others=0.09 (4.11%)|CurSamplesPerSec=15.07 |AvgSamplesPerSec=14.46
-[2023-04-14 10:49:38,737] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
-[2023-04-14 10:49:38,820] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
-epoch: 0|step: 3311|ppo_ep: 1|act_loss: -0.0281982421875|cri_loss: -0.01326751708984375|unsuper_loss: 0.0
-average reward score: 5.37890625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.75%) |Training time=0.43s (20.17%) |Others=0.09 (4.09%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3312|ppo_ep: 1|act_loss: -0.026275634765625|cri_loss: -0.01253509521484375|unsuper_loss: 0.0
-average reward score: 5.015625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.12%) |Training time=0.46s (21.25%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3313|ppo_ep: 1|act_loss: 0.033477783203125|cri_loss: 0.017578125|unsuper_loss: 0.0
-average reward score: 4.15625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.41%) |Training time=0.45s (21.00%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3314|ppo_ep: 1|act_loss: 0.049713134765625|cri_loss: 0.0262298583984375|unsuper_loss: 0.0
-average reward score: 4.9296875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.64%) |Training time=0.47s (21.81%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3315|ppo_ep: 1|act_loss: -0.009429931640625|cri_loss: -0.004192352294921875|unsuper_loss: 0.0
-average reward score: 6.03515625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.95%) |Training time=0.46s (21.41%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3316|ppo_ep: 1|act_loss: 0.0149383544921875|cri_loss: 0.007770538330078125|unsuper_loss: 0.0
-average reward score: 4.4375
--------------------------------------------------------------------------------------
-|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.60s (70.84%) |Training time=0.48s (21.38%) |Others=0.18 (7.78%)|CurSamplesPerSec=14.16 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3317|ppo_ep: 1|act_loss: -0.0093841552734375|cri_loss: -0.004528045654296875|unsuper_loss: 0.0
-average reward score: 5.19921875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.95%) |Training time=0.47s (21.42%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3318|ppo_ep: 1|act_loss: 0.027679443359375|cri_loss: 0.01447296142578125|unsuper_loss: 0.0
-average reward score: 4.43359375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.55%) |Training time=0.48s (21.90%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
-[2023-04-14 10:49:56,165] [INFO] [logging.py:96:log_dist] [Rank 0] step=3320, skipped=45, lr=[6.472337531309861e-06, 6.472337531309861e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:49:56,578] [INFO] [timer.py:199:stop] epoch=0/micro_step=3320/global_step=3320, RunningAvgSamplesPerSec=105.78531663292775, CurrSamplesPerSec=45.46805004077017, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:49:56,671] [INFO] [logging.py:96:log_dist] [Rank 0] step=3320, skipped=51, lr=[3.358968458696486e-06, 3.358968458696486e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3319|ppo_ep: 1|act_loss: -0.0241241455078125|cri_loss: -0.00957489013671875|unsuper_loss: 0.0
-average reward score: 4.80078125
--------------------------------------------------------------------------------------
-|E2E latency=2.57s |Gather latency=0.00s (0.00%) |Generate time=1.60s (62.44%) |Training time=0.87s (33.74%) |Others=0.10 (3.82%)|CurSamplesPerSec=12.46 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3320|ppo_ep: 1|act_loss: -0.02484130859375|cri_loss: -0.01227569580078125|unsuper_loss: 0.0
-average reward score: 5.1328125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.58%) |Training time=0.47s (21.87%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3321|ppo_ep: 1|act_loss: -0.024169921875|cri_loss: -0.01117706298828125|unsuper_loss: 0.0
-average reward score: 4.96484375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.60%) |Training time=0.47s (21.83%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3322|ppo_ep: 1|act_loss: -0.0293121337890625|cri_loss: -0.0143890380859375|unsuper_loss: 0.0
-average reward score: 5.33203125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.53%) |Training time=0.48s (21.91%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3323|ppo_ep: 1|act_loss: 0.0419921875|cri_loss: 0.022247314453125|unsuper_loss: 0.0
-average reward score: 5.13671875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.62%) |Training time=0.47s (21.85%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3324|ppo_ep: 1|act_loss: 0.067626953125|cri_loss: 0.0350341796875|unsuper_loss: 0.0
-average reward score: 5.4296875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.79%) |Training time=0.47s (21.67%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3325|ppo_ep: 1|act_loss: -0.007579803466796875|cri_loss: -0.003299713134765625|unsuper_loss: 0.0
-average reward score: 6.6953125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.94%) |Training time=0.47s (21.49%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3326|ppo_ep: 1|act_loss: 0.01213836669921875|cri_loss: 0.006786346435546875|unsuper_loss: 0.0
-average reward score: 5.61328125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.00%) |Training time=0.47s (21.45%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3327|ppo_ep: 1|act_loss: -0.00235748291015625|cri_loss: -0.0010938644409179688|unsuper_loss: 0.0
-average reward score: 5.87109375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.05%) |Training time=0.46s (21.27%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3328|ppo_ep: 1|act_loss: -0.007213592529296875|cri_loss: -0.003448486328125|unsuper_loss: 0.0
-average reward score: 5.1953125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.16%) |Training time=0.44s (20.27%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46
-[2023-04-14 10:50:18,251] [INFO] [logging.py:96:log_dist] [Rank 0] step=3330, skipped=45, lr=[6.454865368272448e-06, 6.454865368272448e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:50:18,270] [INFO] [timer.py:199:stop] epoch=0/micro_step=3330/global_step=3330, RunningAvgSamplesPerSec=105.78267192941124, CurrSamplesPerSec=102.44838222396594, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:50:18,363] [INFO] [logging.py:96:log_dist] [Rank 0] step=3330, skipped=51, lr=[3.3499230993274857e-06, 3.3499230993274857e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3329|ppo_ep: 1|act_loss: -0.0174560546875|cri_loss: -0.00833892822265625|unsuper_loss: 0.0
-average reward score: 4.8203125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.59%) |Training time=0.48s (21.86%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3330|ppo_ep: 1|act_loss: 0.0152130126953125|cri_loss: 0.00783538818359375|unsuper_loss: 0.0
-average reward score: 5.546875
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.17%) |Training time=0.47s (21.28%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3331|ppo_ep: 1|act_loss: -0.003993988037109375|cri_loss: -0.0018301010131835938|unsuper_loss: 0.0
-average reward score: 5.2109375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.87%) |Training time=0.47s (21.59%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3332|ppo_ep: 1|act_loss: 0.0043182373046875|cri_loss: 0.002300262451171875|unsuper_loss: 0.0
-average reward score: 5.41015625
--------------------------------------------------------------------------------------
-|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.77s (75.66%) |Training time=0.47s (20.11%) |Others=0.10 (4.23%)|CurSamplesPerSec=13.70 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3333|ppo_ep: 1|act_loss: 0.006855010986328125|cri_loss: 0.00409698486328125|unsuper_loss: 0.0
-average reward score: 5.30859375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.62%) |Training time=0.47s (21.79%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3334|ppo_ep: 1|act_loss: -0.03057861328125|cri_loss: -0.014862060546875|unsuper_loss: 0.0
-average reward score: 5.078125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.82%) |Training time=0.47s (21.64%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3335|ppo_ep: 1|act_loss: -0.01727294921875|cri_loss: -0.007293701171875|unsuper_loss: 0.0
-average reward score: 5.046875
--------------------------------------------------------------------------------------
-|E2E latency=2.50s |Gather latency=0.00s (0.00%) |Generate time=1.64s (65.62%) |Training time=0.47s (18.62%) |Others=0.39 (15.76%)|CurSamplesPerSec=12.81 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3336|ppo_ep: 1|act_loss: -0.0207061767578125|cri_loss: -0.0089874267578125|unsuper_loss: 0.0
-average reward score: 5.515625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.39%) |Training time=0.48s (22.03%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3337|ppo_ep: 1|act_loss: -0.00792694091796875|cri_loss: -0.0018768310546875|unsuper_loss: 0.0
-average reward score: 4.9453125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.46%) |Training time=0.48s (21.97%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3338|ppo_ep: 1|act_loss: -0.0087890625|cri_loss: -0.004047393798828125|unsuper_loss: 0.0
-average reward score: 5.15625
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.09%) |Training time=0.47s (21.38%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.46
-[2023-04-14 10:50:40,513] [INFO] [logging.py:96:log_dist] [Rank 0] step=3340, skipped=45, lr=[6.437369046698439e-06, 6.437369046698439e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:50:40,532] [INFO] [timer.py:199:stop] epoch=0/micro_step=3340/global_step=3340, RunningAvgSamplesPerSec=105.77579222271166, CurrSamplesPerSec=102.9543915524116, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:50:40,626] [INFO] [logging.py:96:log_dist] [Rank 0] step=3340, skipped=51, lr=[3.3408651420478265e-06, 3.3408651420478265e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3339|ppo_ep: 1|act_loss: 0.002201080322265625|cri_loss: 0.0015249252319335938|unsuper_loss: 0.0
-average reward score: 5.5
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.63%) |Training time=0.47s (21.78%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3340|ppo_ep: 1|act_loss: 0.00557708740234375|cri_loss: 0.004730224609375|unsuper_loss: 0.0
-average reward score: 5.35546875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.32%) |Training time=0.46s (21.10%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3341|ppo_ep: 1|act_loss: 0.05126953125|cri_loss: 0.0268707275390625|unsuper_loss: 0.0
-average reward score: 5.6640625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.62%) |Training time=0.47s (21.85%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3342|ppo_ep: 1|act_loss: 0.00695037841796875|cri_loss: 0.004302978515625|unsuper_loss: 0.0
-average reward score: 4.93359375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.15%) |Training time=0.46s (21.22%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3343|ppo_ep: 1|act_loss: -0.030364990234375|cri_loss: -0.01422882080078125|unsuper_loss: 0.0
-average reward score: 5.19140625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.30%) |Training time=0.46s (21.11%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3344|ppo_ep: 1|act_loss: -0.0138092041015625|cri_loss: -0.006580352783203125|unsuper_loss: 0.0
-average reward score: 5.484375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.90%) |Training time=0.49s (22.55%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3345|ppo_ep: 1|act_loss: -0.01995849609375|cri_loss: -0.0095062255859375|unsuper_loss: 0.0
-average reward score: 5.5703125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.00%) |Training time=0.49s (22.43%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3346|ppo_ep: 1|act_loss: -0.03643798828125|cri_loss: -0.017333984375|unsuper_loss: 0.0
-average reward score: 5.4375
--------------------------------------------------------------------------------------
-|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.75s (74.88%) |Training time=0.49s (20.90%) |Others=0.10 (4.22%)|CurSamplesPerSec=13.69 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3347|ppo_ep: 1|act_loss: -0.01500701904296875|cri_loss: -0.00693511962890625|unsuper_loss: 0.0
-average reward score: 6.03125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.20%) |Training time=0.48s (22.25%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3348|ppo_ep: 1|act_loss: -0.028533935546875|cri_loss: -0.0125885009765625|unsuper_loss: 0.0
-average reward score: 6.01171875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.93%) |Training time=0.49s (22.50%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
-[2023-04-14 10:51:02,346] [INFO] [logging.py:96:log_dist] [Rank 0] step=3350, skipped=45, lr=[6.41984882592553e-06, 6.41984882592553e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:51:02,364] [INFO] [timer.py:199:stop] epoch=0/micro_step=3350/global_step=3350, RunningAvgSamplesPerSec=105.76279625643068, CurrSamplesPerSec=96.08421076745537, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:51:02,457] [INFO] [logging.py:96:log_dist] [Rank 0] step=3350, skipped=51, lr=[3.3317947211182846e-06, 3.3317947211182846e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3349|ppo_ep: 1|act_loss: 0.027587890625|cri_loss: 0.0145263671875|unsuper_loss: 0.0
-average reward score: 5.62890625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.73%) |Training time=0.50s (22.78%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3350|ppo_ep: 1|act_loss: 0.043243408203125|cri_loss: 0.0227203369140625|unsuper_loss: 0.0
-average reward score: 5.2578125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.06%) |Training time=0.48s (22.39%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3351|ppo_ep: 1|act_loss: 0.0242156982421875|cri_loss: 0.0243988037109375|unsuper_loss: 0.0
-average reward score: 6.07421875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.77%) |Training time=0.49s (22.72%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3352|ppo_ep: 1|act_loss: -0.011138916015625|cri_loss: -0.00481414794921875|unsuper_loss: 0.0
-average reward score: 4.90625
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.36%) |Training time=0.51s (23.15%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3353|ppo_ep: 1|act_loss: -0.0261077880859375|cri_loss: -0.01076507568359375|unsuper_loss: 0.0
-average reward score: 5.7578125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.93%) |Training time=0.49s (22.52%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3354|ppo_ep: 1|act_loss: 0.0078125|cri_loss: 0.004383087158203125|unsuper_loss: 0.0
-average reward score: 4.98046875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.92%) |Training time=0.47s (21.52%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3355|ppo_ep: 1|act_loss: -0.020904541015625|cri_loss: -0.0100555419921875|unsuper_loss: 0.0
-average reward score: 5.5625
--------------------------------------------------------------------------------------
-|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.61s (68.06%) |Training time=0.46s (19.38%) |Others=0.30 (12.56%)|CurSamplesPerSec=13.51 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3356|ppo_ep: 1|act_loss: 0.001373291015625|cri_loss: 0.00147247314453125|unsuper_loss: 0.0
-average reward score: 5.21875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.67s (76.69%) |Training time=0.41s (18.72%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3357|ppo_ep: 1|act_loss: 0.03924560546875|cri_loss: 0.02197265625|unsuper_loss: 0.0
-average reward score: 3.931640625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.27%) |Training time=0.46s (21.20%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3358|ppo_ep: 1|act_loss: 0.00650787353515625|cri_loss: 0.0037593841552734375|unsuper_loss: 0.0
-average reward score: 5.2890625
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.35%) |Training time=0.48s (22.11%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.46
-[2023-04-14 10:51:24,302] [INFO] [logging.py:96:log_dist] [Rank 0] step=3360, skipped=45, lr=[6.402304965645658e-06, 6.402304965645658e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:51:24,320] [INFO] [timer.py:199:stop] epoch=0/micro_step=3360/global_step=3360, RunningAvgSamplesPerSec=105.75462987127783, CurrSamplesPerSec=103.27904435641491, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:51:24,413] [INFO] [logging.py:96:log_dist] [Rank 0] step=3360, skipped=51, lr=[3.32271197098438e-06, 3.32271197098438e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3359|ppo_ep: 1|act_loss: -0.0139617919921875|cri_loss: -0.00681304931640625|unsuper_loss: 0.0
-average reward score: 5.40625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.69%) |Training time=0.47s (21.79%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3360|ppo_ep: 1|act_loss: 0.014404296875|cri_loss: 0.00774383544921875|unsuper_loss: 0.0
-average reward score: 4.80078125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.28%) |Training time=0.46s (21.18%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3361|ppo_ep: 1|act_loss: -0.02081298828125|cri_loss: -0.0099029541015625|unsuper_loss: 0.0
-average reward score: 4.734375
--------------------------------------------------------------------------------------
-|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.79s (76.38%) |Training time=0.46s (19.43%) |Others=0.10 (4.19%)|CurSamplesPerSec=13.65 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3362|ppo_ep: 1|act_loss: 0.01715087890625|cri_loss: 0.0088348388671875|unsuper_loss: 0.0
-average reward score: 6.21875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.65%) |Training time=0.47s (21.82%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3363|ppo_ep: 1|act_loss: 0.0213470458984375|cri_loss: 0.01126861572265625|unsuper_loss: 0.0
-average reward score: 4.8359375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.58%) |Training time=0.48s (21.91%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3364|ppo_ep: 1|act_loss: -0.0362548828125|cri_loss: -0.0171661376953125|unsuper_loss: 0.0
-average reward score: 4.921875
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.63s (73.91%) |Training time=0.48s (21.63%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3365|ppo_ep: 1|act_loss: -0.009552001953125|cri_loss: -0.00391387939453125|unsuper_loss: 0.0
-average reward score: 5.21875
--------------------------------------------------------------------------------------
-|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.70s (74.21%) |Training time=0.49s (21.48%) |Others=0.10 (4.31%)|CurSamplesPerSec=14.01 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3366|ppo_ep: 1|act_loss: 0.05206298828125|cri_loss: 0.02874755859375|unsuper_loss: 0.0
-average reward score: 5.08984375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.27%) |Training time=0.48s (22.21%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3367|ppo_ep: 1|act_loss: 0.013092041015625|cri_loss: 0.007740020751953125|unsuper_loss: 0.0
-average reward score: 5.4375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.61%) |Training time=0.47s (21.85%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3368|ppo_ep: 1|act_loss: 0.0022411346435546875|cri_loss: 0.0022373199462890625|unsuper_loss: 0.0
-average reward score: 5.6640625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.51%) |Training time=0.48s (21.94%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
-[2023-04-14 10:51:46,316] [INFO] [logging.py:96:log_dist] [Rank 0] step=3370, skipped=45, lr=[6.384737725901154e-06, 6.384737725901154e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:51:46,334] [INFO] [timer.py:199:stop] epoch=0/micro_step=3370/global_step=3370, RunningAvgSamplesPerSec=105.74734784413464, CurrSamplesPerSec=107.7754519627477, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:51:46,426] [INFO] [logging.py:96:log_dist] [Rank 0] step=3370, skipped=51, lr=[3.3136170262743816e-06, 3.3136170262743816e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3369|ppo_ep: 1|act_loss: -0.03704833984375|cri_loss: -0.016998291015625|unsuper_loss: 0.0
-average reward score: 4.9765625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.20%) |Training time=0.46s (21.27%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3370|ppo_ep: 1|act_loss: 0.003391265869140625|cri_loss: 0.0019388198852539062|unsuper_loss: 0.0
-average reward score: 4.89453125
--------------------------------------------------------------------------------------
-|E2E latency=2.47s |Gather latency=0.00s (0.00%) |Generate time=1.60s (64.53%) |Training time=0.47s (18.98%) |Others=0.41 (16.49%)|CurSamplesPerSec=12.93 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3371|ppo_ep: 1|act_loss: 0.030426025390625|cri_loss: 0.0155181884765625|unsuper_loss: 0.0
-average reward score: 5.2109375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.98%) |Training time=0.47s (21.48%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3372|ppo_ep: 1|act_loss: -0.01071929931640625|cri_loss: -0.004619598388671875|unsuper_loss: 0.0
-average reward score: 5.71875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.98%) |Training time=0.46s (21.37%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3373|ppo_ep: 1|act_loss: -0.0019855499267578125|cri_loss: -0.00032901763916015625|unsuper_loss: 0.0
-average reward score: 4.890625
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.84%) |Training time=0.50s (22.65%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3374|ppo_ep: 1|act_loss: 0.030792236328125|cri_loss: 0.018341064453125|unsuper_loss: 0.0
-average reward score: 5.0078125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.01%) |Training time=0.49s (22.41%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3375|ppo_ep: 1|act_loss: -0.026336669921875|cri_loss: -0.01236724853515625|unsuper_loss: 0.0
-average reward score: 4.53125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.59%) |Training time=0.45s (20.88%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3376|ppo_ep: 1|act_loss: 0.0124359130859375|cri_loss: 0.00743865966796875|unsuper_loss: 0.0
-average reward score: 5.7890625
--------------------------------------------------------------------------------------
-|E2E latency=2.66s |Gather latency=0.00s (0.00%) |Generate time=1.79s (67.30%) |Training time=0.47s (17.60%) |Others=0.40 (15.10%)|CurSamplesPerSec=12.01 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3377|ppo_ep: 1|act_loss: 0.01904296875|cri_loss: 0.010223388671875|unsuper_loss: 0.0
-average reward score: 5.49609375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.68s (77.47%) |Training time=0.39s (17.97%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3378|ppo_ep: 1|act_loss: 0.022216796875|cri_loss: 0.01214599609375|unsuper_loss: 0.0
-average reward score: 4.265625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.22%) |Training time=0.46s (21.19%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
-[2023-04-14 10:52:08,821] [INFO] [logging.py:96:log_dist] [Rank 0] step=3380, skipped=45, lr=[6.367147367080889e-06, 6.367147367080889e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:52:08,839] [INFO] [timer.py:199:stop] epoch=0/micro_step=3380/global_step=3380, RunningAvgSamplesPerSec=105.75309889489947, CurrSamplesPerSec=113.50777958381468, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:52:08,932] [INFO] [logging.py:96:log_dist] [Rank 0] step=3380, skipped=51, lr=[3.3045100217973093e-06, 3.3045100217973093e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3379|ppo_ep: 1|act_loss: -0.0112152099609375|cri_loss: -0.005474090576171875|unsuper_loss: 0.0
-average reward score: 4.94140625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.84%) |Training time=0.45s (20.59%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3380|ppo_ep: 1|act_loss: 0.055023193359375|cri_loss: 0.028564453125|unsuper_loss: 0.0
-average reward score: 4.8984375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.38%) |Training time=0.43s (20.03%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3381|ppo_ep: 1|act_loss: 0.0017099380493164062|cri_loss: 0.00150299072265625|unsuper_loss: 0.0
-average reward score: 5.7109375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.80%) |Training time=0.45s (20.65%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3382|ppo_ep: 1|act_loss: 0.032318115234375|cri_loss: 0.016998291015625|unsuper_loss: 0.0
-average reward score: 4.984375
--------------------------------------------------------------------------------------
-|E2E latency=2.63s |Gather latency=0.00s (0.00%) |Generate time=1.66s (63.03%) |Training time=0.41s (15.45%) |Others=0.57 (21.51%)|CurSamplesPerSec=12.17 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3383|ppo_ep: 1|act_loss: -0.01104736328125|cri_loss: -0.005268096923828125|unsuper_loss: 0.0
-average reward score: 5.41015625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.86%) |Training time=0.42s (19.56%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3384|ppo_ep: 1|act_loss: -0.00569915771484375|cri_loss: -0.00243377685546875|unsuper_loss: 0.0
-average reward score: 5.8046875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.71%) |Training time=0.47s (21.59%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3385|ppo_ep: 1|act_loss: -7.152557373046875e-05|cri_loss: 0.0007443428039550781|unsuper_loss: 0.0
-average reward score: 5.109375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.71%) |Training time=0.47s (21.72%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3386|ppo_ep: 1|act_loss: -0.0491943359375|cri_loss: -0.0230255126953125|unsuper_loss: 0.0
-average reward score: 5.734375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.54%) |Training time=0.47s (21.90%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3387|ppo_ep: 1|act_loss: -0.036865234375|cri_loss: -0.0172271728515625|unsuper_loss: 0.0
-average reward score: 5.65234375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.95%) |Training time=0.49s (22.51%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3388|ppo_ep: 1|act_loss: -0.03228759765625|cri_loss: -0.01568603515625|unsuper_loss: 0.0
-average reward score: 5.3828125
--------------------------------------------------------------------------------------
-|E2E latency=2.44s |Gather latency=0.00s (0.00%) |Generate time=1.59s (65.28%) |Training time=0.46s (18.96%) |Others=0.38 (15.76%)|CurSamplesPerSec=13.11 |AvgSamplesPerSec=14.46
-[2023-04-14 10:52:31,218] [INFO] [logging.py:96:log_dist] [Rank 0] step=3390, skipped=45, lr=[6.349534149916417e-06, 6.349534149916417e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:52:31,236] [INFO] [timer.py:199:stop] epoch=0/micro_step=3390/global_step=3390, RunningAvgSamplesPerSec=105.76502510812344, CurrSamplesPerSec=106.6209268758078, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:52:31,328] [INFO] [logging.py:96:log_dist] [Rank 0] step=3390, skipped=51, lr=[3.2953910925409387e-06, 3.2953910925409387e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3389|ppo_ep: 1|act_loss: -0.0055389404296875|cri_loss: -0.002254486083984375|unsuper_loss: 0.0
-average reward score: 5.296875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.93%) |Training time=0.46s (21.52%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3390|ppo_ep: 1|act_loss: -0.0277252197265625|cri_loss: -0.01322174072265625|unsuper_loss: 0.0
-average reward score: 5.171875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.37%) |Training time=0.45s (21.01%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3391|ppo_ep: 1|act_loss: -0.03997802734375|cri_loss: -0.018768310546875|unsuper_loss: 0.0
-average reward score: 4.984375
--------------------------------------------------------------------------------------
-|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.77s (75.69%) |Training time=0.47s (20.06%) |Others=0.10 (4.25%)|CurSamplesPerSec=13.72 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3392|ppo_ep: 1|act_loss: 0.0065765380859375|cri_loss: 0.0040283203125|unsuper_loss: 0.0
-average reward score: 5.6171875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.41%) |Training time=0.45s (21.01%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3393|ppo_ep: 1|act_loss: 0.013397216796875|cri_loss: 0.007022857666015625|unsuper_loss: 0.0
-average reward score: 5.46875
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.65s (74.80%) |Training time=0.46s (20.74%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.54 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3394|ppo_ep: 1|act_loss: 0.007198333740234375|cri_loss: 0.00415802001953125|unsuper_loss: 0.0
-average reward score: 5.375
--------------------------------------------------------------------------------------
-|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.70s (74.72%) |Training time=0.48s (20.94%) |Others=0.10 (4.34%)|CurSamplesPerSec=14.09 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3395|ppo_ep: 1|act_loss: 0.0068206787109375|cri_loss: 0.004573822021484375|unsuper_loss: 0.0
-average reward score: 5.94921875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.83%) |Training time=0.46s (21.56%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3396|ppo_ep: 1|act_loss: 0.0765380859375|cri_loss: 0.0452880859375|unsuper_loss: 0.0
-average reward score: 5.4140625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.73%) |Training time=0.47s (21.68%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3397|ppo_ep: 1|act_loss: 0.0015411376953125|cri_loss: 0.001132965087890625|unsuper_loss: 0.0
-average reward score: 5.71875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.75%) |Training time=0.46s (21.66%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3398|ppo_ep: 1|act_loss: -0.00408935546875|cri_loss: -0.0012340545654296875|unsuper_loss: 0.0
-average reward score: 5.08984375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.88%) |Training time=0.46s (21.54%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46
-[2023-04-14 10:52:53,057] [INFO] [logging.py:96:log_dist] [Rank 0] step=3400, skipped=45, lr=[6.331898335478102e-06, 6.331898335478102e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:52:53,075] [INFO] [timer.py:199:stop] epoch=0/micro_step=3400/global_step=3400, RunningAvgSamplesPerSec=105.76780238263315, CurrSamplesPerSec=106.7389575160306, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:52:53,167] [INFO] [logging.py:96:log_dist] [Rank 0] step=3400, skipped=51, lr=[3.2862603736698014e-06, 3.2862603736698014e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3399|ppo_ep: 1|act_loss: -0.0025119781494140625|cri_loss: 0.00067901611328125|unsuper_loss: 0.0
-average reward score: 5.29296875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.89%) |Training time=0.46s (21.55%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3400|ppo_ep: 1|act_loss: 0.01654052734375|cri_loss: 0.0099945068359375|unsuper_loss: 0.0
-average reward score: 4.66015625
--------------------------------------------------------------------------------------
-|E2E latency=2.50s |Gather latency=0.00s (0.00%) |Generate time=1.59s (63.56%) |Training time=0.46s (18.35%) |Others=0.45 (18.09%)|CurSamplesPerSec=12.81 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3401|ppo_ep: 1|act_loss: 0.03460693359375|cri_loss: 0.0185394287109375|unsuper_loss: 0.0
-average reward score: 4.87109375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.83%) |Training time=0.46s (21.59%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3402|ppo_ep: 1|act_loss: 0.0011186599731445312|cri_loss: 0.0008335113525390625|unsuper_loss: 0.0
-average reward score: 5.6796875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.56%) |Training time=0.48s (21.89%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3403|ppo_ep: 1|act_loss: -0.007110595703125|cri_loss: -0.003444671630859375|unsuper_loss: 0.0
-average reward score: 5.25
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.73%) |Training time=0.47s (21.71%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3404|ppo_ep: 1|act_loss: 0.01158905029296875|cri_loss: 0.006256103515625|unsuper_loss: 0.0
-average reward score: 5.59375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.69%) |Training time=0.47s (21.74%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3405|ppo_ep: 1|act_loss: -0.0084381103515625|cri_loss: -0.0037441253662109375|unsuper_loss: 0.0
-average reward score: 5.4140625
--------------------------------------------------------------------------------------
-|E2E latency=2.62s |Gather latency=0.00s (0.00%) |Generate time=1.59s (60.72%) |Training time=0.46s (17.66%) |Others=0.57 (21.62%)|CurSamplesPerSec=12.22 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3406|ppo_ep: 1|act_loss: 0.0013408660888671875|cri_loss: 0.0010423660278320312|unsuper_loss: 0.0
-average reward score: 5.1953125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.41%) |Training time=0.48s (21.99%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3407|ppo_ep: 1|act_loss: 0.00499725341796875|cri_loss: 0.0028533935546875|unsuper_loss: 0.0
-average reward score: 5.265625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.87%) |Training time=0.46s (21.57%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3408|ppo_ep: 1|act_loss: -0.006114959716796875|cri_loss: -0.001445770263671875|unsuper_loss: 0.0
-average reward score: 5.77734375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.68%) |Training time=0.47s (21.61%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
-[2023-04-14 10:53:15,449] [INFO] [logging.py:96:log_dist] [Rank 0] step=3410, skipped=45, lr=[6.314240185171264e-06, 6.314240185171264e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:53:15,467] [INFO] [timer.py:199:stop] epoch=0/micro_step=3410/global_step=3410, RunningAvgSamplesPerSec=105.76521709293858, CurrSamplesPerSec=104.83900745727125, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:53:15,560] [INFO] [logging.py:96:log_dist] [Rank 0] step=3410, skipped=51, lr=[3.2771180005231746e-06, 3.2771180005231746e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3409|ppo_ep: 1|act_loss: -0.02142333984375|cri_loss: -0.0105133056640625|unsuper_loss: 0.0
-average reward score: 5.875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.66%) |Training time=0.47s (21.81%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3410|ppo_ep: 1|act_loss: -0.02252197265625|cri_loss: -0.00972747802734375|unsuper_loss: 0.0
-average reward score: 4.953125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.10%) |Training time=0.46s (21.35%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3411|ppo_ep: 1|act_loss: -0.0213623046875|cri_loss: -0.0104827880859375|unsuper_loss: 0.0
-average reward score: 5.67578125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.98%) |Training time=0.46s (21.46%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46
-[2023-04-14 10:53:21,887] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
-[2023-04-14 10:53:21,971] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
-epoch: 0|step: 3412|ppo_ep: 1|act_loss: -0.006343841552734375|cri_loss: -0.00311279296875|unsuper_loss: 0.0
-average reward score: 6.52734375
--------------------------------------------------------------------------------------
-|E2E latency=2.10s |Gather latency=0.00s (0.00%) |Generate time=1.59s (75.58%) |Training time=0.43s (20.28%) |Others=0.09 (4.14%)|CurSamplesPerSec=15.20 |AvgSamplesPerSec=14.46
-[2023-04-14 10:53:23,996] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
-[2023-04-14 10:53:24,080] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
-epoch: 0|step: 3413|ppo_ep: 1|act_loss: 0.0185394287109375|cri_loss: 0.011444091796875|unsuper_loss: 0.0
-average reward score: 5.8984375
--------------------------------------------------------------------------------------
-|E2E latency=2.11s |Gather latency=0.00s (0.00%) |Generate time=1.59s (75.31%) |Training time=0.43s (20.55%) |Others=0.09 (4.13%)|CurSamplesPerSec=15.17 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3414|ppo_ep: 1|act_loss: -0.0077972412109375|cri_loss: -0.0034694671630859375|unsuper_loss: 0.0
-average reward score: 5.640625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.74%) |Training time=0.47s (21.70%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3415|ppo_ep: 1|act_loss: -0.0147552490234375|cri_loss: -0.0070343017578125|unsuper_loss: 0.0
-average reward score: 5.66796875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.21%) |Training time=0.45s (21.19%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3416|ppo_ep: 1|act_loss: -0.017578125|cri_loss: -0.0079498291015625|unsuper_loss: 0.0
-average reward score: 5.12109375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.50%) |Training time=0.47s (21.94%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3417|ppo_ep: 1|act_loss: 0.03759765625|cri_loss: 0.02191162109375|unsuper_loss: 0.0
-average reward score: 4.7890625
--------------------------------------------------------------------------------------
-|E2E latency=3.73s |Gather latency=0.00s (0.00%) |Generate time=1.59s (42.64%) |Training time=0.46s (12.46%) |Others=1.68 (44.90%)|CurSamplesPerSec=8.58 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3418|ppo_ep: 1|act_loss: -0.01320648193359375|cri_loss: -0.005695343017578125|unsuper_loss: 0.0
-average reward score: 5.10546875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.49%) |Training time=0.45s (20.94%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.45
-[2023-04-14 10:53:38,444] [INFO] [logging.py:96:log_dist] [Rank 0] step=3420, skipped=47, lr=[6.300097758976281e-06, 6.300097758976281e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:53:38,462] [INFO] [timer.py:199:stop] epoch=0/micro_step=3420/global_step=3420, RunningAvgSamplesPerSec=105.77514620401836, CurrSamplesPerSec=106.69610729889406, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:53:38,555] [INFO] [logging.py:96:log_dist] [Rank 0] step=3420, skipped=53, lr=[3.2697958019858506e-06, 3.2697958019858506e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3419|ppo_ep: 1|act_loss: 0.02777099609375|cri_loss: 0.0148773193359375|unsuper_loss: 0.0
-average reward score: 4.8984375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.87%) |Training time=0.46s (21.56%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3420|ppo_ep: 1|act_loss: 0.00020694732666015625|cri_loss: 0.0003859996795654297|unsuper_loss: 0.0
-average reward score: 5.90234375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.21%) |Training time=0.46s (21.24%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3421|ppo_ep: 1|act_loss: -0.0121612548828125|cri_loss: -0.005523681640625|unsuper_loss: 0.0
-average reward score: 5.30859375
--------------------------------------------------------------------------------------
-|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.77s (75.95%) |Training time=0.46s (19.82%) |Others=0.10 (4.23%)|CurSamplesPerSec=13.76 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3422|ppo_ep: 1|act_loss: -0.0455322265625|cri_loss: -0.02069091796875|unsuper_loss: 0.0
-average reward score: 5.1015625
--------------------------------------------------------------------------------------
-|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.61s (72.62%) |Training time=0.51s (22.96%) |Others=0.10 (4.42%)|CurSamplesPerSec=14.42 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3423|ppo_ep: 1|act_loss: -0.022003173828125|cri_loss: -0.01036834716796875|unsuper_loss: 0.0
-average reward score: 4.7734375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.91%) |Training time=0.46s (21.46%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3424|ppo_ep: 1|act_loss: 0.0006990432739257812|cri_loss: 0.000621795654296875|unsuper_loss: 0.0
-average reward score: 5.796875
--------------------------------------------------------------------------------------
-|E2E latency=3.00s |Gather latency=0.00s (0.00%) |Generate time=1.59s (53.00%) |Training time=0.46s (15.49%) |Others=0.94 (31.50%)|CurSamplesPerSec=10.68 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3425|ppo_ep: 1|act_loss: -0.023040771484375|cri_loss: -0.010498046875|unsuper_loss: 0.0
-average reward score: 5.58203125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.82%) |Training time=0.47s (21.62%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3426|ppo_ep: 1|act_loss: -0.00516510009765625|cri_loss: -0.0021610260009765625|unsuper_loss: 0.0
-average reward score: 5.96875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.91%) |Training time=0.46s (21.51%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3427|ppo_ep: 1|act_loss: 0.0189208984375|cri_loss: 0.01000213623046875|unsuper_loss: 0.0
-average reward score: 5.53515625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.82%) |Training time=0.46s (21.61%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3428|ppo_ep: 1|act_loss: 0.0011386871337890625|cri_loss: 0.002056121826171875|unsuper_loss: 0.0
-average reward score: 4.28515625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.08%) |Training time=0.46s (21.30%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.45
-[2023-04-14 10:54:01,026] [INFO] [logging.py:96:log_dist] [Rank 0] step=3430, skipped=47, lr=[6.2824000639017895e-06, 6.2824000639017895e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:54:01,044] [INFO] [timer.py:199:stop] epoch=0/micro_step=3430/global_step=3430, RunningAvgSamplesPerSec=105.77657682475913, CurrSamplesPerSec=108.1306645290452, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:54:01,137] [INFO] [logging.py:96:log_dist] [Rank 0] step=3430, skipped=53, lr=[3.2606327927484516e-06, 3.2606327927484516e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3429|ppo_ep: 1|act_loss: 0.0309295654296875|cri_loss: 0.0161285400390625|unsuper_loss: 0.0
-average reward score: 5.03125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.04%) |Training time=0.46s (21.39%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3430|ppo_ep: 1|act_loss: -0.03143310546875|cri_loss: -0.0149078369140625|unsuper_loss: 0.0
-average reward score: 5.26953125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.95%) |Training time=0.46s (21.47%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3431|ppo_ep: 1|act_loss: 0.007232666015625|cri_loss: 0.003753662109375|unsuper_loss: 0.0
-average reward score: 4.51171875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.21%) |Training time=0.46s (21.23%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3432|ppo_ep: 1|act_loss: 0.0101165771484375|cri_loss: 0.00543212890625|unsuper_loss: 0.0
-average reward score: 4.859375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.99%) |Training time=0.46s (21.45%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3433|ppo_ep: 1|act_loss: -0.0355224609375|cri_loss: -0.01580810546875|unsuper_loss: 0.0
-average reward score: 4.22265625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.28%) |Training time=0.45s (21.12%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3434|ppo_ep: 1|act_loss: -0.021392822265625|cri_loss: -0.01043701171875|unsuper_loss: 0.0
-average reward score: 4.171875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.26%) |Training time=0.46s (21.16%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3435|ppo_ep: 1|act_loss: 0.015380859375|cri_loss: 0.0092010498046875|unsuper_loss: 0.0
-average reward score: 5.02734375
--------------------------------------------------------------------------------------
-|E2E latency=2.94s |Gather latency=0.00s (0.00%) |Generate time=1.76s (59.75%) |Training time=0.47s (16.12%) |Others=0.71 (24.13%)|CurSamplesPerSec=10.88 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3436|ppo_ep: 1|act_loss: -0.01514434814453125|cri_loss: -0.0072021484375|unsuper_loss: 0.0
-average reward score: 4.5234375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.59%) |Training time=0.47s (21.83%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3437|ppo_ep: 1|act_loss: -0.004932403564453125|cri_loss: -0.0020294189453125|unsuper_loss: 0.0
-average reward score: 5.94921875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.54%) |Training time=0.48s (21.92%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3438|ppo_ep: 1|act_loss: 0.00617218017578125|cri_loss: 0.0031795501708984375|unsuper_loss: 0.0
-average reward score: 5.0390625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.01%) |Training time=0.46s (21.43%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.45
-[2023-04-14 10:54:23,363] [INFO] [logging.py:96:log_dist] [Rank 0] step=3440, skipped=47, lr=[6.264680766642565e-06, 6.264680766642565e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:54:23,375] [INFO] [timer.py:199:stop] epoch=0/micro_step=3440/global_step=3440, RunningAvgSamplesPerSec=105.77759385861118, CurrSamplesPerSec=104.42456438456774, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:54:23,467] [INFO] [logging.py:96:log_dist] [Rank 0] step=3440, skipped=53, lr=[3.251458509098137e-06, 3.251458509098137e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3439|ppo_ep: 1|act_loss: -0.005565643310546875|cri_loss: -0.002216339111328125|unsuper_loss: 0.0
-average reward score: 5.78125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.79%) |Training time=0.47s (21.68%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3440|ppo_ep: 1|act_loss: -0.02606201171875|cri_loss: -0.01192474365234375|unsuper_loss: 0.0
-average reward score: 5.6640625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.95%) |Training time=0.47s (21.52%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3441|ppo_ep: 1|act_loss: 0.01318359375|cri_loss: 0.00732421875|unsuper_loss: 0.0
-average reward score: 4.41015625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.23%) |Training time=0.46s (21.21%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3442|ppo_ep: 1|act_loss: 0.0325927734375|cri_loss: 0.017913818359375|unsuper_loss: 0.0
-average reward score: 5.30859375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.94%) |Training time=0.46s (21.51%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3443|ppo_ep: 1|act_loss: -0.014251708984375|cri_loss: -0.006694793701171875|unsuper_loss: 0.0
-average reward score: 5.0625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.94%) |Training time=0.46s (21.50%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3444|ppo_ep: 1|act_loss: 0.0066375732421875|cri_loss: 0.0035572052001953125|unsuper_loss: 0.0
-average reward score: 5.30859375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.26%) |Training time=0.46s (21.17%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3445|ppo_ep: 1|act_loss: 0.015838623046875|cri_loss: 0.00957489013671875|unsuper_loss: 0.0
-average reward score: 5.92578125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.20%) |Training time=0.46s (21.24%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3446|ppo_ep: 1|act_loss: -0.013275146484375|cri_loss: -0.00611114501953125|unsuper_loss: 0.0
-average reward score: 5.5078125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.16%) |Training time=0.46s (21.28%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3447|ppo_ep: 1|act_loss: -0.018157958984375|cri_loss: -0.0085296630859375|unsuper_loss: 0.0
-average reward score: 5.4140625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.45%) |Training time=0.46s (21.04%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3448|ppo_ep: 1|act_loss: -0.00926971435546875|cri_loss: -0.00391387939453125|unsuper_loss: 0.0
-average reward score: 5.0390625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.96%) |Training time=0.46s (21.49%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.45
-[2023-04-14 10:54:44,967] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, reducing to 16384
-[2023-04-14 10:54:44,967] [INFO] [logging.py:96:log_dist] [Rank 0] step=3450, skipped=48, lr=[6.248715146308981e-06, 6.248715146308981e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:54:44,967] [INFO] [timer.py:199:stop] epoch=0/micro_step=3450/global_step=3450, RunningAvgSamplesPerSec=105.783939143326, CurrSamplesPerSec=117.16853453920088, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:54:45,059] [INFO] [logging.py:96:log_dist] [Rank 0] step=3450, skipped=53, lr=[3.2422730870199237e-06, 3.2422730870199237e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3449|ppo_ep: 1|act_loss: -0.015716552734375|cri_loss: -0.00740814208984375|unsuper_loss: 0.0
-average reward score: 5.8125
--------------------------------------------------------------------------------------
-|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.60s (75.01%) |Training time=0.43s (20.38%) |Others=0.10 (4.60%)|CurSamplesPerSec=15.00 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3450|ppo_ep: 1|act_loss: 0.03143310546875|cri_loss: 0.0171966552734375|unsuper_loss: 0.0
-average reward score: 4.921875
--------------------------------------------------------------------------------------
-|E2E latency=2.56s |Gather latency=0.00s (0.00%) |Generate time=1.71s (66.84%) |Training time=0.50s (19.54%) |Others=0.35 (13.62%)|CurSamplesPerSec=12.48 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3451|ppo_ep: 1|act_loss: 0.02996826171875|cri_loss: 0.01555633544921875|unsuper_loss: 0.0
-average reward score: 4.359375
--------------------------------------------------------------------------------------
-|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.72s (75.75%) |Training time=0.45s (19.91%) |Others=0.10 (4.34%)|CurSamplesPerSec=14.11 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3452|ppo_ep: 1|act_loss: 0.035980224609375|cri_loss: 0.01849365234375|unsuper_loss: 0.0
-average reward score: 4.40625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.17%) |Training time=0.46s (21.26%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3453|ppo_ep: 1|act_loss: -0.0020885467529296875|cri_loss: -0.000255584716796875|unsuper_loss: 0.0
-average reward score: 6.40234375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.25%) |Training time=0.46s (21.18%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3454|ppo_ep: 1|act_loss: -0.0262298583984375|cri_loss: -0.0119476318359375|unsuper_loss: 0.0
-average reward score: 4.9609375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.90%) |Training time=0.47s (21.55%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3455|ppo_ep: 1|act_loss: -0.02520751953125|cri_loss: -0.01177215576171875|unsuper_loss: 0.0
-average reward score: 4.6328125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.06%) |Training time=0.46s (21.39%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3456|ppo_ep: 1|act_loss: -0.0152587890625|cri_loss: -0.0072784423828125|unsuper_loss: 0.0
-average reward score: 5.41015625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.17%) |Training time=0.46s (21.27%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3457|ppo_ep: 1|act_loss: 0.0140533447265625|cri_loss: 0.00745391845703125|unsuper_loss: 0.0
-average reward score: 5.16015625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.94%) |Training time=0.47s (21.52%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3458|ppo_ep: 1|act_loss: 0.0024566650390625|cri_loss: 0.0014514923095703125|unsuper_loss: 0.0
-average reward score: 5.3515625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.66%) |Training time=0.47s (21.80%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.45
-[2023-04-14 10:55:07,050] [INFO] [logging.py:96:log_dist] [Rank 0] step=3460, skipped=48, lr=[6.230955528740993e-06, 6.230955528740993e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:55:07,068] [INFO] [timer.py:199:stop] epoch=0/micro_step=3460/global_step=3460, RunningAvgSamplesPerSec=105.7849063342375, CurrSamplesPerSec=107.60592251472772, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:55:07,161] [INFO] [logging.py:96:log_dist] [Rank 0] step=3460, skipped=53, lr=[3.2330766626639233e-06, 3.2330766626639233e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3459|ppo_ep: 1|act_loss: -0.0158843994140625|cri_loss: -0.006961822509765625|unsuper_loss: 0.0
-average reward score: 5.1796875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.14%) |Training time=0.46s (21.31%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3460|ppo_ep: 1|act_loss: -0.018280029296875|cri_loss: -0.00885009765625|unsuper_loss: 0.0
-average reward score: 4.515625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.95%) |Training time=0.46s (21.51%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3461|ppo_ep: 1|act_loss: 0.02325439453125|cri_loss: 0.0117645263671875|unsuper_loss: 0.0
-average reward score: 5.375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.22%) |Training time=0.46s (21.22%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3462|ppo_ep: 1|act_loss: -0.0025615692138671875|cri_loss: -0.0010423660278320312|unsuper_loss: 0.0
-average reward score: 4.71875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.72%) |Training time=0.47s (21.61%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3463|ppo_ep: 1|act_loss: 0.0088958740234375|cri_loss: 0.0046844482421875|unsuper_loss: 0.0
-average reward score: 5.21875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.87%) |Training time=0.46s (21.57%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3464|ppo_ep: 1|act_loss: -0.01629638671875|cri_loss: -0.00799560546875|unsuper_loss: 0.0
-average reward score: 5.26171875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.26%) |Training time=0.48s (22.18%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3465|ppo_ep: 1|act_loss: 0.009490966796875|cri_loss: 0.0052642822265625|unsuper_loss: 0.0
-average reward score: 5.73828125
--------------------------------------------------------------------------------------
-|E2E latency=3.12s |Gather latency=0.00s (0.00%) |Generate time=1.61s (51.60%) |Training time=0.51s (16.36%) |Others=1.00 (32.04%)|CurSamplesPerSec=10.24 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3466|ppo_ep: 1|act_loss: 0.0145111083984375|cri_loss: 0.0078887939453125|unsuper_loss: 0.0
-average reward score: 5.1015625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.71%) |Training time=0.47s (21.73%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3467|ppo_ep: 1|act_loss: -0.015533447265625|cri_loss: -0.007503509521484375|unsuper_loss: 0.0
-average reward score: 4.9296875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.38%) |Training time=0.48s (22.07%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3468|ppo_ep: 1|act_loss: -0.014312744140625|cri_loss: -0.006694793701171875|unsuper_loss: 0.0
-average reward score: 4.9609375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.38%) |Training time=0.48s (22.07%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45
-[2023-04-14 10:55:29,606] [INFO] [logging.py:96:log_dist] [Rank 0] step=3470, skipped=48, lr=[6.213175071520385e-06, 6.213175071520385e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:55:29,625] [INFO] [timer.py:199:stop] epoch=0/micro_step=3470/global_step=3470, RunningAvgSamplesPerSec=105.77427848375432, CurrSamplesPerSec=98.66774093949864, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:55:29,717] [INFO] [logging.py:96:log_dist] [Rank 0] step=3470, skipped=53, lr=[3.2238693723433317e-06, 3.2238693723433317e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3469|ppo_ep: 1|act_loss: -0.01213836669921875|cri_loss: -0.005840301513671875|unsuper_loss: 0.0
-average reward score: 4.79296875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.97%) |Training time=0.49s (22.50%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3470|ppo_ep: 1|act_loss: -0.0100555419921875|cri_loss: -0.004817962646484375|unsuper_loss: 0.0
-average reward score: 5.42578125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.14%) |Training time=0.48s (22.30%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3471|ppo_ep: 1|act_loss: -0.0176544189453125|cri_loss: -0.007389068603515625|unsuper_loss: 0.0
-average reward score: 5.41015625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.39%) |Training time=0.48s (22.05%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3472|ppo_ep: 1|act_loss: -0.0164794921875|cri_loss: -0.007843017578125|unsuper_loss: 0.0
-average reward score: 4.82421875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.91%) |Training time=0.49s (22.55%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3473|ppo_ep: 1|act_loss: 0.03594970703125|cri_loss: 0.018707275390625|unsuper_loss: 0.0
-average reward score: 4.2890625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.38%) |Training time=0.48s (22.05%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3474|ppo_ep: 1|act_loss: 0.01245880126953125|cri_loss: 0.0072021484375|unsuper_loss: 0.0
-average reward score: 5.10546875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.38%) |Training time=0.48s (22.12%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3475|ppo_ep: 1|act_loss: 0.016845703125|cri_loss: 0.00959014892578125|unsuper_loss: 0.0
-average reward score: 5.5859375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.09%) |Training time=0.48s (22.35%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3476|ppo_ep: 1|act_loss: 0.026947021484375|cri_loss: 0.013885498046875|unsuper_loss: 0.0
-average reward score: 5.765625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.04%) |Training time=0.49s (22.42%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3477|ppo_ep: 1|act_loss: -0.006870269775390625|cri_loss: -0.00278472900390625|unsuper_loss: 0.0
-average reward score: 5.2890625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.92%) |Training time=0.46s (21.52%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3478|ppo_ep: 1|act_loss: -0.011322021484375|cri_loss: -0.00521087646484375|unsuper_loss: 0.0
-average reward score: 5.41015625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.55%) |Training time=0.47s (21.90%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45
-[2023-04-14 10:55:51,216] [INFO] [logging.py:96:log_dist] [Rank 0] step=3480, skipped=48, lr=[6.195374038196429e-06, 6.195374038196429e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:55:51,234] [INFO] [timer.py:199:stop] epoch=0/micro_step=3480/global_step=3480, RunningAvgSamplesPerSec=105.76188277060592, CurrSamplesPerSec=105.97196140666698, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:55:51,378] [INFO] [logging.py:96:log_dist] [Rank 0] step=3480, skipped=53, lr=[3.2146513525324026e-06, 3.2146513525324026e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3479|ppo_ep: 1|act_loss: -0.03985595703125|cri_loss: -0.0194244384765625|unsuper_loss: 0.0
-average reward score: 5.69921875
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.28%) |Training time=0.46s (21.00%) |Others=0.15 (6.72%)|CurSamplesPerSec=14.47 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3480|ppo_ep: 1|act_loss: -0.005619049072265625|cri_loss: -0.0025501251220703125|unsuper_loss: 0.0
-average reward score: 4.4765625
--------------------------------------------------------------------------------------
-|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.74s (74.56%) |Training time=0.46s (19.97%) |Others=0.13 (5.47%)|CurSamplesPerSec=13.75 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3481|ppo_ep: 1|act_loss: -0.0032958984375|cri_loss: -0.0014524459838867188|unsuper_loss: 0.0
-average reward score: 5.3125
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.86%) |Training time=0.47s (21.33%) |Others=0.11 (4.80%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3482|ppo_ep: 1|act_loss: -0.0078582763671875|cri_loss: -0.0034637451171875|unsuper_loss: 0.0
-average reward score: 5.0546875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.52%) |Training time=0.47s (21.95%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.45
-epoch: 0|step: 3483|ppo_ep: 1|act_loss: 0.001445770263671875|cri_loss: 0.0008373260498046875|unsuper_loss: 0.0
-average reward score: 5.4375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.73%) |Training time=0.47s (21.71%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3484|ppo_ep: 1|act_loss: -0.03155517578125|cri_loss: -0.0147247314453125|unsuper_loss: 0.0
-average reward score: 5.0546875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.09%) |Training time=0.48s (22.37%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3485|ppo_ep: 1|act_loss: -0.00524139404296875|cri_loss: -0.002017974853515625|unsuper_loss: 0.0
-average reward score: 4.71875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.36%) |Training time=0.47s (22.08%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3486|ppo_ep: 1|act_loss: 0.011993408203125|cri_loss: 0.006175994873046875|unsuper_loss: 0.0
-average reward score: 5.54296875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.14%) |Training time=0.48s (22.31%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3487|ppo_ep: 1|act_loss: 0.0201263427734375|cri_loss: 0.01052093505859375|unsuper_loss: 0.0
-average reward score: 5.1328125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.58%) |Training time=0.47s (21.84%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3488|ppo_ep: 1|act_loss: 0.029449462890625|cri_loss: 0.01541900634765625|unsuper_loss: 0.0
-average reward score: 4.85546875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.68%) |Training time=0.47s (21.74%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.46
-[2023-04-14 10:56:12,986] [INFO] [logging.py:96:log_dist] [Rank 0] step=3490, skipped=48, lr=[6.177552692623376e-06, 6.177552692623376e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:56:13,005] [INFO] [timer.py:199:stop] epoch=0/micro_step=3490/global_step=3490, RunningAvgSamplesPerSec=105.75557515461509, CurrSamplesPerSec=105.23719384686797, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:56:13,097] [INFO] [logging.py:96:log_dist] [Rank 0] step=3490, skipped=53, lr=[3.2054227398644253e-06, 3.2054227398644253e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3489|ppo_ep: 1|act_loss: -0.0174713134765625|cri_loss: -0.00785064697265625|unsuper_loss: 0.0
-average reward score: 5.80859375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.66%) |Training time=0.47s (21.77%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3490|ppo_ep: 1|act_loss: -0.029205322265625|cri_loss: -0.0137939453125|unsuper_loss: 0.0
-average reward score: 5.65234375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.54%) |Training time=0.47s (21.89%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3491|ppo_ep: 1|act_loss: 0.018524169921875|cri_loss: 0.01007843017578125|unsuper_loss: 0.0
-average reward score: 4.5546875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.09%) |Training time=0.48s (22.33%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3492|ppo_ep: 1|act_loss: -0.0187835693359375|cri_loss: -0.00879669189453125|unsuper_loss: 0.0
-average reward score: 5.6171875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.91%) |Training time=0.46s (21.26%) |Others=0.10 (4.83%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3493|ppo_ep: 1|act_loss: -0.01885986328125|cri_loss: -0.0089111328125|unsuper_loss: 0.0
-average reward score: 4.97265625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.84%) |Training time=0.46s (21.59%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3494|ppo_ep: 1|act_loss: 0.0062103271484375|cri_loss: 0.0032501220703125|unsuper_loss: 0.0
-average reward score: 5.63671875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.16%) |Training time=0.48s (22.31%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3495|ppo_ep: 1|act_loss: 0.005767822265625|cri_loss: 0.0030612945556640625|unsuper_loss: 0.0
-average reward score: 4.77734375
--------------------------------------------------------------------------------------
-|E2E latency=2.47s |Gather latency=0.00s (0.00%) |Generate time=1.65s (66.68%) |Training time=0.59s (23.83%) |Others=0.23 (9.48%)|CurSamplesPerSec=12.94 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3496|ppo_ep: 1|act_loss: 0.029144287109375|cri_loss: 0.01495361328125|unsuper_loss: 0.0
-average reward score: 5.16015625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.88%) |Training time=0.47s (21.57%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3497|ppo_ep: 1|act_loss: -0.01514434814453125|cri_loss: -0.0066375732421875|unsuper_loss: 0.0
-average reward score: 4.75390625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.70%) |Training time=0.47s (21.75%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3498|ppo_ep: 1|act_loss: -0.020843505859375|cri_loss: -0.00994110107421875|unsuper_loss: 0.0
-average reward score: 4.4765625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.35%) |Training time=0.48s (22.09%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
-[2023-04-14 10:56:34,865] [INFO] [logging.py:96:log_dist] [Rank 0] step=3500, skipped=48, lr=[6.159711298956566e-06, 6.159711298956566e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:56:34,883] [INFO] [timer.py:199:stop] epoch=0/micro_step=3500/global_step=3500, RunningAvgSamplesPerSec=105.74144100983754, CurrSamplesPerSec=101.71870384138222, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:56:34,976] [INFO] [logging.py:96:log_dist] [Rank 0] step=3500, skipped=53, lr=[3.1961836711297046e-06, 3.1961836711297046e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3499|ppo_ep: 1|act_loss: 0.0005273818969726562|cri_loss: 0.0006098747253417969|unsuper_loss: 0.0
-average reward score: 4.453125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.40%) |Training time=0.48s (22.08%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3500|ppo_ep: 1|act_loss: 0.0246734619140625|cri_loss: 0.01354217529296875|unsuper_loss: 0.0
-average reward score: 5.3203125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.30%) |Training time=0.48s (22.15%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3501|ppo_ep: 1|act_loss: -0.003116607666015625|cri_loss: -0.001251220703125|unsuper_loss: 0.0
-average reward score: 5.65625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.58%) |Training time=0.48s (22.14%) |Others=0.11 (5.28%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3502|ppo_ep: 1|act_loss: -0.0015268325805664062|cri_loss: -0.0006113052368164062|unsuper_loss: 0.0
-average reward score: 5.3046875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.20%) |Training time=0.48s (22.25%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3503|ppo_ep: 1|act_loss: -0.009063720703125|cri_loss: -0.0035953521728515625|unsuper_loss: 0.0
-average reward score: 5.234375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.45%) |Training time=0.47s (22.00%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3504|ppo_ep: 1|act_loss: -0.01104736328125|cri_loss: -0.005390167236328125|unsuper_loss: 0.0
-average reward score: 5.0546875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.08%) |Training time=0.48s (22.37%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3505|ppo_ep: 1|act_loss: 0.01168060302734375|cri_loss: 0.00601959228515625|unsuper_loss: 0.0
-average reward score: 4.6953125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.43%) |Training time=0.47s (22.02%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3506|ppo_ep: 1|act_loss: -0.01763916015625|cri_loss: -0.00856781005859375|unsuper_loss: 0.0
-average reward score: 6.0234375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.94%) |Training time=0.49s (22.50%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3507|ppo_ep: 1|act_loss: -0.0230255126953125|cri_loss: -0.01116180419921875|unsuper_loss: 0.0
-average reward score: 4.8515625
--------------------------------------------------------------------------------------
-|E2E latency=2.44s |Gather latency=0.00s (0.00%) |Generate time=1.59s (65.11%) |Training time=0.47s (19.31%) |Others=0.38 (15.58%)|CurSamplesPerSec=13.13 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3508|ppo_ep: 1|act_loss: -0.006900787353515625|cri_loss: -0.002567291259765625|unsuper_loss: 0.0
-average reward score: 5.2734375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.60%) |Training time=0.47s (21.61%) |Others=0.10 (4.79%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46
-[2023-04-14 10:56:56,750] [INFO] [logging.py:96:log_dist] [Rank 0] step=3510, skipped=48, lr=[6.141850121648488e-06, 6.141850121648488e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:56:56,769] [INFO] [timer.py:199:stop] epoch=0/micro_step=3510/global_step=3510, RunningAvgSamplesPerSec=105.72984477658514, CurrSamplesPerSec=104.4290330386313, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:56:56,861] [INFO] [logging.py:96:log_dist] [Rank 0] step=3510, skipped=53, lr=[3.186934283273525e-06, 3.186934283273525e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3509|ppo_ep: 1|act_loss: -0.028656005859375|cri_loss: -0.01386260986328125|unsuper_loss: 0.0
-average reward score: 5.9375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.79%) |Training time=0.47s (21.69%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3510|ppo_ep: 1|act_loss: 0.002899169921875|cri_loss: 0.0016813278198242188|unsuper_loss: 0.0
-average reward score: 5.0390625
--------------------------------------------------------------------------------------
-|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.68s (74.55%) |Training time=0.47s (21.08%) |Others=0.10 (4.37%)|CurSamplesPerSec=14.22 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3511|ppo_ep: 1|act_loss: 0.035186767578125|cri_loss: 0.018035888671875|unsuper_loss: 0.0
-average reward score: 4.7890625
--------------------------------------------------------------------------------------
-|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.30%) |Training time=0.47s (20.47%) |Others=0.10 (4.23%)|CurSamplesPerSec=13.81 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3512|ppo_ep: 1|act_loss: 0.01568603515625|cri_loss: 0.00904083251953125|unsuper_loss: 0.0
-average reward score: 4.7109375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.48%) |Training time=0.47s (21.95%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3513|ppo_ep: 1|act_loss: 0.00518035888671875|cri_loss: 0.003559112548828125|unsuper_loss: 0.0
-average reward score: 5.59765625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.42%) |Training time=0.47s (22.01%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46
-[2023-04-14 10:57:07,867] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
-epoch: 0|step: 3514|ppo_ep: 1|act_loss: 0.0416259765625|cri_loss: 0.0216064453125|unsuper_loss: 0.0
-average reward score: 5.515625
--------------------------------------------------------------------------------------
-|E2E latency=2.59s |Gather latency=0.00s (0.00%) |Generate time=1.58s (60.83%) |Training time=0.48s (18.46%) |Others=0.54 (20.70%)|CurSamplesPerSec=12.34 |AvgSamplesPerSec=14.46
-[2023-04-14 10:57:10,455] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
-epoch: 0|step: 3515|ppo_ep: 1|act_loss: 0.0184783935546875|cri_loss: 0.0100860595703125|unsuper_loss: 0.0
-average reward score: 4.76953125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.74%) |Training time=0.47s (22.17%) |Others=0.09 (4.09%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3516|ppo_ep: 1|act_loss: -0.000457763671875|cri_loss: 0.0002994537353515625|unsuper_loss: 0.0
-average reward score: 5.1171875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.32%) |Training time=0.48s (22.12%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3517|ppo_ep: 1|act_loss: 0.023529052734375|cri_loss: 0.01279449462890625|unsuper_loss: 0.0
-average reward score: 4.50390625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.26%) |Training time=0.48s (22.12%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3518|ppo_ep: 1|act_loss: 0.00598907470703125|cri_loss: 0.003543853759765625|unsuper_loss: 0.0
-average reward score: 4.49609375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.51%) |Training time=0.47s (21.92%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46
-[2023-04-14 10:57:18,957] [INFO] [logging.py:96:log_dist] [Rank 0] step=3520, skipped=48, lr=[6.123969425444881e-06, 6.123969425444881e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:57:18,976] [INFO] [timer.py:199:stop] epoch=0/micro_step=3520/global_step=3520, RunningAvgSamplesPerSec=105.7193975041583, CurrSamplesPerSec=99.69532912568643, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:57:19,068] [INFO] [logging.py:96:log_dist] [Rank 0] step=3520, skipped=55, lr=[3.1795274353460633e-06, 3.1795274353460633e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3519|ppo_ep: 1|act_loss: -0.018646240234375|cri_loss: -0.00894927978515625|unsuper_loss: 0.0
-average reward score: 4.77734375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.07%) |Training time=0.48s (22.39%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3520|ppo_ep: 1|act_loss: -0.04638671875|cri_loss: -0.0218658447265625|unsuper_loss: 0.0
-average reward score: 4.53125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.39%) |Training time=0.47s (22.05%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3521|ppo_ep: 1|act_loss: -0.018463134765625|cri_loss: -0.00884246826171875|unsuper_loss: 0.0
-average reward score: 5.203125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.34%) |Training time=0.48s (22.10%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3522|ppo_ep: 1|act_loss: -0.014129638671875|cri_loss: -0.0068817138671875|unsuper_loss: 0.0
-average reward score: 5.65625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.49%) |Training time=0.47s (21.95%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3523|ppo_ep: 1|act_loss: -0.01030731201171875|cri_loss: -0.00466156005859375|unsuper_loss: 0.0
-average reward score: 5.22265625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.37%) |Training time=0.48s (22.09%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3524|ppo_ep: 1|act_loss: 0.0089874267578125|cri_loss: 0.005382537841796875|unsuper_loss: 0.0
-average reward score: 5.046875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.99%) |Training time=0.49s (22.46%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3525|ppo_ep: 1|act_loss: 0.0082855224609375|cri_loss: 0.004657745361328125|unsuper_loss: 0.0
-average reward score: 5.46875
--------------------------------------------------------------------------------------
-|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.74s (75.04%) |Training time=0.48s (20.71%) |Others=0.10 (4.25%)|CurSamplesPerSec=13.79 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3526|ppo_ep: 1|act_loss: 0.04345703125|cri_loss: 0.02294921875|unsuper_loss: 0.0
-average reward score: 4.01953125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.45%) |Training time=0.48s (22.01%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3527|ppo_ep: 1|act_loss: 0.0106353759765625|cri_loss: 0.00634002685546875|unsuper_loss: 0.0
-average reward score: 5.14453125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.01%) |Training time=0.48s (22.30%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3528|ppo_ep: 1|act_loss: 0.0018854141235351562|cri_loss: 0.0011796951293945312|unsuper_loss: 0.0
-average reward score: 5.015625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.32%) |Training time=0.48s (22.10%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
-[2023-04-14 10:57:40,713] [INFO] [logging.py:96:log_dist] [Rank 0] step=3530, skipped=48, lr=[6.106069475380793e-06, 6.106069475380793e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:57:40,731] [INFO] [timer.py:199:stop] epoch=0/micro_step=3530/global_step=3530, RunningAvgSamplesPerSec=105.70634007207184, CurrSamplesPerSec=100.36808836011082, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:57:40,823] [INFO] [logging.py:96:log_dist] [Rank 0] step=3530, skipped=55, lr=[3.1702598186603152e-06, 3.1702598186603152e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3529|ppo_ep: 1|act_loss: 0.022003173828125|cri_loss: 0.011566162109375|unsuper_loss: 0.0
-average reward score: 6.10546875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.45%) |Training time=0.48s (22.08%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3530|ppo_ep: 1|act_loss: -0.027252197265625|cri_loss: -0.01322174072265625|unsuper_loss: 0.0
-average reward score: 5.08984375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.25%) |Training time=0.48s (22.20%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3531|ppo_ep: 1|act_loss: -0.0112152099609375|cri_loss: -0.00482940673828125|unsuper_loss: 0.0
-average reward score: 5.36328125
--------------------------------------------------------------------------------------
-|E2E latency=2.54s |Gather latency=0.00s (0.00%) |Generate time=1.58s (62.12%) |Training time=0.49s (19.26%) |Others=0.47 (18.62%)|CurSamplesPerSec=12.59 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3532|ppo_ep: 1|act_loss: -0.002079010009765625|cri_loss: -0.0007619857788085938|unsuper_loss: 0.0
-average reward score: 5.5546875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.63%) |Training time=0.47s (21.80%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3533|ppo_ep: 1|act_loss: -0.0006546974182128906|cri_loss: -0.00023233890533447266|unsuper_loss: 0.0
-average reward score: 5.0
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.57%) |Training time=0.47s (21.87%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3534|ppo_ep: 1|act_loss: 0.02215576171875|cri_loss: 0.011871337890625|unsuper_loss: 0.0
-average reward score: 5.40625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.58%) |Training time=0.47s (21.86%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3535|ppo_ep: 1|act_loss: -0.00395965576171875|cri_loss: -0.0013418197631835938|unsuper_loss: 0.0
-average reward score: 4.8125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.69%) |Training time=0.47s (21.76%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3536|ppo_ep: 1|act_loss: -0.0135040283203125|cri_loss: -0.00617218017578125|unsuper_loss: 0.0
-average reward score: 5.51953125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.73%) |Training time=0.47s (21.69%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3537|ppo_ep: 1|act_loss: 0.0075225830078125|cri_loss: 0.004032135009765625|unsuper_loss: 0.0
-average reward score: 6.2265625
--------------------------------------------------------------------------------------
-|E2E latency=2.81s |Gather latency=0.00s (0.00%) |Generate time=1.59s (56.84%) |Training time=0.47s (16.76%) |Others=0.74 (26.40%)|CurSamplesPerSec=11.41 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3538|ppo_ep: 1|act_loss: -0.02703857421875|cri_loss: -0.012420654296875|unsuper_loss: 0.0
-average reward score: 5.328125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.61%) |Training time=0.48s (21.90%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.46
-[2023-04-14 10:58:03,434] [INFO] [logging.py:96:log_dist] [Rank 0] step=3540, skipped=48, lr=[6.0881505367766705e-06, 6.0881505367766705e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:58:03,452] [INFO] [timer.py:199:stop] epoch=0/micro_step=3540/global_step=3540, RunningAvgSamplesPerSec=105.69823743299148, CurrSamplesPerSec=104.70218269755831, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:58:03,544] [INFO] [logging.py:96:log_dist] [Rank 0] step=3540, skipped=55, lr=[3.1609822671071404e-06, 3.1609822671071404e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3539|ppo_ep: 1|act_loss: -0.00218963623046875|cri_loss: -0.0005474090576171875|unsuper_loss: 0.0
-average reward score: 5.3125
--------------------------------------------------------------------------------------
-|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.91%) |Training time=0.47s (20.75%) |Others=0.10 (4.33%)|CurSamplesPerSec=14.19 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3540|ppo_ep: 1|act_loss: 0.0013484954833984375|cri_loss: 0.00098419189453125|unsuper_loss: 0.0
-average reward score: 5.22265625
--------------------------------------------------------------------------------------
-|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.59s (71.53%) |Training time=0.49s (21.99%) |Others=0.14 (6.48%)|CurSamplesPerSec=14.44 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3541|ppo_ep: 1|act_loss: 0.004383087158203125|cri_loss: 0.00302886962890625|unsuper_loss: 0.0
-average reward score: 4.9453125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.92%) |Training time=0.46s (21.50%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3542|ppo_ep: 1|act_loss: 0.0372314453125|cri_loss: 0.0189666748046875|unsuper_loss: 0.0
-average reward score: 6.0546875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.75%) |Training time=0.47s (21.70%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3543|ppo_ep: 1|act_loss: -0.0019016265869140625|cri_loss: -0.0007863044738769531|unsuper_loss: 0.0
-average reward score: 4.609375
--------------------------------------------------------------------------------------
-|E2E latency=2.12s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.83%) |Training time=0.43s (20.51%) |Others=0.10 (4.65%)|CurSamplesPerSec=15.12 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3544|ppo_ep: 1|act_loss: 0.00246429443359375|cri_loss: 0.00146484375|unsuper_loss: 0.0
-average reward score: 5.890625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.47%) |Training time=0.47s (21.97%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3545|ppo_ep: 1|act_loss: 0.002471923828125|cri_loss: 0.0014495849609375|unsuper_loss: 0.0
-average reward score: 4.5234375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.22%) |Training time=0.48s (22.23%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3546|ppo_ep: 1|act_loss: -1.728534698486328e-05|cri_loss: 0.00010848045349121094|unsuper_loss: 0.0
-average reward score: 5.078125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.18%) |Training time=0.48s (22.27%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3547|ppo_ep: 1|act_loss: -0.004550933837890625|cri_loss: -0.002094268798828125|unsuper_loss: 0.0
-average reward score: 4.3203125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.00%) |Training time=0.49s (22.46%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3548|ppo_ep: 1|act_loss: -0.0041656494140625|cri_loss: -0.001705169677734375|unsuper_loss: 0.0
-average reward score: 5.09765625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.18%) |Training time=0.48s (22.28%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46
-[2023-04-14 10:58:25,043] [INFO] [logging.py:96:log_dist] [Rank 0] step=3550, skipped=48, lr=[6.070212875234407e-06, 6.070212875234407e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:58:25,628] [INFO] [timer.py:199:stop] epoch=0/micro_step=3550/global_step=3550, RunningAvgSamplesPerSec=105.63486237523144, CurrSamplesPerSec=36.294179826075954, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:58:25,721] [INFO] [logging.py:96:log_dist] [Rank 0] step=3550, skipped=55, lr=[3.151694918202235e-06, 3.151694918202235e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3549|ppo_ep: 1|act_loss: -0.04327392578125|cri_loss: -0.0212249755859375|unsuper_loss: 0.0
-average reward score: 5.03515625
--------------------------------------------------------------------------------------
-|E2E latency=2.72s |Gather latency=0.00s (0.00%) |Generate time=1.58s (58.02%) |Training time=1.04s (38.39%) |Others=0.10 (3.59%)|CurSamplesPerSec=11.77 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3550|ppo_ep: 1|act_loss: -0.011199951171875|cri_loss: -0.005176544189453125|unsuper_loss: 0.0
-average reward score: 4.8984375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.21%) |Training time=0.48s (22.23%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3551|ppo_ep: 1|act_loss: -0.00017547607421875|cri_loss: 0.0007619857788085938|unsuper_loss: 0.0
-average reward score: 4.36328125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.06%) |Training time=0.48s (22.37%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3552|ppo_ep: 1|act_loss: 0.0054168701171875|cri_loss: 0.00298309326171875|unsuper_loss: 0.0
-average reward score: 4.94140625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.11%) |Training time=0.46s (21.29%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3553|ppo_ep: 1|act_loss: -0.0256500244140625|cri_loss: -0.0125885009765625|unsuper_loss: 0.0
-average reward score: 4.7578125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.71%) |Training time=0.47s (21.74%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3554|ppo_ep: 1|act_loss: 0.0131378173828125|cri_loss: 0.007228851318359375|unsuper_loss: 0.0
-average reward score: 5.15625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.77%) |Training time=0.47s (21.68%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3555|ppo_ep: 1|act_loss: 0.0072784423828125|cri_loss: 0.0038852691650390625|unsuper_loss: 0.0
-average reward score: 4.77734375
--------------------------------------------------------------------------------------
-|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.74s (75.03%) |Training time=0.48s (20.73%) |Others=0.10 (4.24%)|CurSamplesPerSec=13.78 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3556|ppo_ep: 1|act_loss: 0.004425048828125|cri_loss: 0.0025787353515625|unsuper_loss: 0.0
-average reward score: 5.47265625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.67%) |Training time=0.47s (21.82%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3557|ppo_ep: 1|act_loss: 0.022064208984375|cri_loss: 0.0125885009765625|unsuper_loss: 0.0
-average reward score: 5.0546875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.55%) |Training time=0.47s (21.90%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3558|ppo_ep: 1|act_loss: -0.018798828125|cri_loss: -0.0089874267578125|unsuper_loss: 0.0
-average reward score: 4.953125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.71%) |Training time=0.47s (21.74%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
-[2023-04-14 10:58:47,342] [INFO] [logging.py:96:log_dist] [Rank 0] step=3560, skipped=48, lr=[6.0522567566334204e-06, 6.0522567566334204e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:58:47,360] [INFO] [timer.py:199:stop] epoch=0/micro_step=3560/global_step=3560, RunningAvgSamplesPerSec=105.62931678444558, CurrSamplesPerSec=108.95321962132799, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:58:47,453] [INFO] [logging.py:96:log_dist] [Rank 0] step=3560, skipped=55, lr=[3.1423979096065134e-06, 3.1423979096065134e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3559|ppo_ep: 1|act_loss: -0.027008056640625|cri_loss: -0.0125732421875|unsuper_loss: 0.0
-average reward score: 4.98046875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.22%) |Training time=0.46s (21.23%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3560|ppo_ep: 1|act_loss: 0.0114288330078125|cri_loss: 0.006137847900390625|unsuper_loss: 0.0
-average reward score: 5.59765625
--------------------------------------------------------------------------------------
-|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.59s (67.66%) |Training time=0.46s (19.78%) |Others=0.30 (12.57%)|CurSamplesPerSec=13.61 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3561|ppo_ep: 1|act_loss: 0.0050811767578125|cri_loss: 0.002773284912109375|unsuper_loss: 0.0
-average reward score: 4.75
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.50%) |Training time=0.47s (21.95%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3562|ppo_ep: 1|act_loss: -0.010101318359375|cri_loss: -0.00379180908203125|unsuper_loss: 0.0
-average reward score: 4.0390625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.76%) |Training time=0.47s (21.66%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3563|ppo_ep: 1|act_loss: 0.033172607421875|cri_loss: 0.017578125|unsuper_loss: 0.0
-average reward score: 5.46875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.11%) |Training time=0.46s (21.32%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3564|ppo_ep: 1|act_loss: 0.005565643310546875|cri_loss: 0.003925323486328125|unsuper_loss: 0.0
-average reward score: 5.7421875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.60%) |Training time=0.47s (21.84%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3565|ppo_ep: 1|act_loss: 0.01776123046875|cri_loss: 0.0090789794921875|unsuper_loss: 0.0
-average reward score: 4.484375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.68%) |Training time=0.47s (21.76%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3566|ppo_ep: 1|act_loss: 0.0094451904296875|cri_loss: 0.00511932373046875|unsuper_loss: 0.0
-average reward score: 5.0
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.33%) |Training time=0.45s (21.06%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3567|ppo_ep: 1|act_loss: -0.00640869140625|cri_loss: -0.00267791748046875|unsuper_loss: 0.0
-average reward score: 4.79296875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.07%) |Training time=0.46s (21.07%) |Others=0.11 (4.85%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3568|ppo_ep: 1|act_loss: -0.02154541015625|cri_loss: -0.01007843017578125|unsuper_loss: 0.0
-average reward score: 4.8984375
--------------------------------------------------------------------------------------
-|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.62s (72.70%) |Training time=0.51s (22.87%) |Others=0.10 (4.43%)|CurSamplesPerSec=14.40 |AvgSamplesPerSec=14.46
-[2023-04-14 10:59:09,144] [INFO] [logging.py:96:log_dist] [Rank 0] step=3570, skipped=48, lr=[6.0342824471267055e-06, 6.0342824471267055e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:59:09,162] [INFO] [timer.py:199:stop] epoch=0/micro_step=3570/global_step=3570, RunningAvgSamplesPerSec=105.63078928049823, CurrSamplesPerSec=105.9774002645137, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:59:09,255] [INFO] [logging.py:96:log_dist] [Rank 0] step=3570, skipped=55, lr=[3.1330913791240707e-06, 3.1330913791240707e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3569|ppo_ep: 1|act_loss: -0.0247802734375|cri_loss: -0.01129150390625|unsuper_loss: 0.0
-average reward score: 5.0546875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.87%) |Training time=0.46s (21.58%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3570|ppo_ep: 1|act_loss: -0.0223846435546875|cri_loss: -0.01056671142578125|unsuper_loss: 0.0
-average reward score: 5.640625
--------------------------------------------------------------------------------------
-|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.76s (74.66%) |Training time=0.46s (19.63%) |Others=0.13 (5.72%)|CurSamplesPerSec=13.60 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3571|ppo_ep: 1|act_loss: 0.016448974609375|cri_loss: 0.00888824462890625|unsuper_loss: 0.0
-average reward score: 4.62890625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.52%) |Training time=0.47s (21.94%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3572|ppo_ep: 1|act_loss: -0.0006151199340820312|cri_loss: 0.00021076202392578125|unsuper_loss: 0.0
-average reward score: 5.0234375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.66%) |Training time=0.47s (21.78%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3573|ppo_ep: 1|act_loss: -0.006267547607421875|cri_loss: -0.0024242401123046875|unsuper_loss: 0.0
-average reward score: 5.28515625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.29%) |Training time=0.45s (21.14%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3574|ppo_ep: 1|act_loss: -0.027252197265625|cri_loss: -0.01296234130859375|unsuper_loss: 0.0
-average reward score: 5.60546875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.63%) |Training time=0.47s (21.82%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3575|ppo_ep: 1|act_loss: -0.00567626953125|cri_loss: -0.002521514892578125|unsuper_loss: 0.0
-average reward score: 5.33203125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.68%) |Training time=0.47s (21.72%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3576|ppo_ep: 1|act_loss: -0.017608642578125|cri_loss: -0.0083770751953125|unsuper_loss: 0.0
-average reward score: 4.99609375
--------------------------------------------------------------------------------------
-|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.59s (68.85%) |Training time=0.46s (20.14%) |Others=0.25 (11.01%)|CurSamplesPerSec=13.86 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3577|ppo_ep: 1|act_loss: 0.00687408447265625|cri_loss: 0.0038509368896484375|unsuper_loss: 0.0
-average reward score: 4.546875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.77%) |Training time=0.47s (21.59%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3578|ppo_ep: 1|act_loss: -0.0240020751953125|cri_loss: -0.01154327392578125|unsuper_loss: 0.0
-average reward score: 5.15234375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.75%) |Training time=0.47s (21.70%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46
-[2023-04-14 10:59:31,042] [INFO] [logging.py:96:log_dist] [Rank 0] step=3580, skipped=48, lr=[6.016290213136888e-06, 6.016290213136888e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:59:31,060] [INFO] [timer.py:199:stop] epoch=0/micro_step=3580/global_step=3580, RunningAvgSamplesPerSec=105.62996466221267, CurrSamplesPerSec=105.79012340813594, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:59:31,152] [INFO] [logging.py:96:log_dist] [Rank 0] step=3580, skipped=55, lr=[3.1237754647001396e-06, 3.1237754647001396e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3579|ppo_ep: 1|act_loss: 0.00429534912109375|cri_loss: 0.00231170654296875|unsuper_loss: 0.0
-average reward score: 5.15234375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.87%) |Training time=0.46s (21.60%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3580|ppo_ep: 1|act_loss: 0.0301055908203125|cri_loss: 0.0154571533203125|unsuper_loss: 0.0
-average reward score: 4.7109375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.65%) |Training time=0.47s (21.80%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3581|ppo_ep: 1|act_loss: -0.0161895751953125|cri_loss: -0.00774383544921875|unsuper_loss: 0.0
-average reward score: 5.6484375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.51%) |Training time=0.45s (20.94%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3582|ppo_ep: 1|act_loss: -0.019378662109375|cri_loss: -0.0086212158203125|unsuper_loss: 0.0
-average reward score: 5.0703125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.01%) |Training time=0.44s (20.43%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3583|ppo_ep: 1|act_loss: 0.026580810546875|cri_loss: 0.0140228271484375|unsuper_loss: 0.0
-average reward score: 5.296875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.77%) |Training time=0.47s (21.67%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3584|ppo_ep: 1|act_loss: 0.031585693359375|cri_loss: 0.01611328125|unsuper_loss: 0.0
-average reward score: 5.546875
--------------------------------------------------------------------------------------
-|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.62s (71.67%) |Training time=0.47s (20.92%) |Others=0.17 (7.41%)|CurSamplesPerSec=14.19 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3585|ppo_ep: 1|act_loss: 0.004833221435546875|cri_loss: 0.0026397705078125|unsuper_loss: 0.0
-average reward score: 5.5546875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.97%) |Training time=0.46s (21.47%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3586|ppo_ep: 1|act_loss: 0.031219482421875|cri_loss: 0.016754150390625|unsuper_loss: 0.0
-average reward score: 5.2421875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.59%) |Training time=0.47s (21.86%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3587|ppo_ep: 1|act_loss: 0.01354217529296875|cri_loss: 0.006961822509765625|unsuper_loss: 0.0
-average reward score: 5.35546875
--------------------------------------------------------------------------------------
-|E2E latency=2.44s |Gather latency=0.00s (0.00%) |Generate time=1.59s (65.22%) |Training time=0.47s (19.17%) |Others=0.38 (15.61%)|CurSamplesPerSec=13.13 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3588|ppo_ep: 1|act_loss: 0.003223419189453125|cri_loss: 0.002239227294921875|unsuper_loss: 0.0
-average reward score: 5.09765625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.67%) |Training time=0.47s (21.78%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
-[2023-04-14 10:59:52,984] [INFO] [logging.py:96:log_dist] [Rank 0] step=3590, skipped=48, lr=[5.998280321352281e-06, 5.998280321352281e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 10:59:53,003] [INFO] [timer.py:199:stop] epoch=0/micro_step=3590/global_step=3590, RunningAvgSamplesPerSec=105.63170114714389, CurrSamplesPerSec=103.337966759135, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 10:59:53,095] [INFO] [logging.py:96:log_dist] [Rank 0] step=3590, skipped=55, lr=[3.1144503044190456e-06, 3.1144503044190456e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3589|ppo_ep: 1|act_loss: -0.026947021484375|cri_loss: -0.0128936767578125|unsuper_loss: 0.0
-average reward score: 5.046875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.56%) |Training time=0.47s (21.91%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3590|ppo_ep: 1|act_loss: -0.0281524658203125|cri_loss: -0.0134124755859375|unsuper_loss: 0.0
-average reward score: 5.1953125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.95%) |Training time=0.46s (21.49%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3591|ppo_ep: 1|act_loss: -0.03668212890625|cri_loss: -0.017578125|unsuper_loss: 0.0
-average reward score: 5.39453125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.58%) |Training time=0.47s (21.88%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3592|ppo_ep: 1|act_loss: -0.001079559326171875|cri_loss: 0.00086212158203125|unsuper_loss: 0.0
-average reward score: 4.828125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.72%) |Training time=0.47s (21.66%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3593|ppo_ep: 1|act_loss: 0.0159759521484375|cri_loss: 0.00830078125|unsuper_loss: 0.0
-average reward score: 5.3828125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.56%) |Training time=0.47s (21.88%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3594|ppo_ep: 1|act_loss: 0.0513916015625|cri_loss: 0.027069091796875|unsuper_loss: 0.0
-average reward score: 4.35546875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.81%) |Training time=0.47s (21.62%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3595|ppo_ep: 1|act_loss: 0.01678466796875|cri_loss: 0.00872039794921875|unsuper_loss: 0.0
-average reward score: 5.67578125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.59%) |Training time=0.47s (21.86%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3596|ppo_ep: 1|act_loss: 0.02386474609375|cri_loss: 0.012664794921875|unsuper_loss: 0.0
-average reward score: 3.826171875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.23%) |Training time=0.46s (21.20%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3597|ppo_ep: 1|act_loss: 0.0190582275390625|cri_loss: 0.00998687744140625|unsuper_loss: 0.0
-average reward score: 4.86328125
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.16%) |Training time=0.45s (20.35%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3598|ppo_ep: 1|act_loss: -0.018890380859375|cri_loss: -0.0084686279296875|unsuper_loss: 0.0
-average reward score: 5.3125
--------------------------------------------------------------------------------------
-|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.69s (75.01%) |Training time=0.46s (20.62%) |Others=0.10 (4.36%)|CurSamplesPerSec=14.22 |AvgSamplesPerSec=14.46
-[2023-04-14 11:00:14,694] [INFO] [logging.py:96:log_dist] [Rank 0] step=3600, skipped=48, lr=[5.980253038722927e-06, 5.980253038722927e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:00:15,110] [INFO] [timer.py:199:stop] epoch=0/micro_step=3600/global_step=3600, RunningAvgSamplesPerSec=105.5920159904189, CurrSamplesPerSec=44.41792340872793, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:00:15,203] [INFO] [logging.py:96:log_dist] [Rank 0] step=3600, skipped=55, lr=[3.1051160365021587e-06, 3.1051160365021587e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3599|ppo_ep: 1|act_loss: -0.0035114288330078125|cri_loss: -0.00138092041015625|unsuper_loss: 0.0
-average reward score: 4.9375
--------------------------------------------------------------------------------------
-|E2E latency=2.56s |Gather latency=0.00s (0.00%) |Generate time=1.58s (61.70%) |Training time=0.88s (34.49%) |Others=0.10 (3.81%)|CurSamplesPerSec=12.49 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3600|ppo_ep: 1|act_loss: 0.02923583984375|cri_loss: 0.015167236328125|unsuper_loss: 0.0
-average reward score: 5.0390625
--------------------------------------------------------------------------------------
-|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.54%) |Training time=0.47s (20.24%) |Others=0.10 (4.22%)|CurSamplesPerSec=13.74 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3601|ppo_ep: 1|act_loss: -0.040283203125|cri_loss: -0.0187835693359375|unsuper_loss: 0.0
-average reward score: 4.88671875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.02%) |Training time=0.49s (22.49%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3602|ppo_ep: 1|act_loss: -0.0059967041015625|cri_loss: -0.0027408599853515625|unsuper_loss: 0.0
-average reward score: 5.328125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.73%) |Training time=0.47s (21.72%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3603|ppo_ep: 1|act_loss: -0.01409149169921875|cri_loss: -0.00624847412109375|unsuper_loss: 0.0
-average reward score: 4.75390625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.60%) |Training time=0.47s (21.84%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3604|ppo_ep: 1|act_loss: -0.012481689453125|cri_loss: -0.00504302978515625|unsuper_loss: 0.0
-average reward score: 5.59375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.64%) |Training time=0.47s (21.82%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3605|ppo_ep: 1|act_loss: 0.0126190185546875|cri_loss: 0.006801605224609375|unsuper_loss: 0.0
-average reward score: 4.953125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.69%) |Training time=0.47s (21.74%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3606|ppo_ep: 1|act_loss: -0.0205230712890625|cri_loss: -0.00936126708984375|unsuper_loss: 0.0
-average reward score: 5.51171875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.64%) |Training time=0.47s (21.81%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3607|ppo_ep: 1|act_loss: 0.03607177734375|cri_loss: 0.0196533203125|unsuper_loss: 0.0
-average reward score: 4.8828125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.06%) |Training time=0.46s (21.39%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3608|ppo_ep: 1|act_loss: 0.00792694091796875|cri_loss: 0.004543304443359375|unsuper_loss: 0.0
-average reward score: 5.38671875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.70%) |Training time=0.47s (21.75%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
-[2023-04-14 11:00:36,841] [INFO] [logging.py:96:log_dist] [Rank 0] step=3610, skipped=48, lr=[5.962208632456643e-06, 5.962208632456643e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:00:37,031] [INFO] [timer.py:199:stop] epoch=0/micro_step=3610/global_step=3610, RunningAvgSamplesPerSec=105.57008798024897, CurrSamplesPerSec=66.34913324972403, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:00:37,124] [INFO] [logging.py:96:log_dist] [Rank 0] step=3610, skipped=55, lr=[3.095772799305849e-06, 3.095772799305849e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3609|ppo_ep: 1|act_loss: 0.00445556640625|cri_loss: 0.00432586669921875|unsuper_loss: 0.0
-average reward score: 4.5390625
--------------------------------------------------------------------------------------
-|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.59s (68.12%) |Training time=0.64s (27.68%) |Others=0.10 (4.20%)|CurSamplesPerSec=13.74 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3610|ppo_ep: 1|act_loss: 0.0025844573974609375|cri_loss: 0.0023670196533203125|unsuper_loss: 0.0
-average reward score: 5.9453125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.84%) |Training time=0.47s (21.60%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3611|ppo_ep: 1|act_loss: 0.001483917236328125|cri_loss: 0.0009298324584960938|unsuper_loss: 0.0
-average reward score: 4.76953125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.36%) |Training time=0.46s (21.10%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3612|ppo_ep: 1|act_loss: -0.013458251953125|cri_loss: -0.00569915771484375|unsuper_loss: 0.0
-average reward score: 4.8828125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.22%) |Training time=0.46s (21.23%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3613|ppo_ep: 1|act_loss: 0.023895263671875|cri_loss: 0.0121612548828125|unsuper_loss: 0.0
-average reward score: 5.64453125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.02%) |Training time=0.49s (22.46%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3614|ppo_ep: 1|act_loss: -0.01114654541015625|cri_loss: -0.00380706787109375|unsuper_loss: 0.0
-average reward score: 5.38671875
--------------------------------------------------------------------------------------
-|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.81s (76.90%) |Training time=0.45s (18.92%) |Others=0.10 (4.18%)|CurSamplesPerSec=13.58 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3615|ppo_ep: 1|act_loss: -0.00399017333984375|cri_loss: -0.0018091201782226562|unsuper_loss: 0.0
-average reward score: 4.8828125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.66s (76.54%) |Training time=0.41s (18.88%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
-[2023-04-14 11:00:52,492] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
-epoch: 0|step: 3616|ppo_ep: 1|act_loss: 0.008026123046875|cri_loss: 0.00434112548828125|unsuper_loss: 0.0
-average reward score: 4.6796875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.71%) |Training time=0.46s (21.22%) |Others=0.09 (4.07%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.46
-[2023-04-14 11:00:54,667] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
-epoch: 0|step: 3617|ppo_ep: 1|act_loss: 0.002307891845703125|cri_loss: 0.00164794921875|unsuper_loss: 0.0
-average reward score: 5.015625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.91%) |Training time=0.46s (21.00%) |Others=0.09 (4.09%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3618|ppo_ep: 1|act_loss: 0.0006394386291503906|cri_loss: 0.0004863739013671875|unsuper_loss: 0.0
-average reward score: 4.8125
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.28%) |Training time=0.46s (21.21%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.46
-[2023-04-14 11:00:58,913] [INFO] [logging.py:96:log_dist] [Rank 0] step=3620, skipped=48, lr=[5.944147370015059e-06, 5.944147370015059e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:00:58,931] [INFO] [timer.py:199:stop] epoch=0/micro_step=3620/global_step=3620, RunningAvgSamplesPerSec=105.58135629427218, CurrSamplesPerSec=118.61425940393161, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:00:59,024] [INFO] [logging.py:96:log_dist] [Rank 0] step=3620, skipped=57, lr=[3.0882918447280204e-06, 3.0882918447280204e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3619|ppo_ep: 1|act_loss: -0.03326416015625|cri_loss: -0.01611328125|unsuper_loss: 0.0
-average reward score: 5.1484375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.46%) |Training time=0.43s (19.96%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3620|ppo_ep: 1|act_loss: -0.02386474609375|cri_loss: -0.01171112060546875|unsuper_loss: 0.0
-average reward score: 5.25
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.70%) |Training time=0.43s (19.74%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3621|ppo_ep: 1|act_loss: 0.01934814453125|cri_loss: 0.01015472412109375|unsuper_loss: 0.0
-average reward score: 5.0
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.21%) |Training time=0.46s (21.23%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3622|ppo_ep: 1|act_loss: -0.01226806640625|cri_loss: -0.00591278076171875|unsuper_loss: 0.0
-average reward score: 4.8046875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.31%) |Training time=0.46s (21.14%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3623|ppo_ep: 1|act_loss: -0.01050567626953125|cri_loss: -0.0050811767578125|unsuper_loss: 0.0
-average reward score: 5.2265625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.17%) |Training time=0.46s (21.29%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3624|ppo_ep: 1|act_loss: -0.0005555152893066406|cri_loss: 0.0001010894775390625|unsuper_loss: 0.0
-average reward score: 5.59375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.24%) |Training time=0.46s (21.21%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3625|ppo_ep: 1|act_loss: 0.0088043212890625|cri_loss: 0.0050048828125|unsuper_loss: 0.0
-average reward score: 4.6640625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.30%) |Training time=0.46s (21.14%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3626|ppo_ep: 1|act_loss: -0.041839599609375|cri_loss: -0.020050048828125|unsuper_loss: 0.0
-average reward score: 5.16015625
--------------------------------------------------------------------------------------
-|E2E latency=3.37s |Gather latency=0.00s (0.00%) |Generate time=1.62s (48.24%) |Training time=0.47s (13.97%) |Others=1.27 (37.79%)|CurSamplesPerSec=9.50 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3627|ppo_ep: 1|act_loss: -0.0139312744140625|cri_loss: -0.0064697265625|unsuper_loss: 0.0
-average reward score: 4.82421875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.17%) |Training time=0.46s (21.16%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3628|ppo_ep: 1|act_loss: -0.03765869140625|cri_loss: -0.0177001953125|unsuper_loss: 0.0
-average reward score: 5.5546875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.62%) |Training time=0.44s (20.77%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.46
-[2023-04-14 11:01:21,955] [INFO] [logging.py:96:log_dist] [Rank 0] step=3630, skipped=48, lr=[5.926069519109657e-06, 5.926069519109657e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:01:21,973] [INFO] [timer.py:199:stop] epoch=0/micro_step=3630/global_step=3630, RunningAvgSamplesPerSec=105.5895945710597, CurrSamplesPerSec=105.85695582357191, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:01:22,071] [INFO] [logging.py:96:log_dist] [Rank 0] step=3630, skipped=57, lr=[3.078932811909849e-06, 3.078932811909849e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3629|ppo_ep: 1|act_loss: -0.034912109375|cri_loss: -0.0164337158203125|unsuper_loss: 0.0
-average reward score: 5.29296875
--------------------------------------------------------------------------------------
-|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.77s (75.60%) |Training time=0.47s (19.87%) |Others=0.11 (4.53%)|CurSamplesPerSec=13.67 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3630|ppo_ep: 1|act_loss: -0.0015287399291992188|cri_loss: 3.528594970703125e-05|unsuper_loss: 0.0
-average reward score: 4.5
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.70%) |Training time=0.44s (20.62%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3631|ppo_ep: 1|act_loss: 0.0217132568359375|cri_loss: 0.01214599609375|unsuper_loss: 0.0
-average reward score: 4.7421875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.87%) |Training time=0.44s (20.54%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3632|ppo_ep: 1|act_loss: -0.015869140625|cri_loss: -0.00717926025390625|unsuper_loss: 0.0
-average reward score: 4.9453125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.23%) |Training time=0.45s (21.11%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3633|ppo_ep: 1|act_loss: 0.01430511474609375|cri_loss: 0.0074462890625|unsuper_loss: 0.0
-average reward score: 5.79296875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.95%) |Training time=0.46s (21.47%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3634|ppo_ep: 1|act_loss: -0.0142669677734375|cri_loss: -0.006793975830078125|unsuper_loss: 0.0
-average reward score: 3.82421875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.95%) |Training time=0.46s (21.47%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3635|ppo_ep: 1|act_loss: 0.01023101806640625|cri_loss: 0.00545501708984375|unsuper_loss: 0.0
-average reward score: 5.4296875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.44%) |Training time=0.45s (20.97%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3636|ppo_ep: 1|act_loss: 0.01654052734375|cri_loss: 0.0084381103515625|unsuper_loss: 0.0
-average reward score: 4.94921875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.12%) |Training time=0.46s (21.31%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3637|ppo_ep: 1|act_loss: -0.031585693359375|cri_loss: -0.01507568359375|unsuper_loss: 0.0
-average reward score: 6.13671875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.91%) |Training time=0.46s (21.51%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3638|ppo_ep: 1|act_loss: 0.0236053466796875|cri_loss: 0.01245880126953125|unsuper_loss: 0.0
-average reward score: 5.23828125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.98%) |Training time=0.46s (21.46%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
-[2023-04-14 11:01:43,424] [INFO] [logging.py:96:log_dist] [Rank 0] step=3640, skipped=48, lr=[5.907975347697794e-06, 5.907975347697794e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:01:43,443] [INFO] [timer.py:199:stop] epoch=0/micro_step=3640/global_step=3640, RunningAvgSamplesPerSec=105.59923223574494, CurrSamplesPerSec=105.7564471787954, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:01:43,535] [INFO] [logging.py:96:log_dist] [Rank 0] step=3640, skipped=57, lr=[3.069565197910806e-06, 3.069565197910806e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3639|ppo_ep: 1|act_loss: 0.0116119384765625|cri_loss: 0.00627899169921875|unsuper_loss: 0.0
-average reward score: 5.40625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.77%) |Training time=0.47s (21.68%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3640|ppo_ep: 1|act_loss: -0.004119873046875|cri_loss: -0.0014057159423828125|unsuper_loss: 0.0
-average reward score: 4.40625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.88%) |Training time=0.46s (21.53%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3641|ppo_ep: 1|act_loss: 0.00606536865234375|cri_loss: 0.004093170166015625|unsuper_loss: 0.0
-average reward score: 4.7890625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.77%) |Training time=0.46s (21.60%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3642|ppo_ep: 1|act_loss: 0.00603485107421875|cri_loss: 0.00371551513671875|unsuper_loss: 0.0
-average reward score: 4.46875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.57%) |Training time=0.45s (20.88%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3643|ppo_ep: 1|act_loss: 0.00029468536376953125|cri_loss: 0.0005202293395996094|unsuper_loss: 0.0
-average reward score: 4.921875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.92%) |Training time=0.46s (21.50%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3644|ppo_ep: 1|act_loss: -0.00589752197265625|cri_loss: -0.00200653076171875|unsuper_loss: 0.0
-average reward score: 4.6640625
--------------------------------------------------------------------------------------
-|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.89%) |Training time=0.46s (19.85%) |Others=0.10 (4.26%)|CurSamplesPerSec=13.86 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3645|ppo_ep: 1|act_loss: 0.0184326171875|cri_loss: 0.009765625|unsuper_loss: 0.0
-average reward score: 5.7265625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.72%) |Training time=0.44s (20.70%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3646|ppo_ep: 1|act_loss: 0.01568603515625|cri_loss: 0.00806427001953125|unsuper_loss: 0.0
-average reward score: 4.5234375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.30%) |Training time=0.45s (21.13%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3647|ppo_ep: 1|act_loss: -0.00588226318359375|cri_loss: -0.0027637481689453125|unsuper_loss: 0.0
-average reward score: 5.4609375
--------------------------------------------------------------------------------------
-|E2E latency=2.49s |Gather latency=0.00s (0.00%) |Generate time=1.59s (63.95%) |Training time=0.46s (18.28%) |Others=0.44 (17.77%)|CurSamplesPerSec=12.85 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3648|ppo_ep: 1|act_loss: -0.0154571533203125|cri_loss: -0.00659942626953125|unsuper_loss: 0.0
-average reward score: 5.08203125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.12%) |Training time=0.46s (21.30%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46
-[2023-04-14 11:02:05,410] [INFO] [logging.py:96:log_dist] [Rank 0] step=3650, skipped=48, lr=[5.889865123978745e-06, 5.889865123978745e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:02:05,429] [INFO] [timer.py:199:stop] epoch=0/micro_step=3650/global_step=3650, RunningAvgSamplesPerSec=105.60741290301202, CurrSamplesPerSec=106.42917645640826, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:02:05,521] [INFO] [logging.py:96:log_dist] [Rank 0] step=3650, skipped=57, lr=[3.0601891415815286e-06, 3.0601891415815286e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3649|ppo_ep: 1|act_loss: -0.001522064208984375|cri_loss: -0.0004944801330566406|unsuper_loss: 0.0
-average reward score: 5.62890625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.90%) |Training time=0.46s (21.54%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46
-[2023-04-14 11:02:07,543] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
-epoch: 0|step: 3650|ppo_ep: 1|act_loss: -0.002902984619140625|cri_loss: -0.0012159347534179688|unsuper_loss: 0.0
-average reward score: 5.1328125
--------------------------------------------------------------------------------------
-|E2E latency=2.11s |Gather latency=0.00s (0.00%) |Generate time=1.59s (75.42%) |Training time=0.42s (19.95%) |Others=0.10 (4.63%)|CurSamplesPerSec=15.14 |AvgSamplesPerSec=14.46
-[2023-04-14 11:02:09,663] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
-epoch: 0|step: 3651|ppo_ep: 1|act_loss: -0.030426025390625|cri_loss: -0.01433563232421875|unsuper_loss: 0.0
-average reward score: 4.93359375
--------------------------------------------------------------------------------------
-|E2E latency=2.12s |Gather latency=0.00s (0.00%) |Generate time=1.59s (75.21%) |Training time=0.43s (20.16%) |Others=0.10 (4.62%)|CurSamplesPerSec=15.10 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3652|ppo_ep: 1|act_loss: -0.01470947265625|cri_loss: -0.00661468505859375|unsuper_loss: 0.0
-average reward score: 4.55859375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.48%) |Training time=0.45s (20.93%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3653|ppo_ep: 1|act_loss: 0.00142669677734375|cri_loss: 0.0014524459838867188|unsuper_loss: 0.0
-average reward score: 5.515625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.37%) |Training time=0.45s (21.03%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3654|ppo_ep: 1|act_loss: 0.007843017578125|cri_loss: 0.0045013427734375|unsuper_loss: 0.0
-average reward score: 5.23828125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.11%) |Training time=0.46s (21.30%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3655|ppo_ep: 1|act_loss: 0.038421630859375|cri_loss: 0.020263671875|unsuper_loss: 0.0
-average reward score: 5.578125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.15%) |Training time=0.45s (20.67%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3656|ppo_ep: 1|act_loss: 0.031494140625|cri_loss: 0.016754150390625|unsuper_loss: 0.0
-average reward score: 4.28125
--------------------------------------------------------------------------------------
-|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.70s (75.32%) |Training time=0.46s (20.29%) |Others=0.10 (4.39%)|CurSamplesPerSec=14.19 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3657|ppo_ep: 1|act_loss: 0.0087890625|cri_loss: 0.004703521728515625|unsuper_loss: 0.0
-average reward score: 4.9453125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.00%) |Training time=0.46s (21.46%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3658|ppo_ep: 1|act_loss: 0.00679779052734375|cri_loss: 0.003719329833984375|unsuper_loss: 0.0
-average reward score: 4.96484375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.57%) |Training time=0.40s (18.52%) |Others=0.17 (7.92%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
-[2023-04-14 11:02:27,160] [INFO] [logging.py:96:log_dist] [Rank 0] step=3660, skipped=50, lr=[5.875365567724234e-06, 5.875365567724234e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:02:27,178] [INFO] [timer.py:199:stop] epoch=0/micro_step=3660/global_step=3660, RunningAvgSamplesPerSec=105.62991461506235, CurrSamplesPerSec=107.63474458970275, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:02:27,271] [INFO] [logging.py:96:log_dist] [Rank 0] step=3660, skipped=57, lr=[3.050804781897791e-06, 3.050804781897791e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3659|ppo_ep: 1|act_loss: -0.00601959228515625|cri_loss: -0.002696990966796875|unsuper_loss: 0.0
-average reward score: 5.46875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.01%) |Training time=0.46s (21.42%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3660|ppo_ep: 1|act_loss: -0.065185546875|cri_loss: -0.03179931640625|unsuper_loss: 0.0
-average reward score: 5.4609375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.09%) |Training time=0.46s (21.33%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3661|ppo_ep: 1|act_loss: -0.0310211181640625|cri_loss: -0.013946533203125|unsuper_loss: 0.0
-average reward score: 5.1953125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.61%) |Training time=0.47s (21.80%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3662|ppo_ep: 1|act_loss: -0.03411865234375|cri_loss: -0.01593017578125|unsuper_loss: 0.0
-average reward score: 5.3671875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.65%) |Training time=0.47s (21.76%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3663|ppo_ep: 1|act_loss: 0.043853759765625|cri_loss: 0.0233917236328125|unsuper_loss: 0.0
-average reward score: 5.7109375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.73%) |Training time=0.47s (21.70%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3664|ppo_ep: 1|act_loss: 0.02154541015625|cri_loss: 0.0111846923828125|unsuper_loss: 0.0
-average reward score: 4.7734375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.57%) |Training time=0.47s (21.85%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3665|ppo_ep: 1|act_loss: 0.023284912109375|cri_loss: 0.0124053955078125|unsuper_loss: 0.0
-average reward score: 5.203125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.56%) |Training time=0.47s (21.87%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3666|ppo_ep: 1|act_loss: -0.0031890869140625|cri_loss: -0.0012874603271484375|unsuper_loss: 0.0
-average reward score: 4.8203125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.99%) |Training time=0.46s (21.46%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3667|ppo_ep: 1|act_loss: 0.03533935546875|cri_loss: 0.0186767578125|unsuper_loss: 0.0
-average reward score: 4.8671875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.62%) |Training time=0.47s (21.77%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3668|ppo_ep: 1|act_loss: -0.030364990234375|cri_loss: -0.01441192626953125|unsuper_loss: 0.0
-average reward score: 5.51953125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.64%) |Training time=0.47s (21.79%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46
-[2023-04-14 11:02:48,613] [INFO] [logging.py:96:log_dist] [Rank 0] step=3670, skipped=50, lr=[5.85722712647135e-06, 5.85722712647135e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:02:48,631] [INFO] [timer.py:199:stop] epoch=0/micro_step=3670/global_step=3670, RunningAvgSamplesPerSec=105.6294239805947, CurrSamplesPerSec=105.60508599933907, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:02:48,724] [INFO] [logging.py:96:log_dist] [Rank 0] step=3670, skipped=57, lr=[3.0414122579584403e-06, 3.0414122579584403e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3669|ppo_ep: 1|act_loss: -0.0185546875|cri_loss: -0.0088653564453125|unsuper_loss: 0.0
-average reward score: 5.953125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.69%) |Training time=0.47s (21.72%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3670|ppo_ep: 1|act_loss: -0.027069091796875|cri_loss: -0.01312255859375|unsuper_loss: 0.0
-average reward score: 5.16015625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.44%) |Training time=0.47s (21.98%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3671|ppo_ep: 1|act_loss: -0.01959228515625|cri_loss: -0.00933074951171875|unsuper_loss: 0.0
-average reward score: 5.1796875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.79%) |Training time=0.46s (21.45%) |Others=0.10 (4.75%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3672|ppo_ep: 1|act_loss: -0.012786865234375|cri_loss: -0.005970001220703125|unsuper_loss: 0.0
-average reward score: 4.66015625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.10%) |Training time=0.49s (22.29%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3673|ppo_ep: 1|act_loss: 0.0139007568359375|cri_loss: 0.00730133056640625|unsuper_loss: 0.0
-average reward score: 5.46875
--------------------------------------------------------------------------------------
-|E2E latency=2.53s |Gather latency=0.00s (0.00%) |Generate time=1.59s (62.96%) |Training time=0.48s (19.10%) |Others=0.45 (17.94%)|CurSamplesPerSec=12.66 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3674|ppo_ep: 1|act_loss: 0.0052490234375|cri_loss: 0.0029621124267578125|unsuper_loss: 0.0
-average reward score: 5.703125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.11%) |Training time=0.48s (22.29%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3675|ppo_ep: 1|act_loss: 0.02960205078125|cri_loss: 0.01507568359375|unsuper_loss: 0.0
-average reward score: 4.94140625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.28%) |Training time=0.48s (22.13%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3676|ppo_ep: 1|act_loss: 0.0081329345703125|cri_loss: 0.00453948974609375|unsuper_loss: 0.0
-average reward score: 5.44921875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.90%) |Training time=0.49s (22.50%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3677|ppo_ep: 1|act_loss: 0.030242919921875|cri_loss: 0.01534271240234375|unsuper_loss: 0.0
-average reward score: 4.87890625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.10%) |Training time=0.48s (22.34%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3678|ppo_ep: 1|act_loss: 0.009796142578125|cri_loss: 0.005096435546875|unsuper_loss: 0.0
-average reward score: 4.33203125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.40%) |Training time=0.48s (22.02%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
-[2023-04-14 11:03:10,632] [INFO] [logging.py:96:log_dist] [Rank 0] step=3680, skipped=50, lr=[5.8390733851223396e-06, 5.8390733851223396e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:03:10,651] [INFO] [timer.py:199:stop] epoch=0/micro_step=3680/global_step=3680, RunningAvgSamplesPerSec=105.61558640234166, CurrSamplesPerSec=100.31370274593043, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:03:10,744] [INFO] [logging.py:96:log_dist] [Rank 0] step=3680, skipped=57, lr=[3.0320117089833414e-06, 3.0320117089833414e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3679|ppo_ep: 1|act_loss: -0.0201873779296875|cri_loss: -0.0089111328125|unsuper_loss: 0.0
-average reward score: 5.4609375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.24%) |Training time=0.48s (22.23%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3680|ppo_ep: 1|act_loss: -0.060821533203125|cri_loss: -0.028778076171875|unsuper_loss: 0.0
-average reward score: 5.39453125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.02%) |Training time=0.49s (22.39%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3681|ppo_ep: 1|act_loss: 0.003955841064453125|cri_loss: 0.004291534423828125|unsuper_loss: 0.0
-average reward score: 4.5
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.61%) |Training time=0.47s (21.84%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3682|ppo_ep: 1|act_loss: -0.01025390625|cri_loss: -0.0044708251953125|unsuper_loss: 0.0
-average reward score: 4.9609375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.97%) |Training time=0.49s (22.48%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3683|ppo_ep: 1|act_loss: -0.026702880859375|cri_loss: -0.0126190185546875|unsuper_loss: 0.0
-average reward score: 5.83984375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.48%) |Training time=0.48s (21.94%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3684|ppo_ep: 1|act_loss: -0.0031147003173828125|cri_loss: -0.0009250640869140625|unsuper_loss: 0.0
-average reward score: 5.26953125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.30%) |Training time=0.48s (22.19%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3685|ppo_ep: 1|act_loss: 0.017364501953125|cri_loss: 0.00927734375|unsuper_loss: 0.0
-average reward score: 5.3671875
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.26%) |Training time=0.49s (22.07%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.47 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3686|ppo_ep: 1|act_loss: 0.017181396484375|cri_loss: 0.0089111328125|unsuper_loss: 0.0
-average reward score: 5.0390625
--------------------------------------------------------------------------------------
-|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.67s (74.08%) |Training time=0.49s (21.48%) |Others=0.10 (4.43%)|CurSamplesPerSec=14.16 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3687|ppo_ep: 1|act_loss: 0.025054931640625|cri_loss: 0.01331329345703125|unsuper_loss: 0.0
-average reward score: 5.5
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.61%) |Training time=0.47s (21.82%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3688|ppo_ep: 1|act_loss: 0.032684326171875|cri_loss: 0.0173187255859375|unsuper_loss: 0.0
-average reward score: 5.28515625
--------------------------------------------------------------------------------------
-|E2E latency=3.45s |Gather latency=0.00s (0.00%) |Generate time=1.59s (45.98%) |Training time=0.49s (14.10%) |Others=1.38 (39.92%)|CurSamplesPerSec=9.28 |AvgSamplesPerSec=14.46
-[2023-04-14 11:03:33,753] [INFO] [logging.py:96:log_dist] [Rank 0] step=3690, skipped=50, lr=[5.820904612759442e-06, 5.820904612759442e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:03:33,772] [INFO] [timer.py:199:stop] epoch=0/micro_step=3690/global_step=3690, RunningAvgSamplesPerSec=105.59878176187217, CurrSamplesPerSec=96.78853632804528, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:03:33,865] [INFO] [logging.py:96:log_dist] [Rank 0] step=3690, skipped=57, lr=[3.022603274311307e-06, 3.022603274311307e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3689|ppo_ep: 1|act_loss: -0.01277923583984375|cri_loss: -0.0057220458984375|unsuper_loss: 0.0
-average reward score: 5.0390625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.77%) |Training time=0.49s (22.68%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3690|ppo_ep: 1|act_loss: 0.0052642822265625|cri_loss: 0.00312042236328125|unsuper_loss: 0.0
-average reward score: 4.484375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.15%) |Training time=0.48s (22.29%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3691|ppo_ep: 1|act_loss: -0.028533935546875|cri_loss: -0.01375579833984375|unsuper_loss: 0.0
-average reward score: 5.3125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.87%) |Training time=0.49s (22.59%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3692|ppo_ep: 1|act_loss: -0.03851318359375|cri_loss: -0.0184783935546875|unsuper_loss: 0.0
-average reward score: 5.12109375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.64%) |Training time=0.49s (22.79%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3693|ppo_ep: 1|act_loss: 0.05462646484375|cri_loss: 0.031951904296875|unsuper_loss: 0.0
-average reward score: 5.15625
--------------------------------------------------------------------------------------
-|E2E latency=3.03s |Gather latency=0.00s (0.00%) |Generate time=1.59s (52.57%) |Training time=0.49s (16.28%) |Others=0.94 (31.15%)|CurSamplesPerSec=10.55 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3694|ppo_ep: 1|act_loss: -0.03289794921875|cri_loss: -0.015838623046875|unsuper_loss: 0.0
-average reward score: 4.7890625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.57s (72.56%) |Training time=0.50s (22.90%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3695|ppo_ep: 1|act_loss: 0.0087890625|cri_loss: 0.004550933837890625|unsuper_loss: 0.0
-average reward score: 5.2890625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.63%) |Training time=0.49s (22.80%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
-[2023-04-14 11:03:49,795] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, reducing to 16384
-epoch: 0|step: 3696|ppo_ep: 1|act_loss: -0.009063720703125|cri_loss: -0.0033855438232421875|unsuper_loss: 0.0
-average reward score: 4.74609375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.64%) |Training time=0.47s (21.74%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46
-[2023-04-14 11:03:51,942] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 16384, reducing to 8192
-epoch: 0|step: 3697|ppo_ep: 1|act_loss: -0.06842041015625|cri_loss: -0.03167724609375|unsuper_loss: 0.0
-average reward score: 5.03125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.41%) |Training time=0.47s (21.99%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3698|ppo_ep: 1|act_loss: 0.05010986328125|cri_loss: 0.0263214111328125|unsuper_loss: 0.0
-average reward score: 5.234375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.61%) |Training time=0.50s (22.86%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
-[2023-04-14 11:03:56,267] [INFO] [logging.py:96:log_dist] [Rank 0] step=3700, skipped=52, lr=[5.806358953504726e-06, 5.806358953504726e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:03:56,286] [INFO] [timer.py:199:stop] epoch=0/micro_step=3700/global_step=3700, RunningAvgSamplesPerSec=105.5771961459714, CurrSamplesPerSec=97.05807384695485, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:03:56,380] [INFO] [logging.py:96:log_dist] [Rank 0] step=3700, skipped=57, lr=[3.013187093398035e-06, 3.013187093398035e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3699|ppo_ep: 1|act_loss: -0.003143310546875|cri_loss: -0.001316070556640625|unsuper_loss: 0.0
-average reward score: 4.859375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.75%) |Training time=0.49s (22.67%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3700|ppo_ep: 1|act_loss: -0.018280029296875|cri_loss: -0.008575439453125|unsuper_loss: 0.0
-average reward score: 4.87890625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.11%) |Training time=0.48s (22.36%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3701|ppo_ep: 1|act_loss: -0.018524169921875|cri_loss: -0.00849151611328125|unsuper_loss: 0.0
-average reward score: 4.8125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.24%) |Training time=0.48s (22.16%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3702|ppo_ep: 1|act_loss: 0.01904296875|cri_loss: 0.00995635986328125|unsuper_loss: 0.0
-average reward score: 5.17578125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.31%) |Training time=0.48s (22.14%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3703|ppo_ep: 1|act_loss: 0.00037097930908203125|cri_loss: 0.0003833770751953125|unsuper_loss: 0.0
-average reward score: 5.90234375
--------------------------------------------------------------------------------------
-|E2E latency=3.27s |Gather latency=0.00s (0.00%) |Generate time=1.75s (53.58%) |Training time=0.49s (14.82%) |Others=1.03 (31.60%)|CurSamplesPerSec=9.78 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3704|ppo_ep: 1|act_loss: -0.04229736328125|cri_loss: -0.01947021484375|unsuper_loss: 0.0
-average reward score: 5.7265625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.03%) |Training time=0.49s (22.37%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3705|ppo_ep: 1|act_loss: -0.007030487060546875|cri_loss: -0.0033054351806640625|unsuper_loss: 0.0
-average reward score: 4.59375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.01%) |Training time=0.49s (22.40%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3706|ppo_ep: 1|act_loss: -0.02105712890625|cri_loss: -0.0096282958984375|unsuper_loss: 0.0
-average reward score: 5.09375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.17%) |Training time=0.48s (22.25%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3707|ppo_ep: 1|act_loss: -0.01012420654296875|cri_loss: -0.00421905517578125|unsuper_loss: 0.0
-average reward score: 5.02734375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.91%) |Training time=0.49s (22.56%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3708|ppo_ep: 1|act_loss: 0.0220947265625|cri_loss: 0.01151275634765625|unsuper_loss: 0.0
-average reward score: 5.20703125
--------------------------------------------------------------------------------------
-|E2E latency=2.70s |Gather latency=0.00s (0.00%) |Generate time=1.59s (58.93%) |Training time=0.48s (17.65%) |Others=0.63 (23.42%)|CurSamplesPerSec=11.85 |AvgSamplesPerSec=14.46
-[2023-04-14 11:04:19,624] [INFO] [logging.py:96:log_dist] [Rank 0] step=3710, skipped=52, lr=[5.788163804112695e-06, 5.788163804112695e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:04:19,642] [INFO] [timer.py:199:stop] epoch=0/micro_step=3710/global_step=3710, RunningAvgSamplesPerSec=105.55912844115356, CurrSamplesPerSec=96.80682664850997, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:04:19,736] [INFO] [logging.py:96:log_dist] [Rank 0] step=3710, skipped=57, lr=[3.0037633058140433e-06, 3.0037633058140433e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3709|ppo_ep: 1|act_loss: 0.0076446533203125|cri_loss: 0.00484466552734375|unsuper_loss: 0.0
-average reward score: 5.703125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.57s (72.63%) |Training time=0.49s (22.78%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3710|ppo_ep: 1|act_loss: 0.03350830078125|cri_loss: 0.0171356201171875|unsuper_loss: 0.0
-average reward score: 4.22265625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.71%) |Training time=0.49s (22.71%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3711|ppo_ep: 1|act_loss: -0.0281982421875|cri_loss: -0.01369476318359375|unsuper_loss: 0.0
-average reward score: 5.3359375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.98%) |Training time=0.49s (22.45%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3712|ppo_ep: 1|act_loss: 0.04644775390625|cri_loss: 0.02423095703125|unsuper_loss: 0.0
-average reward score: 5.6015625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.90%) |Training time=0.49s (22.59%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3713|ppo_ep: 1|act_loss: 0.0093536376953125|cri_loss: 0.0048980712890625|unsuper_loss: 0.0
-average reward score: 4.2109375
--------------------------------------------------------------------------------------
-|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.61s (72.46%) |Training time=0.51s (22.91%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.38 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3714|ppo_ep: 1|act_loss: -0.018890380859375|cri_loss: -0.0089263916015625|unsuper_loss: 0.0
-average reward score: 5.390625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.02%) |Training time=0.49s (22.41%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3715|ppo_ep: 1|act_loss: -0.0121917724609375|cri_loss: -0.00533294677734375|unsuper_loss: 0.0
-average reward score: 5.9140625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.09%) |Training time=0.49s (22.28%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3716|ppo_ep: 1|act_loss: -0.04791259765625|cri_loss: -0.0214996337890625|unsuper_loss: 0.0
-average reward score: 5.19140625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.10%) |Training time=0.46s (21.30%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3717|ppo_ep: 1|act_loss: -0.0043792724609375|cri_loss: -0.00199127197265625|unsuper_loss: 0.0
-average reward score: 5.00390625
--------------------------------------------------------------------------------------
-|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.02%) |Training time=0.48s (20.75%) |Others=0.10 (4.23%)|CurSamplesPerSec=13.71 |AvgSamplesPerSec=14.46
-[2023-04-14 11:04:39,479] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
-epoch: 0|step: 3718|ppo_ep: 1|act_loss: -0.0160369873046875|cri_loss: -0.007083892822265625|unsuper_loss: 0.0
-average reward score: 5.7890625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.37%) |Training time=0.49s (22.54%) |Others=0.09 (4.09%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
-[2023-04-14 11:04:41,549] [INFO] [logging.py:96:log_dist] [Rank 0] step=3720, skipped=52, lr=[5.769954378309581e-06, 5.769954378309581e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:04:41,567] [INFO] [timer.py:199:stop] epoch=0/micro_step=3720/global_step=3720, RunningAvgSamplesPerSec=105.53938682509363, CurrSamplesPerSec=100.27136032034784, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:04:42,121] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
-[2023-04-14 11:04:42,122] [INFO] [logging.py:96:log_dist] [Rank 0] step=3720, skipped=59, lr=[2.99621889280735e-06, 2.99621889280735e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3719|ppo_ep: 1|act_loss: 0.00487518310546875|cri_loss: 0.0027008056640625|unsuper_loss: 0.0
-average reward score: 5.5390625
--------------------------------------------------------------------------------------
-|E2E latency=2.64s |Gather latency=0.00s (0.00%) |Generate time=1.60s (60.67%) |Training time=0.95s (36.04%) |Others=0.09 (3.29%)|CurSamplesPerSec=12.11 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3720|ppo_ep: 1|act_loss: 0.0572509765625|cri_loss: 0.030029296875|unsuper_loss: 0.0
-average reward score: 5.40234375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.60%) |Training time=0.50s (22.83%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3721|ppo_ep: 1|act_loss: 0.03192138671875|cri_loss: 0.0172576904296875|unsuper_loss: 0.0
-average reward score: 4.87109375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.63%) |Training time=0.50s (22.84%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3722|ppo_ep: 1|act_loss: 0.0190887451171875|cri_loss: 0.01143646240234375|unsuper_loss: 0.0
-average reward score: 5.48828125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.57s (72.74%) |Training time=0.49s (22.65%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3723|ppo_ep: 1|act_loss: 0.0341796875|cri_loss: 0.0178985595703125|unsuper_loss: 0.0
-average reward score: 5.59375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.79%) |Training time=0.49s (22.67%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3724|ppo_ep: 1|act_loss: 0.007442474365234375|cri_loss: 0.005657196044921875|unsuper_loss: 0.0
-average reward score: 4.8671875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.57s (72.63%) |Training time=0.49s (22.77%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3725|ppo_ep: 1|act_loss: 0.047576904296875|cri_loss: 0.026275634765625|unsuper_loss: 0.0
-average reward score: 4.34375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.57s (72.29%) |Training time=0.50s (23.21%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3726|ppo_ep: 1|act_loss: -0.020843505859375|cri_loss: -0.009979248046875|unsuper_loss: 0.0
-average reward score: 5.66015625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.68%) |Training time=0.49s (22.72%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3727|ppo_ep: 1|act_loss: -0.0172119140625|cri_loss: -0.0084991455078125|unsuper_loss: 0.0
-average reward score: 5.3671875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.55%) |Training time=0.50s (22.93%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3728|ppo_ep: 1|act_loss: -0.02239990234375|cri_loss: -0.010528564453125|unsuper_loss: 0.0
-average reward score: 5.5390625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.51%) |Training time=0.50s (22.95%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
-[2023-04-14 11:05:03,709] [INFO] [logging.py:96:log_dist] [Rank 0] step=3730, skipped=52, lr=[5.7517309460030025e-06, 5.7517309460030025e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:05:03,727] [INFO] [timer.py:199:stop] epoch=0/micro_step=3730/global_step=3730, RunningAvgSamplesPerSec=105.51180342958422, CurrSamplesPerSec=98.24020143168742, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:05:03,821] [INFO] [logging.py:96:log_dist] [Rank 0] step=3730, skipped=59, lr=[2.9867817652923354e-06, 2.9867817652923354e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3729|ppo_ep: 1|act_loss: -0.0194091796875|cri_loss: -0.009429931640625|unsuper_loss: 0.0
-average reward score: 4.7265625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.96%) |Training time=0.49s (22.49%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3730|ppo_ep: 1|act_loss: -0.02294921875|cri_loss: -0.01116180419921875|unsuper_loss: 0.0
-average reward score: 5.140625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.87%) |Training time=0.49s (22.58%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3731|ppo_ep: 1|act_loss: -0.0149078369140625|cri_loss: -0.006526947021484375|unsuper_loss: 0.0
-average reward score: 4.7734375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.15%) |Training time=0.48s (22.30%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3732|ppo_ep: 1|act_loss: 0.040771484375|cri_loss: 0.021240234375|unsuper_loss: 0.0
-average reward score: 5.578125
--------------------------------------------------------------------------------------
-|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.33%) |Training time=0.48s (20.48%) |Others=0.10 (4.20%)|CurSamplesPerSec=13.70 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3733|ppo_ep: 1|act_loss: 0.03448486328125|cri_loss: 0.0180511474609375|unsuper_loss: 0.0
-average reward score: 5.48828125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.12%) |Training time=0.49s (22.37%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3734|ppo_ep: 1|act_loss: 0.04742431640625|cri_loss: 0.02508544921875|unsuper_loss: 0.0
-average reward score: 5.1015625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.22%) |Training time=0.48s (22.26%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3735|ppo_ep: 1|act_loss: 0.0178375244140625|cri_loss: 0.00946807861328125|unsuper_loss: 0.0
-average reward score: 4.83203125
--------------------------------------------------------------------------------------
-|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.59s (67.02%) |Training time=0.48s (20.40%) |Others=0.30 (12.59%)|CurSamplesPerSec=13.50 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3736|ppo_ep: 1|act_loss: 0.0396728515625|cri_loss: 0.0210418701171875|unsuper_loss: 0.0
-average reward score: 6.1640625
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.61%) |Training time=0.48s (22.08%) |Others=0.12 (5.31%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3737|ppo_ep: 1|act_loss: 0.00572967529296875|cri_loss: 0.0031280517578125|unsuper_loss: 0.0
-average reward score: 4.50390625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.54%) |Training time=0.47s (21.91%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3738|ppo_ep: 1|act_loss: 0.01273345947265625|cri_loss: 0.00933074951171875|unsuper_loss: 0.0
-average reward score: 4.9140625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.80%) |Training time=0.49s (22.68%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46
-[2023-04-14 11:05:25,794] [INFO] [logging.py:96:log_dist] [Rank 0] step=3740, skipped=52, lr=[5.733493777308187e-06, 5.733493777308187e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:05:25,813] [INFO] [timer.py:199:stop] epoch=0/micro_step=3740/global_step=3740, RunningAvgSamplesPerSec=105.49538247453388, CurrSamplesPerSec=100.76580001006023, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:05:25,905] [INFO] [logging.py:96:log_dist] [Rank 0] step=3740, skipped=59, lr=[2.977337422497263e-06, 2.977337422497263e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3739|ppo_ep: 1|act_loss: -0.03662109375|cri_loss: -0.0174713134765625|unsuper_loss: 0.0
-average reward score: 5.36328125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.33%) |Training time=0.48s (22.16%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3740|ppo_ep: 1|act_loss: -0.02685546875|cri_loss: -0.01302337646484375|unsuper_loss: 0.0
-average reward score: 5.0390625
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.61%) |Training time=0.50s (22.89%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3741|ppo_ep: 1|act_loss: -0.000637054443359375|cri_loss: 0.001438140869140625|unsuper_loss: 0.0
-average reward score: 5.6796875
--------------------------------------------------------------------------------------
-|E2E latency=2.42s |Gather latency=0.00s (0.00%) |Generate time=1.61s (66.66%) |Training time=0.48s (19.73%) |Others=0.33 (13.61%)|CurSamplesPerSec=13.23 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3742|ppo_ep: 1|act_loss: -0.0020751953125|cri_loss: -0.0006694793701171875|unsuper_loss: 0.0
-average reward score: 5.17578125
--------------------------------------------------------------------------------------
-|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.66s (72.44%) |Training time=0.47s (20.73%) |Others=0.16 (6.83%)|CurSamplesPerSec=13.99 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3743|ppo_ep: 1|act_loss: -0.0175628662109375|cri_loss: -0.0082855224609375|unsuper_loss: 0.0
-average reward score: 3.8359375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.62%) |Training time=0.45s (20.86%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3744|ppo_ep: 1|act_loss: 0.035675048828125|cri_loss: 0.0195159912109375|unsuper_loss: 0.0
-average reward score: 5.43359375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.40%) |Training time=0.44s (20.06%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3745|ppo_ep: 1|act_loss: -0.037872314453125|cri_loss: -0.0181121826171875|unsuper_loss: 0.0
-average reward score: 4.8359375
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.82%) |Training time=0.48s (21.66%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.57 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3746|ppo_ep: 1|act_loss: -0.006008148193359375|cri_loss: -0.002582550048828125|unsuper_loss: 0.0
-average reward score: 5.44140625
--------------------------------------------------------------------------------------
-|E2E latency=2.39s |Gather latency=0.00s (0.00%) |Generate time=1.80s (75.25%) |Training time=0.49s (20.63%) |Others=0.10 (4.13%)|CurSamplesPerSec=13.39 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3747|ppo_ep: 1|act_loss: -0.001678466796875|cri_loss: -0.0007061958312988281|unsuper_loss: 0.0
-average reward score: 5.640625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.73%) |Training time=0.44s (20.68%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3748|ppo_ep: 1|act_loss: 0.01319122314453125|cri_loss: 0.00751495361328125|unsuper_loss: 0.0
-average reward score: 5.69140625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.06%) |Training time=0.44s (20.31%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46
-[2023-04-14 11:05:48,055] [INFO] [logging.py:96:log_dist] [Rank 0] step=3750, skipped=52, lr=[5.7152431425439704e-06, 5.7152431425439704e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:05:48,073] [INFO] [timer.py:199:stop] epoch=0/micro_step=3750/global_step=3750, RunningAvgSamplesPerSec=105.49968423227209, CurrSamplesPerSec=121.1488010831547, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:05:48,166] [INFO] [logging.py:96:log_dist] [Rank 0] step=3750, skipped=59, lr=[2.9678860044100745e-06, 2.9678860044100745e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3749|ppo_ep: 1|act_loss: -0.014923095703125|cri_loss: -0.0067596435546875|unsuper_loss: 0.0
-average reward score: 4.671875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.50%) |Training time=0.43s (19.92%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3750|ppo_ep: 1|act_loss: 0.04046630859375|cri_loss: 0.0205535888671875|unsuper_loss: 0.0
-average reward score: 5.59375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.05%) |Training time=0.44s (20.32%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3751|ppo_ep: 1|act_loss: 0.0002613067626953125|cri_loss: 0.0005140304565429688|unsuper_loss: 0.0
-average reward score: 5.6484375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.54%) |Training time=0.45s (20.87%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3752|ppo_ep: 1|act_loss: 0.02374267578125|cri_loss: 0.01258087158203125|unsuper_loss: 0.0
-average reward score: 6.01953125
--------------------------------------------------------------------------------------
-|E2E latency=2.63s |Gather latency=0.00s (0.00%) |Generate time=1.60s (60.84%) |Training time=0.45s (17.18%) |Others=0.58 (21.99%)|CurSamplesPerSec=12.18 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3753|ppo_ep: 1|act_loss: -0.004230499267578125|cri_loss: -0.001766204833984375|unsuper_loss: 0.0
-average reward score: 4.390625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.26%) |Training time=0.45s (21.07%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3754|ppo_ep: 1|act_loss: -0.0200347900390625|cri_loss: -0.0087890625|unsuper_loss: 0.0
-average reward score: 5.26953125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.67%) |Training time=0.44s (20.74%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3755|ppo_ep: 1|act_loss: -0.004261016845703125|cri_loss: -0.0016603469848632812|unsuper_loss: 0.0
-average reward score: 5.82421875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.24%) |Training time=0.45s (21.10%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3756|ppo_ep: 1|act_loss: -0.032989501953125|cri_loss: -0.0160675048828125|unsuper_loss: 0.0
-average reward score: 5.93359375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.39%) |Training time=0.45s (21.03%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3757|ppo_ep: 1|act_loss: 0.015899658203125|cri_loss: 0.0082550048828125|unsuper_loss: 0.0
-average reward score: 5.26171875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.80%) |Training time=0.44s (20.51%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3758|ppo_ep: 1|act_loss: 0.00212860107421875|cri_loss: 0.002384185791015625|unsuper_loss: 0.0
-average reward score: 5.05859375
--------------------------------------------------------------------------------------
-|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.60s (69.51%) |Training time=0.44s (19.25%) |Others=0.26 (11.24%)|CurSamplesPerSec=13.87 |AvgSamplesPerSec=14.46
-[2023-04-14 11:06:10,155] [INFO] [logging.py:96:log_dist] [Rank 0] step=3760, skipped=52, lr=[5.6969793122287855e-06, 5.6969793122287855e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:06:10,173] [INFO] [timer.py:199:stop] epoch=0/micro_step=3760/global_step=3760, RunningAvgSamplesPerSec=105.51794677764592, CurrSamplesPerSec=115.36689304357319, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:06:10,265] [INFO] [logging.py:96:log_dist] [Rank 0] step=3760, skipped=59, lr=[2.9584276511235884e-06, 2.9584276511235884e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3759|ppo_ep: 1|act_loss: -0.00699615478515625|cri_loss: -0.0029449462890625|unsuper_loss: 0.0
-average reward score: 4.09375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.77%) |Training time=0.44s (20.55%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3760|ppo_ep: 1|act_loss: -0.0217742919921875|cri_loss: -0.00971221923828125|unsuper_loss: 0.0
-average reward score: 5.3359375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.03%) |Training time=0.44s (20.35%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3761|ppo_ep: 1|act_loss: -0.029144287109375|cri_loss: -0.0140380859375|unsuper_loss: 0.0
-average reward score: 5.48828125
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.45%) |Training time=0.51s (22.99%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.46 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3762|ppo_ep: 1|act_loss: -0.08831787109375|cri_loss: -0.04180908203125|unsuper_loss: 0.0
-average reward score: 5.45703125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.11%) |Training time=0.46s (21.30%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3763|ppo_ep: 1|act_loss: 0.020843505859375|cri_loss: 0.01064300537109375|unsuper_loss: 0.0
-average reward score: 5.05859375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.69%) |Training time=0.46s (21.57%) |Others=0.10 (4.73%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3764|ppo_ep: 1|act_loss: 0.033203125|cri_loss: 0.0171661376953125|unsuper_loss: 0.0
-average reward score: 4.83203125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.16%) |Training time=0.46s (21.26%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3765|ppo_ep: 1|act_loss: -0.01349639892578125|cri_loss: -0.00501251220703125|unsuper_loss: 0.0
-average reward score: 5.8984375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.57%) |Training time=0.47s (21.77%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3766|ppo_ep: 1|act_loss: 0.0176239013671875|cri_loss: 0.00907135009765625|unsuper_loss: 0.0
-average reward score: 5.2734375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.91%) |Training time=0.46s (21.50%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3767|ppo_ep: 1|act_loss: -0.004486083984375|cri_loss: -0.0013856887817382812|unsuper_loss: 0.0
-average reward score: 4.35546875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.64%) |Training time=0.47s (21.71%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3768|ppo_ep: 1|act_loss: -0.0341796875|cri_loss: -0.016265869140625|unsuper_loss: 0.0
-average reward score: 5.515625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.91%) |Training time=0.46s (21.42%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46
-[2023-04-14 11:06:31,673] [INFO] [logging.py:96:log_dist] [Rank 0] step=3770, skipped=52, lr=[5.678702557076659e-06, 5.678702557076659e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:06:31,691] [INFO] [timer.py:199:stop] epoch=0/micro_step=3770/global_step=3770, RunningAvgSamplesPerSec=105.52253519060744, CurrSamplesPerSec=106.89682394300014, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:06:31,784] [INFO] [logging.py:96:log_dist] [Rank 0] step=3770, skipped=59, lr=[2.9489625028334145e-06, 2.9489625028334145e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3769|ppo_ep: 1|act_loss: 0.001468658447265625|cri_loss: 0.0014200210571289062|unsuper_loss: 0.0
-average reward score: 5.06640625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.82%) |Training time=0.46s (21.53%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3770|ppo_ep: 1|act_loss: 0.00737762451171875|cri_loss: 0.0038051605224609375|unsuper_loss: 0.0
-average reward score: 4.51171875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.78%) |Training time=0.47s (21.65%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3771|ppo_ep: 1|act_loss: 0.0081634521484375|cri_loss: 0.00421142578125|unsuper_loss: 0.0
-average reward score: 5.03125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.53%) |Training time=0.48s (21.96%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3772|ppo_ep: 1|act_loss: -0.021820068359375|cri_loss: -0.00951385498046875|unsuper_loss: 0.0
-average reward score: 4.21875
--------------------------------------------------------------------------------------
-|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.50%) |Training time=0.48s (21.18%) |Others=0.10 (4.32%)|CurSamplesPerSec=14.11 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3773|ppo_ep: 1|act_loss: -0.02081298828125|cri_loss: -0.0070648193359375|unsuper_loss: 0.0
-average reward score: 4.50390625
--------------------------------------------------------------------------------------
-|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.59s (70.19%) |Training time=0.46s (20.09%) |Others=0.22 (9.72%)|CurSamplesPerSec=14.12 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3774|ppo_ep: 1|act_loss: -0.003711700439453125|cri_loss: -0.0015134811401367188|unsuper_loss: 0.0
-average reward score: 4.95703125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.18%) |Training time=0.46s (21.15%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3775|ppo_ep: 1|act_loss: -0.018524169921875|cri_loss: -0.0089569091796875|unsuper_loss: 0.0
-average reward score: 4.953125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.38%) |Training time=0.45s (21.02%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3776|ppo_ep: 1|act_loss: 0.0028858184814453125|cri_loss: 0.0022220611572265625|unsuper_loss: 0.0
-average reward score: 5.6640625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.50%) |Training time=0.43s (19.88%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3777|ppo_ep: 1|act_loss: 0.037353515625|cri_loss: 0.01947021484375|unsuper_loss: 0.0
-average reward score: 5.20703125
--------------------------------------------------------------------------------------
-|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.44%) |Training time=0.47s (20.25%) |Others=0.10 (4.31%)|CurSamplesPerSec=13.70 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3778|ppo_ep: 1|act_loss: 0.0109710693359375|cri_loss: 0.005802154541015625|unsuper_loss: 0.0
-average reward score: 4.734375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.57%) |Training time=0.47s (21.87%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46
-[2023-04-14 11:06:53,644] [INFO] [logging.py:96:log_dist] [Rank 0] step=3780, skipped=52, lr=[5.6604131479931914e-06, 5.6604131479931914e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:06:53,662] [INFO] [timer.py:199:stop] epoch=0/micro_step=3780/global_step=3780, RunningAvgSamplesPerSec=105.52500263158235, CurrSamplesPerSec=103.33717113747495, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:06:53,754] [INFO] [logging.py:96:log_dist] [Rank 0] step=3780, skipped=59, lr=[2.939490699835887e-06, 2.939490699835887e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3779|ppo_ep: 1|act_loss: 0.00405120849609375|cri_loss: 0.002605438232421875|unsuper_loss: 0.0
-average reward score: 6.30859375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.46%) |Training time=0.47s (21.93%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3780|ppo_ep: 1|act_loss: -0.017364501953125|cri_loss: -0.0079803466796875|unsuper_loss: 0.0
-average reward score: 5.4453125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.63%) |Training time=0.47s (21.82%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3781|ppo_ep: 1|act_loss: 0.0308074951171875|cri_loss: 0.0172119140625|unsuper_loss: 0.0
-average reward score: 4.98828125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.48%) |Training time=0.47s (21.86%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3782|ppo_ep: 1|act_loss: -0.003948211669921875|cri_loss: -0.0017042160034179688|unsuper_loss: 0.0
-average reward score: 5.53125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.98%) |Training time=0.48s (22.45%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3783|ppo_ep: 1|act_loss: -0.0244140625|cri_loss: -0.0100250244140625|unsuper_loss: 0.0
-average reward score: 5.87109375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.52%) |Training time=0.47s (21.82%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3784|ppo_ep: 1|act_loss: -0.016387939453125|cri_loss: -0.007472991943359375|unsuper_loss: 0.0
-average reward score: 5.04296875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.61%) |Training time=0.47s (21.83%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3785|ppo_ep: 1|act_loss: -0.031341552734375|cri_loss: -0.0128021240234375|unsuper_loss: 0.0
-average reward score: 5.25390625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.36%) |Training time=0.48s (22.10%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3786|ppo_ep: 1|act_loss: 0.003833770751953125|cri_loss: 0.002223968505859375|unsuper_loss: 0.0
-average reward score: 5.15234375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.58%) |Training time=0.47s (21.87%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3787|ppo_ep: 1|act_loss: 0.01019287109375|cri_loss: 0.0052642822265625|unsuper_loss: 0.0
-average reward score: 6.0546875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.33%) |Training time=0.48s (22.05%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3788|ppo_ep: 1|act_loss: 0.01226806640625|cri_loss: 0.0078887939453125|unsuper_loss: 0.0
-average reward score: 5.5078125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.44%) |Training time=0.47s (21.92%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
-[2023-04-14 11:07:15,210] [INFO] [logging.py:96:log_dist] [Rank 0] step=3790, skipped=52, lr=[5.642111356071544e-06, 5.642111356071544e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:07:15,508] [INFO] [timer.py:199:stop] epoch=0/micro_step=3790/global_step=3790, RunningAvgSamplesPerSec=105.49148721233601, CurrSamplesPerSec=53.862661932836275, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:07:15,602] [INFO] [logging.py:96:log_dist] [Rank 0] step=3790, skipped=59, lr=[2.9300123825259737e-06, 2.9300123825259737e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3789|ppo_ep: 1|act_loss: 0.031036376953125|cri_loss: 0.0164794921875|unsuper_loss: 0.0
-average reward score: 5.59375
--------------------------------------------------------------------------------------
-|E2E latency=2.44s |Gather latency=0.00s (0.00%) |Generate time=1.58s (64.83%) |Training time=0.76s (31.04%) |Others=0.10 (4.12%)|CurSamplesPerSec=13.12 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3790|ppo_ep: 1|act_loss: 0.0275115966796875|cri_loss: 0.0144805908203125|unsuper_loss: 0.0
-average reward score: 6.15625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.44%) |Training time=0.48s (21.99%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3791|ppo_ep: 1|act_loss: 0.03350830078125|cri_loss: 0.0183563232421875|unsuper_loss: 0.0
-average reward score: 5.69921875
--------------------------------------------------------------------------------------
-|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.76s (74.68%) |Training time=0.50s (21.06%) |Others=0.10 (4.27%)|CurSamplesPerSec=13.61 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3792|ppo_ep: 1|act_loss: 0.039764404296875|cri_loss: 0.0226287841796875|unsuper_loss: 0.0
-average reward score: 5.9375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.53%) |Training time=0.47s (21.92%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3793|ppo_ep: 1|act_loss: -0.01373291015625|cri_loss: -0.006465911865234375|unsuper_loss: 0.0
-average reward score: 5.77734375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.41%) |Training time=0.47s (21.95%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3794|ppo_ep: 1|act_loss: -0.0089263916015625|cri_loss: -0.003650665283203125|unsuper_loss: 0.0
-average reward score: 5.875
--------------------------------------------------------------------------------------
-|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.58s (71.16%) |Training time=0.47s (21.27%) |Others=0.17 (7.57%)|CurSamplesPerSec=14.38 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3795|ppo_ep: 1|act_loss: -0.058349609375|cri_loss: -0.0270233154296875|unsuper_loss: 0.0
-average reward score: 4.83203125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.38%) |Training time=0.48s (22.06%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3796|ppo_ep: 1|act_loss: -0.0509033203125|cri_loss: -0.024871826171875|unsuper_loss: 0.0
-average reward score: 4.83984375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.62%) |Training time=0.47s (21.83%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3797|ppo_ep: 1|act_loss: -0.01873779296875|cri_loss: -0.0084228515625|unsuper_loss: 0.0
-average reward score: 5.01171875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.22%) |Training time=0.48s (22.15%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3798|ppo_ep: 1|act_loss: -0.00775909423828125|cri_loss: -0.0035953521728515625|unsuper_loss: 0.0
-average reward score: 5.2578125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.34%) |Training time=0.48s (22.12%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
-[2023-04-14 11:07:37,337] [INFO] [logging.py:96:log_dist] [Rank 0] step=3800, skipped=52, lr=[5.623797452588428e-06, 5.623797452588428e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:07:37,355] [INFO] [timer.py:199:stop] epoch=0/micro_step=3800/global_step=3800, RunningAvgSamplesPerSec=105.48203854925829, CurrSamplesPerSec=105.53218149137692, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:07:37,448] [INFO] [logging.py:96:log_dist] [Rank 0] step=3800, skipped=59, lr=[2.9205276913952023e-06, 2.9205276913952023e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3799|ppo_ep: 1|act_loss: 0.007511138916015625|cri_loss: 0.00420379638671875|unsuper_loss: 0.0
-average reward score: 4.9296875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.74%) |Training time=0.47s (21.63%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3800|ppo_ep: 1|act_loss: 0.02117919921875|cri_loss: 0.01085662841796875|unsuper_loss: 0.0
-average reward score: 6.04296875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.68%) |Training time=0.47s (21.53%) |Others=0.10 (4.79%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3801|ppo_ep: 1|act_loss: 0.028350830078125|cri_loss: 0.01506805419921875|unsuper_loss: 0.0
-average reward score: 4.0625
--------------------------------------------------------------------------------------
-|E2E latency=2.30s |Gather latency=0.00s (0.00%) |Generate time=1.64s (71.46%) |Training time=0.56s (24.16%) |Others=0.10 (4.39%)|CurSamplesPerSec=13.92 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3802|ppo_ep: 1|act_loss: 0.019287109375|cri_loss: 0.010101318359375|unsuper_loss: 0.0
-average reward score: 5.16015625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.47%) |Training time=0.47s (21.98%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3803|ppo_ep: 1|act_loss: 0.031219482421875|cri_loss: 0.016265869140625|unsuper_loss: 0.0
-average reward score: 4.6953125
--------------------------------------------------------------------------------------
-|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.59s (67.29%) |Training time=0.47s (19.70%) |Others=0.31 (13.01%)|CurSamplesPerSec=13.52 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3804|ppo_ep: 1|act_loss: -0.0006227493286132812|cri_loss: 0.0002913475036621094|unsuper_loss: 0.0
-average reward score: 6.16796875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.68%) |Training time=0.47s (21.76%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3805|ppo_ep: 1|act_loss: 0.031890869140625|cri_loss: 0.0168304443359375|unsuper_loss: 0.0
-average reward score: 5.765625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.16%) |Training time=0.48s (22.19%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3806|ppo_ep: 1|act_loss: -0.005077362060546875|cri_loss: -0.0024261474609375|unsuper_loss: 0.0
-average reward score: 5.09765625
--------------------------------------------------------------------------------------
-|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.76s (74.60%) |Training time=0.50s (21.22%) |Others=0.10 (4.18%)|CurSamplesPerSec=13.54 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3807|ppo_ep: 1|act_loss: 0.024078369140625|cri_loss: 0.0128631591796875|unsuper_loss: 0.0
-average reward score: 5.3828125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.78%) |Training time=0.46s (21.57%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3808|ppo_ep: 1|act_loss: -0.0394287109375|cri_loss: -0.01849365234375|unsuper_loss: 0.0
-average reward score: 5.20703125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.98%) |Training time=0.46s (21.45%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46
-[2023-04-14 11:07:59,978] [INFO] [logging.py:96:log_dist] [Rank 0] step=3810, skipped=52, lr=[5.605471709000069e-06, 5.605471709000069e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:07:59,996] [INFO] [timer.py:199:stop] epoch=0/micro_step=3810/global_step=3810, RunningAvgSamplesPerSec=105.47664181442124, CurrSamplesPerSec=143.0142483899703, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:08:00,088] [INFO] [logging.py:96:log_dist] [Rank 0] step=3810, skipped=59, lr=[2.911036767029578e-06, 2.911036767029578e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3809|ppo_ep: 1|act_loss: -0.0309906005859375|cri_loss: -0.0133514404296875|unsuper_loss: 0.0
-average reward score: 5.75
--------------------------------------------------------------------------------------
-|E2E latency=2.07s |Gather latency=0.00s (0.00%) |Generate time=1.59s (76.55%) |Training time=0.39s (18.63%) |Others=0.10 (4.82%)|CurSamplesPerSec=15.43 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3810|ppo_ep: 1|act_loss: 0.0113983154296875|cri_loss: 0.006107330322265625|unsuper_loss: 0.0
-average reward score: 6.3203125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.04%) |Training time=0.46s (21.37%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3811|ppo_ep: 1|act_loss: 0.0013666152954101562|cri_loss: 0.00107574462890625|unsuper_loss: 0.0
-average reward score: 6.32421875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.84%) |Training time=0.46s (21.51%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3812|ppo_ep: 1|act_loss: 2.09808349609375e-05|cri_loss: 0.001407623291015625|unsuper_loss: 0.0
-average reward score: 4.8203125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.17%) |Training time=0.46s (21.25%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3813|ppo_ep: 1|act_loss: 0.0174560546875|cri_loss: 0.00921630859375|unsuper_loss: 0.0
-average reward score: 5.33203125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.78%) |Training time=0.46s (21.56%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3814|ppo_ep: 1|act_loss: 0.01617431640625|cri_loss: 0.0084228515625|unsuper_loss: 0.0
-average reward score: 5.453125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.83%) |Training time=0.46s (21.60%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3815|ppo_ep: 1|act_loss: -0.023834228515625|cri_loss: -0.01117706298828125|unsuper_loss: 0.0
-average reward score: 4.58984375
--------------------------------------------------------------------------------------
-|E2E latency=2.94s |Gather latency=0.00s (0.00%) |Generate time=1.59s (54.07%) |Training time=0.46s (15.80%) |Others=0.89 (30.13%)|CurSamplesPerSec=10.89 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3816|ppo_ep: 1|act_loss: 0.005046844482421875|cri_loss: 0.003204345703125|unsuper_loss: 0.0
-average reward score: 4.73046875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.69%) |Training time=0.45s (20.73%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3817|ppo_ep: 1|act_loss: 0.00411224365234375|cri_loss: 0.005214691162109375|unsuper_loss: 0.0
-average reward score: 5.6796875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.48%) |Training time=0.47s (21.89%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3818|ppo_ep: 1|act_loss: 0.0379638671875|cri_loss: 0.0204315185546875|unsuper_loss: 0.0
-average reward score: 4.8203125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.64%) |Training time=0.45s (20.85%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.46
-[2023-04-14 11:08:22,322] [INFO] [logging.py:96:log_dist] [Rank 0] step=3820, skipped=52, lr=[5.587134396938199e-06, 5.587134396938199e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:08:22,340] [INFO] [timer.py:199:stop] epoch=0/micro_step=3820/global_step=3820, RunningAvgSamplesPerSec=105.48597808754263, CurrSamplesPerSec=126.9696646911081, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:08:22,433] [INFO] [logging.py:96:log_dist] [Rank 0] step=3820, skipped=59, lr=[2.9015397501074932e-06, 2.9015397501074932e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3819|ppo_ep: 1|act_loss: -0.00598907470703125|cri_loss: -0.00261688232421875|unsuper_loss: 0.0
-average reward score: 5.70703125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.66s (76.24%) |Training time=0.42s (19.22%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46
-[2023-04-14 11:08:24,724] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
-epoch: 0|step: 3820|ppo_ep: 1|act_loss: -0.0023822784423828125|cri_loss: -0.0002593994140625|unsuper_loss: 0.0
-average reward score: 5.48828125
--------------------------------------------------------------------------------------
-|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.61s (70.45%) |Training time=0.59s (25.67%) |Others=0.09 (3.87%)|CurSamplesPerSec=13.97 |AvgSamplesPerSec=14.46
-[2023-04-14 11:08:26,907] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
-epoch: 0|step: 3821|ppo_ep: 1|act_loss: -0.03167724609375|cri_loss: -0.015045166015625|unsuper_loss: 0.0
-average reward score: 4.75
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.45%) |Training time=0.47s (21.52%) |Others=0.09 (4.03%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3822|ppo_ep: 1|act_loss: -0.001605987548828125|cri_loss: -0.0004248619079589844|unsuper_loss: 0.0
-average reward score: 4.37890625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.81%) |Training time=0.47s (21.65%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3823|ppo_ep: 1|act_loss: -0.0477294921875|cri_loss: -0.022308349609375|unsuper_loss: 0.0
-average reward score: 4.9765625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.28%) |Training time=0.48s (22.10%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3824|ppo_ep: 1|act_loss: 0.05181884765625|cri_loss: 0.028594970703125|unsuper_loss: 0.0
-average reward score: 5.33984375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.62%) |Training time=0.45s (20.83%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3825|ppo_ep: 1|act_loss: -0.0362548828125|cri_loss: -0.0172576904296875|unsuper_loss: 0.0
-average reward score: 5.578125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.52%) |Training time=0.48s (21.83%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3826|ppo_ep: 1|act_loss: -0.0181884765625|cri_loss: -0.00885009765625|unsuper_loss: 0.0
-average reward score: 6.09375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.04%) |Training time=0.47s (21.43%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3827|ppo_ep: 1|act_loss: 0.01497650146484375|cri_loss: 0.00775909423828125|unsuper_loss: 0.0
-average reward score: 6.15234375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.56%) |Training time=0.47s (21.80%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3828|ppo_ep: 1|act_loss: -0.00310516357421875|cri_loss: -0.001155853271484375|unsuper_loss: 0.0
-average reward score: 5.66015625
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.95%) |Training time=0.49s (22.46%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.46
-[2023-04-14 11:08:44,218] [INFO] [logging.py:96:log_dist] [Rank 0] step=3830, skipped=52, lr=[5.568785788206016e-06, 5.568785788206016e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:08:45,110] [INFO] [timer.py:199:stop] epoch=0/micro_step=3830/global_step=3830, RunningAvgSamplesPerSec=105.39178371825996, CurrSamplesPerSec=26.9550695045682, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:08:45,204] [INFO] [logging.py:96:log_dist] [Rank 0] step=3830, skipped=61, lr=[2.8939378445227608e-06, 2.8939378445227608e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3829|ppo_ep: 1|act_loss: 0.023345947265625|cri_loss: 0.01226043701171875|unsuper_loss: 0.0
-average reward score: 4.90234375
--------------------------------------------------------------------------------------
-|E2E latency=3.07s |Gather latency=0.00s (0.00%) |Generate time=1.61s (52.64%) |Training time=1.35s (44.08%) |Others=0.10 (3.28%)|CurSamplesPerSec=10.43 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3830|ppo_ep: 1|act_loss: 0.0284423828125|cri_loss: 0.0149993896484375|unsuper_loss: 0.0
-average reward score: 6.21875
--------------------------------------------------------------------------------------
-|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.70s (74.77%) |Training time=0.48s (20.91%) |Others=0.10 (4.32%)|CurSamplesPerSec=14.05 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3831|ppo_ep: 1|act_loss: 0.025787353515625|cri_loss: 0.01403045654296875|unsuper_loss: 0.0
-average reward score: 5.296875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.22%) |Training time=0.48s (22.17%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3832|ppo_ep: 1|act_loss: 0.03790283203125|cri_loss: 0.01959228515625|unsuper_loss: 0.0
-average reward score: 5.53125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.76%) |Training time=0.47s (21.72%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3833|ppo_ep: 1|act_loss: 0.0158233642578125|cri_loss: 0.00843048095703125|unsuper_loss: 0.0
-average reward score: 5.4140625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.90%) |Training time=0.46s (21.39%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3834|ppo_ep: 1|act_loss: -0.0014019012451171875|cri_loss: 6.198883056640625e-05|unsuper_loss: 0.0
-average reward score: 5.3984375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.95%) |Training time=0.47s (21.50%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3835|ppo_ep: 1|act_loss: -0.026611328125|cri_loss: -0.0128631591796875|unsuper_loss: 0.0
-average reward score: 6.39453125
--------------------------------------------------------------------------------------
-|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.78s (76.04%) |Training time=0.46s (19.65%) |Others=0.10 (4.31%)|CurSamplesPerSec=13.65 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3836|ppo_ep: 1|act_loss: -0.03594970703125|cri_loss: -0.0166778564453125|unsuper_loss: 0.0
-average reward score: 5.04296875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.72%) |Training time=0.47s (21.74%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3837|ppo_ep: 1|act_loss: 0.0117034912109375|cri_loss: 0.008209228515625|unsuper_loss: 0.0
-average reward score: 5.5546875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.28%) |Training time=0.48s (22.18%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3838|ppo_ep: 1|act_loss: -0.00482940673828125|cri_loss: -0.002147674560546875|unsuper_loss: 0.0
-average reward score: 5.25390625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.04%) |Training time=0.49s (22.35%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46
-[2023-04-14 11:09:07,082] [INFO] [logging.py:96:log_dist] [Rank 0] step=3840, skipped=52, lr=[5.550426154774167e-06, 5.550426154774167e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:09:07,100] [INFO] [timer.py:199:stop] epoch=0/micro_step=3840/global_step=3840, RunningAvgSamplesPerSec=105.38504342863314, CurrSamplesPerSec=101.54163110909366, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:09:07,193] [INFO] [logging.py:96:log_dist] [Rank 0] step=3840, skipped=61, lr=[2.8844302157955294e-06, 2.8844302157955294e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3839|ppo_ep: 1|act_loss: 0.00982666015625|cri_loss: 0.005313873291015625|unsuper_loss: 0.0
-average reward score: 5.8515625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.39%) |Training time=0.48s (22.10%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3840|ppo_ep: 1|act_loss: -0.0025463104248046875|cri_loss: -0.000339508056640625|unsuper_loss: 0.0
-average reward score: 5.328125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.37%) |Training time=0.48s (22.08%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3841|ppo_ep: 1|act_loss: 0.02935791015625|cri_loss: 0.01509857177734375|unsuper_loss: 0.0
-average reward score: 4.37890625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.30%) |Training time=0.48s (22.09%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3842|ppo_ep: 1|act_loss: 0.0176239013671875|cri_loss: 0.00960540771484375|unsuper_loss: 0.0
-average reward score: 5.0625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.29%) |Training time=0.48s (22.17%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3843|ppo_ep: 1|act_loss: 0.036529541015625|cri_loss: 0.018768310546875|unsuper_loss: 0.0
-average reward score: 5.59765625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.28%) |Training time=0.48s (22.09%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3844|ppo_ep: 1|act_loss: 0.022552490234375|cri_loss: 0.01169586181640625|unsuper_loss: 0.0
-average reward score: 5.55859375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.28%) |Training time=0.48s (22.18%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3845|ppo_ep: 1|act_loss: 0.029296875|cri_loss: 0.01503753662109375|unsuper_loss: 0.0
-average reward score: 4.78515625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.20%) |Training time=0.48s (22.27%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3846|ppo_ep: 1|act_loss: -0.00392913818359375|cri_loss: -0.001155853271484375|unsuper_loss: 0.0
-average reward score: 5.19140625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.99%) |Training time=0.49s (22.48%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3847|ppo_ep: 1|act_loss: 0.02215576171875|cri_loss: 0.011810302734375|unsuper_loss: 0.0
-average reward score: 5.90234375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.41%) |Training time=0.47s (21.94%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3848|ppo_ep: 1|act_loss: -0.0194549560546875|cri_loss: -0.00933074951171875|unsuper_loss: 0.0
-average reward score: 5.67578125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.14%) |Training time=0.48s (22.34%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
-[2023-04-14 11:09:28,729] [INFO] [logging.py:96:log_dist] [Rank 0] step=3850, skipped=52, lr=[5.5320557687767085e-06, 5.5320557687767085e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:09:28,747] [INFO] [timer.py:199:stop] epoch=0/micro_step=3850/global_step=3850, RunningAvgSamplesPerSec=105.37252725245156, CurrSamplesPerSec=102.7891293792571, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:09:28,840] [INFO] [logging.py:96:log_dist] [Rank 0] step=3850, skipped=61, lr=[2.8749168888851126e-06, 2.8749168888851126e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3849|ppo_ep: 1|act_loss: 0.01404571533203125|cri_loss: 0.007328033447265625|unsuper_loss: 0.0
-average reward score: 4.97265625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.40%) |Training time=0.47s (21.93%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3850|ppo_ep: 1|act_loss: 0.01168060302734375|cri_loss: 0.00693511962890625|unsuper_loss: 0.0
-average reward score: 5.19140625
--------------------------------------------------------------------------------------
-|E2E latency=3.28s |Gather latency=0.00s (0.00%) |Generate time=1.76s (53.80%) |Training time=0.48s (14.71%) |Others=1.03 (31.49%)|CurSamplesPerSec=9.77 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3851|ppo_ep: 1|act_loss: 0.00482177734375|cri_loss: 0.0029544830322265625|unsuper_loss: 0.0
-average reward score: 5.19140625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.51%) |Training time=0.47s (21.84%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3852|ppo_ep: 1|act_loss: -0.00400543212890625|cri_loss: -0.0009441375732421875|unsuper_loss: 0.0
-average reward score: 5.421875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.36%) |Training time=0.48s (22.09%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3853|ppo_ep: 1|act_loss: 0.00644683837890625|cri_loss: 0.003875732421875|unsuper_loss: 0.0
-average reward score: 6.10546875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.19%) |Training time=0.48s (22.18%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3854|ppo_ep: 1|act_loss: 0.0124053955078125|cri_loss: 0.00745391845703125|unsuper_loss: 0.0
-average reward score: 5.50390625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.16%) |Training time=0.48s (22.30%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3855|ppo_ep: 1|act_loss: 0.0143280029296875|cri_loss: 0.007793426513671875|unsuper_loss: 0.0
-average reward score: 6.1484375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.47%) |Training time=0.48s (21.93%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3856|ppo_ep: 1|act_loss: 0.018798828125|cri_loss: 0.00968170166015625|unsuper_loss: 0.0
-average reward score: 5.70703125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.45%) |Training time=0.47s (21.98%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3857|ppo_ep: 1|act_loss: 0.09454345703125|cri_loss: 0.0516357421875|unsuper_loss: 0.0
-average reward score: 5.06640625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.30%) |Training time=0.48s (22.06%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3858|ppo_ep: 1|act_loss: 0.0253448486328125|cri_loss: 0.01302337646484375|unsuper_loss: 0.0
-average reward score: 5.0
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.37%) |Training time=0.49s (22.16%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.56 |AvgSamplesPerSec=14.46
-[2023-04-14 11:09:51,625] [INFO] [logging.py:96:log_dist] [Rank 0] step=3860, skipped=52, lr=[5.513674902507077e-06, 5.513674902507077e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:09:51,643] [INFO] [timer.py:199:stop] epoch=0/micro_step=3860/global_step=3860, RunningAvgSamplesPerSec=105.36149393062692, CurrSamplesPerSec=103.64847575007799, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:09:51,736] [INFO] [logging.py:96:log_dist] [Rank 0] step=3860, skipped=61, lr=[2.8653980048019654e-06, 2.8653980048019654e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3859|ppo_ep: 1|act_loss: -0.00516510009765625|cri_loss: -0.002277374267578125|unsuper_loss: 0.0
-average reward score: 5.0390625
--------------------------------------------------------------------------------------
-|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.76%) |Training time=0.47s (20.83%) |Others=0.10 (4.41%)|CurSamplesPerSec=14.14 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3860|ppo_ep: 1|act_loss: -0.0166778564453125|cri_loss: -0.007465362548828125|unsuper_loss: 0.0
-average reward score: 5.73046875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.06%) |Training time=0.46s (21.38%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3861|ppo_ep: 1|act_loss: -0.0172576904296875|cri_loss: -0.0084228515625|unsuper_loss: 0.0
-average reward score: 3.95703125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.26%) |Training time=0.48s (22.13%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3862|ppo_ep: 1|act_loss: 0.013824462890625|cri_loss: 0.008209228515625|unsuper_loss: 0.0
-average reward score: 4.9921875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.37%) |Training time=0.48s (22.09%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3863|ppo_ep: 1|act_loss: -0.019683837890625|cri_loss: -0.00853729248046875|unsuper_loss: 0.0
-average reward score: 5.33984375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.42%) |Training time=0.48s (21.95%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3864|ppo_ep: 1|act_loss: 0.01343536376953125|cri_loss: 0.006969451904296875|unsuper_loss: 0.0
-average reward score: 4.765625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.65%) |Training time=0.47s (21.80%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3865|ppo_ep: 1|act_loss: -0.0426025390625|cri_loss: -0.0206146240234375|unsuper_loss: 0.0
-average reward score: 6.3671875
--------------------------------------------------------------------------------------
-|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.77s (76.00%) |Training time=0.46s (19.77%) |Others=0.10 (4.23%)|CurSamplesPerSec=13.72 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3866|ppo_ep: 1|act_loss: 0.01137542724609375|cri_loss: 0.0087432861328125|unsuper_loss: 0.0
-average reward score: 4.78515625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.08%) |Training time=0.49s (22.41%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3867|ppo_ep: 1|act_loss: 0.05126953125|cri_loss: 0.027069091796875|unsuper_loss: 0.0
-average reward score: 4.73046875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.85%) |Training time=0.49s (22.61%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3868|ppo_ep: 1|act_loss: 0.019317626953125|cri_loss: 0.01029205322265625|unsuper_loss: 0.0
-average reward score: 5.265625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.57s (72.73%) |Training time=0.49s (22.73%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
-[2023-04-14 11:10:13,439] [INFO] [logging.py:96:log_dist] [Rank 0] step=3870, skipped=52, lr=[5.495283828414054e-06, 5.495283828414054e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:10:13,457] [INFO] [timer.py:199:stop] epoch=0/micro_step=3870/global_step=3870, RunningAvgSamplesPerSec=105.35058199086996, CurrSamplesPerSec=98.91628601455396, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:10:13,549] [INFO] [logging.py:96:log_dist] [Rank 0] step=3870, skipped=61, lr=[2.855873704638912e-06, 2.855873704638912e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3869|ppo_ep: 1|act_loss: 0.003063201904296875|cri_loss: 0.00177764892578125|unsuper_loss: 0.0
-average reward score: 5.06640625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.57s (72.89%) |Training time=0.49s (22.49%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3870|ppo_ep: 1|act_loss: 0.069091796875|cri_loss: 0.035552978515625|unsuper_loss: 0.0
-average reward score: 6.2109375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.81%) |Training time=0.49s (22.66%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3871|ppo_ep: 1|act_loss: 0.02947998046875|cri_loss: 0.01541900634765625|unsuper_loss: 0.0
-average reward score: 5.1953125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.79%) |Training time=0.49s (22.59%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3872|ppo_ep: 1|act_loss: -0.00611114501953125|cri_loss: -0.002147674560546875|unsuper_loss: 0.0
-average reward score: 6.3359375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.30%) |Training time=0.48s (22.20%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3873|ppo_ep: 1|act_loss: -0.0058135986328125|cri_loss: -0.00225830078125|unsuper_loss: 0.0
-average reward score: 4.65625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.57s (72.90%) |Training time=0.49s (22.46%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3874|ppo_ep: 1|act_loss: -0.0141143798828125|cri_loss: -0.00676727294921875|unsuper_loss: 0.0
-average reward score: 5.3828125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.05%) |Training time=0.48s (22.40%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3875|ppo_ep: 1|act_loss: 0.0024662017822265625|cri_loss: 0.0014801025390625|unsuper_loss: 0.0
-average reward score: 4.8828125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.18%) |Training time=0.48s (22.29%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3876|ppo_ep: 1|act_loss: -0.0263214111328125|cri_loss: -0.01288604736328125|unsuper_loss: 0.0
-average reward score: 6.01953125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.20%) |Training time=0.48s (22.27%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3877|ppo_ep: 1|act_loss: 0.031494140625|cri_loss: 0.017913818359375|unsuper_loss: 0.0
-average reward score: 5.48828125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.15%) |Training time=0.48s (22.31%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3878|ppo_ep: 1|act_loss: 0.002197265625|cri_loss: 0.0018177032470703125|unsuper_loss: 0.0
-average reward score: 4.921875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.11%) |Training time=0.49s (22.36%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
-[2023-04-14 11:10:35,093] [INFO] [logging.py:96:log_dist] [Rank 0] step=3880, skipped=52, lr=[5.476882819097722e-06, 5.476882819097722e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:10:35,112] [INFO] [timer.py:199:stop] epoch=0/micro_step=3880/global_step=3880, RunningAvgSamplesPerSec=105.33392161732702, CurrSamplesPerSec=100.01730920417752, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:10:35,204] [INFO] [logging.py:96:log_dist] [Rank 0] step=3880, skipped=61, lr=[2.846344129569057e-06, 2.846344129569057e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3879|ppo_ep: 1|act_loss: -0.0154266357421875|cri_loss: -0.007171630859375|unsuper_loss: 0.0
-average reward score: 5.5
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.07%) |Training time=0.49s (22.42%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3880|ppo_ep: 1|act_loss: 0.00566864013671875|cri_loss: 0.00415802001953125|unsuper_loss: 0.0
-average reward score: 4.89453125
--------------------------------------------------------------------------------------
-|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.17%) |Training time=0.48s (20.61%) |Others=0.10 (4.22%)|CurSamplesPerSec=13.72 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3881|ppo_ep: 1|act_loss: 0.030059814453125|cri_loss: 0.015655517578125|unsuper_loss: 0.0
-average reward score: 5.875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.24%) |Training time=0.48s (22.21%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3882|ppo_ep: 1|act_loss: -0.04449462890625|cri_loss: -0.0208892822265625|unsuper_loss: 0.0
-average reward score: 5.44140625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.20%) |Training time=0.48s (22.26%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3883|ppo_ep: 1|act_loss: -0.00847625732421875|cri_loss: -0.00353240966796875|unsuper_loss: 0.0
-average reward score: 5.5078125
--------------------------------------------------------------------------------------
-|E2E latency=2.99s |Gather latency=0.00s (0.00%) |Generate time=1.61s (53.79%) |Training time=0.47s (15.83%) |Others=0.91 (30.38%)|CurSamplesPerSec=10.71 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3884|ppo_ep: 1|act_loss: -0.033355712890625|cri_loss: -0.01551055908203125|unsuper_loss: 0.0
-average reward score: 5.33203125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.20%) |Training time=0.48s (22.18%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3885|ppo_ep: 1|act_loss: 0.0021610260009765625|cri_loss: 0.0013217926025390625|unsuper_loss: 0.0
-average reward score: 5.03125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.67%) |Training time=0.47s (21.66%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3886|ppo_ep: 1|act_loss: 0.01102447509765625|cri_loss: 0.005680084228515625|unsuper_loss: 0.0
-average reward score: 5.15234375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.66%) |Training time=0.47s (21.74%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3887|ppo_ep: 1|act_loss: -0.0107879638671875|cri_loss: -0.004840850830078125|unsuper_loss: 0.0
-average reward score: 4.83203125
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.11%) |Training time=0.47s (21.33%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.52 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3888|ppo_ep: 1|act_loss: 0.03143310546875|cri_loss: 0.0162811279296875|unsuper_loss: 0.0
-average reward score: 5.421875
--------------------------------------------------------------------------------------
-|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.67%) |Training time=0.47s (20.90%) |Others=0.10 (4.43%)|CurSamplesPerSec=14.12 |AvgSamplesPerSec=14.46
-[2023-04-14 11:10:57,877] [INFO] [logging.py:96:log_dist] [Rank 0] step=3890, skipped=52, lr=[5.4584721473054284e-06, 5.4584721473054284e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:10:57,896] [INFO] [timer.py:199:stop] epoch=0/micro_step=3890/global_step=3890, RunningAvgSamplesPerSec=105.32536333025105, CurrSamplesPerSec=101.78936631216996, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:10:57,988] [INFO] [logging.py:96:log_dist] [Rank 0] step=3890, skipped=61, lr=[2.836809420843692e-06, 2.836809420843692e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3889|ppo_ep: 1|act_loss: 0.041259765625|cri_loss: 0.0216522216796875|unsuper_loss: 0.0
-average reward score: 5.33203125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.33%) |Training time=0.48s (22.06%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3890|ppo_ep: 1|act_loss: -0.0309295654296875|cri_loss: -0.014801025390625|unsuper_loss: 0.0
-average reward score: 5.2265625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.29%) |Training time=0.48s (22.07%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3891|ppo_ep: 1|act_loss: 0.016632080078125|cri_loss: 0.00905609130859375|unsuper_loss: 0.0
-average reward score: 5.7109375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.46%) |Training time=0.47s (21.91%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3892|ppo_ep: 1|act_loss: 0.07305908203125|cri_loss: 0.03955078125|unsuper_loss: 0.0
-average reward score: 5.05859375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.21%) |Training time=0.48s (22.18%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3893|ppo_ep: 1|act_loss: -0.0031833648681640625|cri_loss: -0.00118255615234375|unsuper_loss: 0.0
-average reward score: 4.765625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.39%) |Training time=0.48s (21.98%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3894|ppo_ep: 1|act_loss: -0.0030231475830078125|cri_loss: -0.0012531280517578125|unsuper_loss: 0.0
-average reward score: 5.78125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.20%) |Training time=0.48s (22.18%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3895|ppo_ep: 1|act_loss: -0.056243896484375|cri_loss: -0.0274505615234375|unsuper_loss: 0.0
-average reward score: 4.5625
--------------------------------------------------------------------------------------
-|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.51%) |Training time=0.47s (20.18%) |Others=0.10 (4.31%)|CurSamplesPerSec=13.76 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3896|ppo_ep: 1|act_loss: -0.024444580078125|cri_loss: -0.0117950439453125|unsuper_loss: 0.0
-average reward score: 5.8984375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.77%) |Training time=0.47s (21.59%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3897|ppo_ep: 1|act_loss: 0.00799560546875|cri_loss: 0.004730224609375|unsuper_loss: 0.0
-average reward score: 5.734375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.50%) |Training time=0.47s (21.86%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3898|ppo_ep: 1|act_loss: 0.0122528076171875|cri_loss: 0.006862640380859375|unsuper_loss: 0.0
-average reward score: 5.33984375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.29%) |Training time=0.48s (22.09%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
-[2023-04-14 11:11:19,675] [INFO] [logging.py:96:log_dist] [Rank 0] step=3900, skipped=52, lr=[5.440052085927744e-06, 5.440052085927744e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:11:19,693] [INFO] [timer.py:199:stop] epoch=0/micro_step=3900/global_step=3900, RunningAvgSamplesPerSec=105.3175314438355, CurrSamplesPerSec=102.41601813027702, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:11:19,786] [INFO] [logging.py:96:log_dist] [Rank 0] step=3900, skipped=61, lr=[2.827269719790202e-06, 2.827269719790202e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3899|ppo_ep: 1|act_loss: -0.003170013427734375|cri_loss: -0.0006551742553710938|unsuper_loss: 0.0
-average reward score: 5.0546875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.41%) |Training time=0.47s (21.98%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3900|ppo_ep: 1|act_loss: 0.026153564453125|cri_loss: 0.0141448974609375|unsuper_loss: 0.0
-average reward score: 5.56640625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.47%) |Training time=0.47s (21.91%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3901|ppo_ep: 1|act_loss: 0.0148773193359375|cri_loss: 0.0092315673828125|unsuper_loss: 0.0
-average reward score: 5.57421875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.26%) |Training time=0.48s (22.13%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3902|ppo_ep: 1|act_loss: 0.0027313232421875|cri_loss: 0.00159454345703125|unsuper_loss: 0.0
-average reward score: 6.1484375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.41%) |Training time=0.48s (21.97%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3903|ppo_ep: 1|act_loss: -0.05126953125|cri_loss: -0.024444580078125|unsuper_loss: 0.0
-average reward score: 4.95703125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.37%) |Training time=0.48s (22.00%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3904|ppo_ep: 1|act_loss: 0.018585205078125|cri_loss: 0.0104217529296875|unsuper_loss: 0.0
-average reward score: 5.40625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.47%) |Training time=0.47s (21.91%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3905|ppo_ep: 1|act_loss: -0.03497314453125|cri_loss: -0.0162200927734375|unsuper_loss: 0.0
-average reward score: 5.390625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.45%) |Training time=0.47s (21.93%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3906|ppo_ep: 1|act_loss: -0.01453399658203125|cri_loss: -0.00646209716796875|unsuper_loss: 0.0
-average reward score: 6.3203125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.53%) |Training time=0.47s (21.85%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3907|ppo_ep: 1|act_loss: -0.013519287109375|cri_loss: -0.00604248046875|unsuper_loss: 0.0
-average reward score: 5.9375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.56%) |Training time=0.47s (21.64%) |Others=0.10 (4.80%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3908|ppo_ep: 1|act_loss: 0.0009617805480957031|cri_loss: 0.0007853507995605469|unsuper_loss: 0.0
-average reward score: 5.421875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.54%) |Training time=0.47s (21.84%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
-[2023-04-14 11:11:41,331] [INFO] [logging.py:96:log_dist] [Rank 0] step=3910, skipped=52, lr=[5.421622907994414e-06, 5.421622907994414e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:11:41,349] [INFO] [timer.py:199:stop] epoch=0/micro_step=3910/global_step=3910, RunningAvgSamplesPerSec=105.31036604765642, CurrSamplesPerSec=103.50970950158406, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:11:41,442] [INFO] [logging.py:96:log_dist] [Rank 0] step=3910, skipped=61, lr=[2.8177251678099694e-06, 2.8177251678099694e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3909|ppo_ep: 1|act_loss: 0.005268096923828125|cri_loss: 0.003101348876953125|unsuper_loss: 0.0
-average reward score: 5.37109375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.57%) |Training time=0.47s (21.81%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3910|ppo_ep: 1|act_loss: 0.0406494140625|cri_loss: 0.021026611328125|unsuper_loss: 0.0
-average reward score: 5.24609375
--------------------------------------------------------------------------------------
-|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.79s (76.03%) |Training time=0.46s (19.70%) |Others=0.10 (4.27%)|CurSamplesPerSec=13.59 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3911|ppo_ep: 1|act_loss: 0.0535888671875|cri_loss: 0.0287322998046875|unsuper_loss: 0.0
-average reward score: 5.9296875
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.18%) |Training time=0.46s (21.21%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3912|ppo_ep: 1|act_loss: -0.007843017578125|cri_loss: -0.0031261444091796875|unsuper_loss: 0.0
-average reward score: 5.2890625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.84%) |Training time=0.47s (21.53%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3913|ppo_ep: 1|act_loss: 0.02130126953125|cri_loss: 0.01087188720703125|unsuper_loss: 0.0
-average reward score: 5.41796875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.11%) |Training time=0.46s (21.23%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3914|ppo_ep: 1|act_loss: -0.03924560546875|cri_loss: -0.017730712890625|unsuper_loss: 0.0
-average reward score: 6.35546875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.70%) |Training time=0.47s (21.69%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3915|ppo_ep: 1|act_loss: -0.000682830810546875|cri_loss: 0.001865386962890625|unsuper_loss: 0.0
-average reward score: 4.71875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.56%) |Training time=0.45s (20.80%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3916|ppo_ep: 1|act_loss: -0.022735595703125|cri_loss: -0.00836181640625|unsuper_loss: 0.0
-average reward score: 5.5390625
--------------------------------------------------------------------------------------
-|E2E latency=3.21s |Gather latency=0.00s (0.00%) |Generate time=1.61s (50.36%) |Training time=0.47s (14.53%) |Others=1.13 (35.11%)|CurSamplesPerSec=9.98 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3917|ppo_ep: 1|act_loss: 0.05841064453125|cri_loss: 0.03216552734375|unsuper_loss: 0.0
-average reward score: 4.9921875
--------------------------------------------------------------------------------------
-|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.70s (75.23%) |Training time=0.46s (20.33%) |Others=0.10 (4.43%)|CurSamplesPerSec=14.13 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3918|ppo_ep: 1|act_loss: 0.02142333984375|cri_loss: 0.0140228271484375|unsuper_loss: 0.0
-average reward score: 5.390625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.00%) |Training time=0.46s (21.38%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
-[2023-04-14 11:12:04,287] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, but hysteresis is 2. Reducing hysteresis to 1
-[2023-04-14 11:12:04,287] [INFO] [logging.py:96:log_dist] [Rank 0] step=3920, skipped=53, lr=[5.405029078967381e-06, 5.405029078967381e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:12:04,288] [INFO] [timer.py:199:stop] epoch=0/micro_step=3920/global_step=3920, RunningAvgSamplesPerSec=105.31687299274512, CurrSamplesPerSec=117.73918688232374, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:12:04,379] [INFO] [logging.py:96:log_dist] [Rank 0] step=3920, skipped=61, lr=[2.8081759063762797e-06, 2.8081759063762797e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3919|ppo_ep: 1|act_loss: 0.022247314453125|cri_loss: 0.0158843994140625|unsuper_loss: 0.0
-average reward score: 4.6015625
--------------------------------------------------------------------------------------
-|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.96%) |Training time=0.43s (20.35%) |Others=0.10 (4.68%)|CurSamplesPerSec=15.02 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3920|ppo_ep: 1|act_loss: 0.0309906005859375|cri_loss: 0.016754150390625|unsuper_loss: 0.0
-average reward score: 6.3125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.06%) |Training time=0.46s (21.30%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3921|ppo_ep: 1|act_loss: 0.02581787109375|cri_loss: 0.01375579833984375|unsuper_loss: 0.0
-average reward score: 4.296875
--------------------------------------------------------------------------------------
-|E2E latency=2.85s |Gather latency=0.00s (0.00%) |Generate time=1.60s (56.01%) |Training time=0.46s (16.14%) |Others=0.79 (27.85%)|CurSamplesPerSec=11.22 |AvgSamplesPerSec=14.46
-[2023-04-14 11:12:11,547] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
-epoch: 0|step: 3922|ppo_ep: 1|act_loss: -0.020843505859375|cri_loss: -0.00952911376953125|unsuper_loss: 0.0
-average reward score: 5.50390625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.40%) |Training time=0.46s (21.43%) |Others=0.09 (4.17%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46
-[2023-04-14 11:12:13,588] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, reducing to 16384
-[2023-04-14 11:12:13,672] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
-epoch: 0|step: 3923|ppo_ep: 1|act_loss: -0.0687255859375|cri_loss: -0.0307464599609375|unsuper_loss: 0.0
-average reward score: 5.453125
--------------------------------------------------------------------------------------
-|E2E latency=2.12s |Gather latency=0.00s (0.00%) |Generate time=1.60s (75.32%) |Training time=0.43s (20.47%) |Others=0.09 (4.21%)|CurSamplesPerSec=15.06 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3924|ppo_ep: 1|act_loss: 0.0050811767578125|cri_loss: 0.0030078887939453125|unsuper_loss: 0.0
-average reward score: 5.58984375
--------------------------------------------------------------------------------------
-|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.60s (70.56%) |Training time=0.57s (25.04%) |Others=0.10 (4.40%)|CurSamplesPerSec=14.11 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3925|ppo_ep: 1|act_loss: -0.13818359375|cri_loss: -0.047271728515625|unsuper_loss: 0.0
-average reward score: 6.3828125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.01%) |Training time=0.44s (20.35%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3926|ppo_ep: 1|act_loss: -0.06524658203125|cri_loss: -0.027069091796875|unsuper_loss: 0.0
-average reward score: 5.17578125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.21%) |Training time=0.46s (21.15%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3927|ppo_ep: 1|act_loss: -0.07269287109375|cri_loss: -0.0286712646484375|unsuper_loss: 0.0
-average reward score: 4.8046875
--------------------------------------------------------------------------------------
-|E2E latency=2.82s |Gather latency=0.00s (0.00%) |Generate time=1.60s (56.85%) |Training time=0.46s (16.34%) |Others=0.76 (26.80%)|CurSamplesPerSec=11.36 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3928|ppo_ep: 1|act_loss: 0.0650634765625|cri_loss: 0.034942626953125|unsuper_loss: 0.0
-average reward score: 4.80859375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.30%) |Training time=0.45s (21.06%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
-[2023-04-14 11:12:27,276] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 16384, reducing to 8192
-[2023-04-14 11:12:27,277] [INFO] [logging.py:96:log_dist] [Rank 0] step=3930, skipped=55, lr=[5.390273156270772e-06, 5.390273156270772e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:12:27,277] [INFO] [timer.py:199:stop] epoch=0/micro_step=3930/global_step=3930, RunningAvgSamplesPerSec=105.3224964159603, CurrSamplesPerSec=117.4507008482986, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:12:27,362] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, reducing to 16384
-[2023-04-14 11:12:27,362] [INFO] [logging.py:96:log_dist] [Rank 0] step=3930, skipped=64, lr=[2.8014886970414734e-06, 2.8014886970414734e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3929|ppo_ep: 1|act_loss: 0.0504150390625|cri_loss: 0.028167724609375|unsuper_loss: 0.0
-average reward score: 4.67578125
--------------------------------------------------------------------------------------
-|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.60s (75.29%) |Training time=0.43s (20.42%) |Others=0.09 (4.29%)|CurSamplesPerSec=15.05 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3930|ppo_ep: 1|act_loss: 0.089111328125|cri_loss: 0.047210693359375|unsuper_loss: 0.0
-average reward score: 5.7421875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.84%) |Training time=0.47s (21.55%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3931|ppo_ep: 1|act_loss: -0.0012054443359375|cri_loss: 0.0017375946044921875|unsuper_loss: 0.0
-average reward score: 4.84375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.02%) |Training time=0.46s (21.35%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3932|ppo_ep: 1|act_loss: 0.01161956787109375|cri_loss: 0.006717681884765625|unsuper_loss: 0.0
-average reward score: 4.55859375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.15%) |Training time=0.46s (21.21%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3933|ppo_ep: 1|act_loss: 0.048187255859375|cri_loss: 0.025787353515625|unsuper_loss: 0.0
-average reward score: 5.55078125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.99%) |Training time=0.46s (21.39%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3934|ppo_ep: 1|act_loss: 0.11151123046875|cri_loss: 0.0634765625|unsuper_loss: 0.0
-average reward score: 4.71484375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.41%) |Training time=0.45s (20.95%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3935|ppo_ep: 1|act_loss: -0.0048980712890625|cri_loss: 0.000118255615234375|unsuper_loss: 0.0
-average reward score: 4.828125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.25%) |Training time=0.46s (21.11%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3936|ppo_ep: 1|act_loss: 0.01910400390625|cri_loss: 0.026641845703125|unsuper_loss: 0.0
-average reward score: 5.4921875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.46%) |Training time=0.45s (20.88%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3937|ppo_ep: 1|act_loss: -0.007503509521484375|cri_loss: -0.001201629638671875|unsuper_loss: 0.0
-average reward score: 3.75
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.38%) |Training time=0.46s (21.04%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.46
-[2023-04-14 11:12:46,720] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 8192, reducing to 4096
-epoch: 0|step: 3938|ppo_ep: 1|act_loss: -0.015869140625|cri_loss: -0.006702423095703125|unsuper_loss: 0.0
-average reward score: 5.1328125
--------------------------------------------------------------------------------------
-|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.60s (75.17%) |Training time=0.43s (20.13%) |Others=0.10 (4.70%)|CurSamplesPerSec=15.03 |AvgSamplesPerSec=14.46
-[2023-04-14 11:12:49,033] [INFO] [logging.py:96:log_dist] [Rank 0] step=3940, skipped=56, lr=[5.373666340493332e-06, 5.373666340493332e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:12:49,083] [INFO] [timer.py:199:stop] epoch=0/micro_step=3940/global_step=3940, RunningAvgSamplesPerSec=105.32905620645766, CurrSamplesPerSec=98.3321840781513, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:12:49,175] [INFO] [logging.py:96:log_dist] [Rank 0] step=3940, skipped=64, lr=[2.7919317544146405e-06, 2.7919317544146405e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3939|ppo_ep: 1|act_loss: 0.0667724609375|cri_loss: 0.049713134765625|unsuper_loss: 0.0
-average reward score: 4.6953125
--------------------------------------------------------------------------------------
-|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.77s (75.08%) |Training time=0.49s (20.67%) |Others=0.10 (4.26%)|CurSamplesPerSec=13.54 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3940|ppo_ep: 1|act_loss: -0.027984619140625|cri_loss: -0.01288604736328125|unsuper_loss: 0.0
-average reward score: 4.8671875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.83%) |Training time=0.47s (21.55%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3941|ppo_ep: 1|act_loss: -0.0123443603515625|cri_loss: -0.005859375|unsuper_loss: 0.0
-average reward score: 4.54296875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.66%) |Training time=0.47s (21.74%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3942|ppo_ep: 1|act_loss: -0.01959228515625|cri_loss: -0.00902557373046875|unsuper_loss: 0.0
-average reward score: 4.09765625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.84%) |Training time=0.47s (21.54%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3943|ppo_ep: 1|act_loss: -0.01531982421875|cri_loss: -0.005657196044921875|unsuper_loss: 0.0
-average reward score: 4.59765625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.75%) |Training time=0.47s (21.63%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3944|ppo_ep: 1|act_loss: 0.011962890625|cri_loss: 0.018524169921875|unsuper_loss: 0.0
-average reward score: 4.62109375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.12%) |Training time=0.46s (21.29%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3945|ppo_ep: 1|act_loss: 0.092529296875|cri_loss: 0.0631103515625|unsuper_loss: 0.0
-average reward score: 4.703125
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.28%) |Training time=0.47s (21.18%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.47 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3946|ppo_ep: 1|act_loss: -0.0811767578125|cri_loss: -0.03668212890625|unsuper_loss: 0.0
-average reward score: 5.59765625
--------------------------------------------------------------------------------------
-|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.83%) |Training time=0.47s (20.57%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.14 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3947|ppo_ep: 1|act_loss: -0.0601806640625|cri_loss: -0.0255279541015625|unsuper_loss: 0.0
-average reward score: 4.4140625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.46%) |Training time=0.48s (21.93%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3948|ppo_ep: 1|act_loss: 0.005146026611328125|cri_loss: 0.005069732666015625|unsuper_loss: 0.0
-average reward score: 5.078125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.57%) |Training time=0.48s (21.82%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.46
-[2023-04-14 11:13:10,960] [INFO] [logging.py:96:log_dist] [Rank 0] step=3950, skipped=56, lr=[5.355206605693846e-06, 5.355206605693846e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:13:10,979] [INFO] [timer.py:199:stop] epoch=0/micro_step=3950/global_step=3950, RunningAvgSamplesPerSec=105.32556865211501, CurrSamplesPerSec=104.41765903501886, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:13:11,071] [INFO] [logging.py:96:log_dist] [Rank 0] step=3950, skipped=64, lr=[2.7823704846549614e-06, 2.7823704846549614e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3949|ppo_ep: 1|act_loss: -0.02288818359375|cri_loss: -0.0092010498046875|unsuper_loss: 0.0
-average reward score: 5.62109375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.86%) |Training time=0.47s (21.55%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3950|ppo_ep: 1|act_loss: 0.00533294677734375|cri_loss: 0.0029754638671875|unsuper_loss: 0.0
-average reward score: 4.5625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.56%) |Training time=0.48s (21.85%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3951|ppo_ep: 1|act_loss: -0.02294921875|cri_loss: -0.0074462890625|unsuper_loss: 0.0
-average reward score: 5.00390625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.66%) |Training time=0.47s (21.73%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3952|ppo_ep: 1|act_loss: -0.01934814453125|cri_loss: -0.00791168212890625|unsuper_loss: 0.0
-average reward score: 5.703125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.62%) |Training time=0.48s (21.79%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3953|ppo_ep: 1|act_loss: -0.001678466796875|cri_loss: 0.00296783447265625|unsuper_loss: 0.0
-average reward score: 4.6640625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.53%) |Training time=0.48s (21.89%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3954|ppo_ep: 1|act_loss: 0.05401611328125|cri_loss: 0.03033447265625|unsuper_loss: 0.0
-average reward score: 5.73828125
--------------------------------------------------------------------------------------
-|E2E latency=2.88s |Gather latency=0.00s (0.00%) |Generate time=1.77s (61.51%) |Training time=0.47s (16.29%) |Others=0.64 (22.20%)|CurSamplesPerSec=11.12 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3955|ppo_ep: 1|act_loss: 0.0494384765625|cri_loss: 0.028076171875|unsuper_loss: 0.0
-average reward score: 4.640625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.24%) |Training time=0.46s (21.09%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3956|ppo_ep: 1|act_loss: 0.08917236328125|cri_loss: 0.04974365234375|unsuper_loss: 0.0
-average reward score: 4.3984375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.84%) |Training time=0.47s (21.55%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3957|ppo_ep: 1|act_loss: 0.04241943359375|cri_loss: 0.024505615234375|unsuper_loss: 0.0
-average reward score: 4.734375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.91%) |Training time=0.47s (21.49%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3958|ppo_ep: 1|act_loss: 0.0198974609375|cri_loss: 0.01181793212890625|unsuper_loss: 0.0
-average reward score: 4.52734375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.76%) |Training time=0.47s (21.63%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
-[2023-04-14 11:13:33,404] [INFO] [logging.py:96:log_dist] [Rank 0] step=3960, skipped=56, lr=[5.3367390119534095e-06, 5.3367390119534095e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:13:33,423] [INFO] [timer.py:199:stop] epoch=0/micro_step=3960/global_step=3960, RunningAvgSamplesPerSec=105.32290912174726, CurrSamplesPerSec=106.21121211185998, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:13:33,516] [INFO] [logging.py:96:log_dist] [Rank 0] step=3960, skipped=64, lr=[2.772805029483517e-06, 2.772805029483517e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3959|ppo_ep: 1|act_loss: -0.020965576171875|cri_loss: -0.0093231201171875|unsuper_loss: 0.0
-average reward score: 5.3671875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.01%) |Training time=0.46s (21.39%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3960|ppo_ep: 1|act_loss: 0.0175323486328125|cri_loss: 0.0121612548828125|unsuper_loss: 0.0
-average reward score: 5.56640625
--------------------------------------------------------------------------------------
-|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.61s (69.47%) |Training time=0.47s (20.32%) |Others=0.24 (10.21%)|CurSamplesPerSec=13.85 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3961|ppo_ep: 1|act_loss: -0.018310546875|cri_loss: -0.00521087646484375|unsuper_loss: 0.0
-average reward score: 5.359375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.82%) |Training time=0.47s (21.58%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3962|ppo_ep: 1|act_loss: 0.04412841796875|cri_loss: 0.03289794921875|unsuper_loss: 0.0
-average reward score: 5.0859375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.29%) |Training time=0.46s (21.09%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3963|ppo_ep: 1|act_loss: 0.0105743408203125|cri_loss: 0.006397247314453125|unsuper_loss: 0.0
-average reward score: 5.08203125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.66%) |Training time=0.47s (21.74%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3964|ppo_ep: 1|act_loss: 0.006893157958984375|cri_loss: 0.008575439453125|unsuper_loss: 0.0
-average reward score: 4.8203125
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.35%) |Training time=0.46s (21.06%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3965|ppo_ep: 1|act_loss: 0.0237274169921875|cri_loss: 0.01345062255859375|unsuper_loss: 0.0
-average reward score: 5.015625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.07%) |Training time=0.46s (21.18%) |Others=0.10 (4.76%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3966|ppo_ep: 1|act_loss: 0.03277587890625|cri_loss: 0.0184478759765625|unsuper_loss: 0.0
-average reward score: 5.08203125
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.62s (72.99%) |Training time=0.45s (20.54%) |Others=0.14 (6.47%)|CurSamplesPerSec=14.46 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3967|ppo_ep: 1|act_loss: 0.0333251953125|cri_loss: 0.0197906494140625|unsuper_loss: 0.0
-average reward score: 4.6328125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.10%) |Training time=0.46s (21.29%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3968|ppo_ep: 1|act_loss: -0.0061798095703125|cri_loss: -0.0029392242431640625|unsuper_loss: 0.0
-average reward score: 5.046875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.95%) |Training time=0.47s (21.43%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.46
-[2023-04-14 11:13:55,489] [INFO] [logging.py:96:log_dist] [Rank 0] step=3970, skipped=56, lr=[5.318263833006314e-06, 5.318263833006314e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:13:55,507] [INFO] [timer.py:199:stop] epoch=0/micro_step=3970/global_step=3970, RunningAvgSamplesPerSec=105.32585319821645, CurrSamplesPerSec=109.12267687612147, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:13:55,603] [INFO] [logging.py:96:log_dist] [Rank 0] step=3970, skipped=64, lr=[2.7632355306834306e-06, 2.7632355306834306e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3969|ppo_ep: 1|act_loss: 0.009552001953125|cri_loss: 0.0053863525390625|unsuper_loss: 0.0
-average reward score: 4.78515625
--------------------------------------------------------------------------------------
-|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.78s (76.08%) |Training time=0.46s (19.51%) |Others=0.10 (4.41%)|CurSamplesPerSec=13.69 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3970|ppo_ep: 1|act_loss: 0.018157958984375|cri_loss: 0.0101318359375|unsuper_loss: 0.0
-average reward score: 4.83203125
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.63%) |Training time=0.48s (21.76%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3971|ppo_ep: 1|act_loss: 0.004344940185546875|cri_loss: 0.00278472900390625|unsuper_loss: 0.0
-average reward score: 5.15625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.04%) |Training time=0.46s (21.28%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3972|ppo_ep: 1|act_loss: -0.0034942626953125|cri_loss: -0.001590728759765625|unsuper_loss: 0.0
-average reward score: 5.06640625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.27%) |Training time=0.46s (21.08%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3973|ppo_ep: 1|act_loss: -0.0035877227783203125|cri_loss: -0.0016889572143554688|unsuper_loss: 0.0
-average reward score: 4.3984375
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.66%) |Training time=0.48s (21.76%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3974|ppo_ep: 1|act_loss: 0.02191162109375|cri_loss: 0.016937255859375|unsuper_loss: 0.0
-average reward score: 4.9609375
--------------------------------------------------------------------------------------
-|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.64s (73.75%) |Training time=0.48s (21.73%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.40 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3975|ppo_ep: 1|act_loss: -0.0308837890625|cri_loss: -0.01500701904296875|unsuper_loss: 0.0
-average reward score: 5.4921875
--------------------------------------------------------------------------------------
-|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.70s (74.38%) |Training time=0.49s (21.23%) |Others=0.10 (4.39%)|CurSamplesPerSec=13.99 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3976|ppo_ep: 1|act_loss: 0.0119476318359375|cri_loss: 0.008209228515625|unsuper_loss: 0.0
-average reward score: 5.3515625
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.28%) |Training time=0.48s (22.08%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3977|ppo_ep: 1|act_loss: -0.05389404296875|cri_loss: -0.0259246826171875|unsuper_loss: 0.0
-average reward score: 4.953125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.06%) |Training time=0.46s (21.31%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3978|ppo_ep: 1|act_loss: -0.01357269287109375|cri_loss: -0.006313323974609375|unsuper_loss: 0.0
-average reward score: 5.46875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.33%) |Training time=0.48s (22.08%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.46
-[2023-04-14 11:14:17,440] [INFO] [logging.py:96:log_dist] [Rank 0] step=3980, skipped=56, lr=[5.2997813426992765e-06, 5.2997813426992765e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:14:17,459] [INFO] [timer.py:199:stop] epoch=0/micro_step=3980/global_step=3980, RunningAvgSamplesPerSec=105.31787614664505, CurrSamplesPerSec=97.28826867151642, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:14:17,551] [INFO] [logging.py:96:log_dist] [Rank 0] step=3980, skipped=64, lr=[2.753662130097758e-06, 2.753662130097758e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3979|ppo_ep: 1|act_loss: 0.04278564453125|cri_loss: 0.0223236083984375|unsuper_loss: 0.0
-average reward score: 5.42578125
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.05%) |Training time=0.49s (22.40%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3980|ppo_ep: 1|act_loss: 0.0094451904296875|cri_loss: 0.0048828125|unsuper_loss: 0.0
-average reward score: 5.40625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.54%) |Training time=0.48s (21.88%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3981|ppo_ep: 1|act_loss: 0.016387939453125|cri_loss: 0.00946044921875|unsuper_loss: 0.0
-average reward score: 5.8203125
--------------------------------------------------------------------------------------
-|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.60s (69.80%) |Training time=0.47s (20.64%) |Others=0.22 (9.56%)|CurSamplesPerSec=13.98 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3982|ppo_ep: 1|act_loss: 0.06060791015625|cri_loss: 0.032379150390625|unsuper_loss: 0.0
-average reward score: 4.15234375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.68%) |Training time=0.47s (21.71%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3983|ppo_ep: 1|act_loss: 0.026519775390625|cri_loss: 0.01392364501953125|unsuper_loss: 0.0
-average reward score: 5.0625
--------------------------------------------------------------------------------------
-|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.60s (71.41%) |Training time=0.54s (24.12%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.30 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3984|ppo_ep: 1|act_loss: -0.050628662109375|cri_loss: -0.02349853515625|unsuper_loss: 0.0
-average reward score: 4.87890625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.92%) |Training time=0.46s (21.45%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3985|ppo_ep: 1|act_loss: -0.0207672119140625|cri_loss: -0.01001739501953125|unsuper_loss: 0.0
-average reward score: 3.953125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.48%) |Training time=0.48s (21.90%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3986|ppo_ep: 1|act_loss: 0.05877685546875|cri_loss: 0.03070068359375|unsuper_loss: 0.0
-average reward score: 4.40234375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.19%) |Training time=0.46s (21.15%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3987|ppo_ep: 1|act_loss: -0.003879547119140625|cri_loss: -0.0012159347534179688|unsuper_loss: 0.0
-average reward score: 6.18359375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.90%) |Training time=0.47s (21.50%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3988|ppo_ep: 1|act_loss: -0.00310516357421875|cri_loss: -0.0013723373413085938|unsuper_loss: 0.0
-average reward score: 5.609375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.72%) |Training time=0.47s (21.66%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
-[2023-04-14 11:14:39,322] [INFO] [logging.py:96:log_dist] [Rank 0] step=3990, skipped=56, lr=[5.281291814987394e-06, 5.281291814987394e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:14:39,341] [INFO] [timer.py:199:stop] epoch=0/micro_step=3990/global_step=3990, RunningAvgSamplesPerSec=105.31172796465408, CurrSamplesPerSec=106.26090811783111, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:14:39,433] [INFO] [logging.py:96:log_dist] [Rank 0] step=3990, skipped=64, lr=[2.7440849696273907e-06, 2.7440849696273907e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3989|ppo_ep: 1|act_loss: -0.0141143798828125|cri_loss: -0.0065765380859375|unsuper_loss: 0.0
-average reward score: 5.0
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.97%) |Training time=0.46s (21.42%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3990|ppo_ep: 1|act_loss: -0.00800323486328125|cri_loss: -0.003459930419921875|unsuper_loss: 0.0
-average reward score: 5.8984375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.78%) |Training time=0.47s (21.60%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3991|ppo_ep: 1|act_loss: -0.013031005859375|cri_loss: -0.00531005859375|unsuper_loss: 0.0
-average reward score: 5.6953125
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.02%) |Training time=0.47s (21.40%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3992|ppo_ep: 1|act_loss: 0.0122833251953125|cri_loss: 0.0067596435546875|unsuper_loss: 0.0
-average reward score: 5.18359375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.97%) |Training time=0.46s (21.39%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3993|ppo_ep: 1|act_loss: 0.0008268356323242188|cri_loss: 0.0008487701416015625|unsuper_loss: 0.0
-average reward score: 5.4765625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.04%) |Training time=0.46s (21.35%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3994|ppo_ep: 1|act_loss: 0.050140380859375|cri_loss: 0.02655029296875|unsuper_loss: 0.0
-average reward score: 5.16796875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.06%) |Training time=0.46s (21.30%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3995|ppo_ep: 1|act_loss: 0.032928466796875|cri_loss: 0.0180206298828125|unsuper_loss: 0.0
-average reward score: 4.8046875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.13%) |Training time=0.46s (21.25%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3996|ppo_ep: 1|act_loss: -0.0299072265625|cri_loss: -0.01325225830078125|unsuper_loss: 0.0
-average reward score: 5.78125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.06%) |Training time=0.46s (21.32%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3997|ppo_ep: 1|act_loss: -0.0191192626953125|cri_loss: -0.00928497314453125|unsuper_loss: 0.0
-average reward score: 5.83203125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.02%) |Training time=0.46s (21.36%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
-epoch: 0|step: 3998|ppo_ep: 1|act_loss: 0.00289154052734375|cri_loss: 0.00174713134765625|unsuper_loss: 0.0
-average reward score: 5.140625
--------------------------------------------------------------------------------------
-|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.77s (75.30%) |Training time=0.48s (20.45%) |Others=0.10 (4.25%)|CurSamplesPerSec=13.58 |AvgSamplesPerSec=14.46
-[2023-04-14 11:15:01,190] [INFO] [logging.py:96:log_dist] [Rank 0] step=4000, skipped=56, lr=[5.262795523930069e-06, 5.262795523930069e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:15:01,208] [INFO] [timer.py:199:stop] epoch=0/micro_step=4000/global_step=4000, RunningAvgSamplesPerSec=105.31314272642126, CurrSamplesPerSec=108.65087362544847, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:15:01,301] [INFO] [logging.py:96:log_dist] [Rank 0] step=4000, skipped=64, lr=[2.73450419122895e-06, 2.73450419122895e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 3999|ppo_ep: 1|act_loss: 0.01080322265625|cri_loss: 0.00595855712890625|unsuper_loss: 0.0
-average reward score: 4.98046875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.21%) |Training time=0.46s (21.16%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4000|ppo_ep: 1|act_loss: 0.00574493408203125|cri_loss: 0.0033092498779296875|unsuper_loss: 0.0
-average reward score: 6.46875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.83%) |Training time=0.47s (21.55%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4001|ppo_ep: 1|act_loss: 0.040130615234375|cri_loss: 0.021820068359375|unsuper_loss: 0.0
-average reward score: 4.5703125
--------------------------------------------------------------------------------------
-|E2E latency=2.87s |Gather latency=0.00s (0.00%) |Generate time=1.61s (55.99%) |Training time=0.46s (16.10%) |Others=0.80 (27.91%)|CurSamplesPerSec=11.16 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4002|ppo_ep: 1|act_loss: -0.0129547119140625|cri_loss: -0.005870819091796875|unsuper_loss: 0.0
-average reward score: 5.46875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.33%) |Training time=0.46s (21.05%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4003|ppo_ep: 1|act_loss: 0.00415802001953125|cri_loss: 0.0026340484619140625|unsuper_loss: 0.0
-average reward score: 4.9140625
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.63s (73.92%) |Training time=0.47s (21.43%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.48 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4004|ppo_ep: 1|act_loss: 0.003192901611328125|cri_loss: 0.0031108856201171875|unsuper_loss: 0.0
-average reward score: 5.65234375
--------------------------------------------------------------------------------------
-|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.70s (74.17%) |Training time=0.49s (21.44%) |Others=0.10 (4.38%)|CurSamplesPerSec=13.98 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4005|ppo_ep: 1|act_loss: 0.03448486328125|cri_loss: 0.017974853515625|unsuper_loss: 0.0
-average reward score: 5.2890625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.50%) |Training time=0.47s (21.87%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4006|ppo_ep: 1|act_loss: 0.00943756103515625|cri_loss: 0.005268096923828125|unsuper_loss: 0.0
-average reward score: 5.8828125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.24%) |Training time=0.46s (21.13%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4007|ppo_ep: 1|act_loss: 0.0208740234375|cri_loss: 0.0106964111328125|unsuper_loss: 0.0
-average reward score: 4.76953125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.41%) |Training time=0.48s (21.96%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4008|ppo_ep: 1|act_loss: 0.01227569580078125|cri_loss: 0.006473541259765625|unsuper_loss: 0.0
-average reward score: 4.453125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.56%) |Training time=0.48s (21.84%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.46
-[2023-04-14 11:15:23,749] [INFO] [logging.py:96:log_dist] [Rank 0] step=4010, skipped=56, lr=[5.244292743686956e-06, 5.244292743686956e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:15:23,767] [INFO] [timer.py:199:stop] epoch=0/micro_step=4010/global_step=4010, RunningAvgSamplesPerSec=105.30904361496287, CurrSamplesPerSec=102.18848468098487, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:15:23,859] [INFO] [logging.py:96:log_dist] [Rank 0] step=4010, skipped=64, lr=[2.7249199369126855e-06, 2.7249199369126855e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4009|ppo_ep: 1|act_loss: -0.009063720703125|cri_loss: -0.0037689208984375|unsuper_loss: 0.0
-average reward score: 5.3984375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.41%) |Training time=0.48s (22.00%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4010|ppo_ep: 1|act_loss: -0.0294189453125|cri_loss: -0.0133209228515625|unsuper_loss: 0.0
-average reward score: 5.39453125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.29%) |Training time=0.48s (22.09%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4011|ppo_ep: 1|act_loss: 0.0192413330078125|cri_loss: 0.010101318359375|unsuper_loss: 0.0
-average reward score: 4.64453125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.76%) |Training time=0.47s (21.60%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4012|ppo_ep: 1|act_loss: -0.01611328125|cri_loss: -0.00748443603515625|unsuper_loss: 0.0
-average reward score: 5.3515625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.48%) |Training time=0.47s (21.90%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4013|ppo_ep: 1|act_loss: -0.01181793212890625|cri_loss: -0.0032501220703125|unsuper_loss: 0.0
-average reward score: 4.7890625
--------------------------------------------------------------------------------------
-|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.76s (74.58%) |Training time=0.50s (21.18%) |Others=0.10 (4.25%)|CurSamplesPerSec=13.56 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4014|ppo_ep: 1|act_loss: -0.007022857666015625|cri_loss: -0.002872467041015625|unsuper_loss: 0.0
-average reward score: 6.125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.98%) |Training time=0.46s (21.38%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4015|ppo_ep: 1|act_loss: 0.009246826171875|cri_loss: 0.0051422119140625|unsuper_loss: 0.0
-average reward score: 5.23828125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.24%) |Training time=0.46s (21.13%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4016|ppo_ep: 1|act_loss: 0.0111846923828125|cri_loss: 0.00629425048828125|unsuper_loss: 0.0
-average reward score: 5.8828125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.26%) |Training time=0.48s (22.14%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4017|ppo_ep: 1|act_loss: 0.0282135009765625|cri_loss: 0.015533447265625|unsuper_loss: 0.0
-average reward score: 4.8671875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.46%) |Training time=0.47s (21.92%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4018|ppo_ep: 1|act_loss: 0.022796630859375|cri_loss: 0.01273345947265625|unsuper_loss: 0.0
-average reward score: 5.46484375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.27%) |Training time=0.48s (22.12%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
-[2023-04-14 11:15:45,614] [INFO] [logging.py:96:log_dist] [Rank 0] step=4020, skipped=56, lr=[5.2257837485138914e-06, 5.2257837485138914e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:15:45,632] [INFO] [timer.py:199:stop] epoch=0/micro_step=4020/global_step=4020, RunningAvgSamplesPerSec=105.30119557757467, CurrSamplesPerSec=102.19229714995767, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:15:45,725] [INFO] [logging.py:96:log_dist] [Rank 0] step=4020, skipped=64, lr=[2.7153323487403653e-06, 2.7153323487403653e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4019|ppo_ep: 1|act_loss: 0.0105133056640625|cri_loss: 0.006256103515625|unsuper_loss: 0.0
-average reward score: 6.2265625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.46%) |Training time=0.48s (21.95%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4020|ppo_ep: 1|act_loss: -0.015472412109375|cri_loss: -0.007476806640625|unsuper_loss: 0.0
-average reward score: 5.1796875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.54%) |Training time=0.47s (21.84%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4021|ppo_ep: 1|act_loss: -0.0288848876953125|cri_loss: -0.0139923095703125|unsuper_loss: 0.0
-average reward score: 5.26171875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.69%) |Training time=0.47s (21.69%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4022|ppo_ep: 1|act_loss: -0.01552581787109375|cri_loss: -0.0073699951171875|unsuper_loss: 0.0
-average reward score: 5.39453125
--------------------------------------------------------------------------------------
-|E2E latency=2.78s |Gather latency=0.00s (0.00%) |Generate time=1.59s (57.30%) |Training time=0.47s (16.97%) |Others=0.71 (25.73%)|CurSamplesPerSec=11.52 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4023|ppo_ep: 1|act_loss: -0.01531982421875|cri_loss: -0.00725555419921875|unsuper_loss: 0.0
-average reward score: 5.74609375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.46%) |Training time=0.48s (21.92%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4024|ppo_ep: 1|act_loss: 0.029022216796875|cri_loss: 0.015380859375|unsuper_loss: 0.0
-average reward score: 5.20703125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.38%) |Training time=0.45s (20.97%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4025|ppo_ep: 1|act_loss: -0.012115478515625|cri_loss: -0.0057373046875|unsuper_loss: 0.0
-average reward score: 5.8125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.64%) |Training time=0.47s (21.74%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4026|ppo_ep: 1|act_loss: -0.0033702850341796875|cri_loss: -0.0012416839599609375|unsuper_loss: 0.0
-average reward score: 4.88671875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.44%) |Training time=0.48s (21.95%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4027|ppo_ep: 1|act_loss: 0.007076263427734375|cri_loss: 0.00379180908203125|unsuper_loss: 0.0
-average reward score: 4.6015625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.79%) |Training time=0.47s (21.58%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4028|ppo_ep: 1|act_loss: -0.0110015869140625|cri_loss: -0.0049896240234375|unsuper_loss: 0.0
-average reward score: 5.2734375
--------------------------------------------------------------------------------------
-|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.76s (74.54%) |Training time=0.50s (21.21%) |Others=0.10 (4.25%)|CurSamplesPerSec=13.55 |AvgSamplesPerSec=14.46
-[2023-04-14 11:16:08,106] [INFO] [logging.py:96:log_dist] [Rank 0] step=4030, skipped=56, lr=[5.207268812758837e-06, 5.207268812758837e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:16:08,125] [INFO] [timer.py:199:stop] epoch=0/micro_step=4030/global_step=4030, RunningAvgSamplesPerSec=105.29472441602836, CurrSamplesPerSec=101.67324172839862, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:16:08,218] [INFO] [logging.py:96:log_dist] [Rank 0] step=4030, skipped=64, lr=[2.7057415688231765e-06, 2.7057415688231765e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4029|ppo_ep: 1|act_loss: 0.039093017578125|cri_loss: 0.020843505859375|unsuper_loss: 0.0
-average reward score: 4.5703125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.41%) |Training time=0.48s (21.98%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4030|ppo_ep: 1|act_loss: -0.017974853515625|cri_loss: -0.00848388671875|unsuper_loss: 0.0
-average reward score: 5.48828125
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.23%) |Training time=0.48s (22.17%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4031|ppo_ep: 1|act_loss: -0.046630859375|cri_loss: -0.021697998046875|unsuper_loss: 0.0
-average reward score: 5.17578125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.95%) |Training time=0.47s (21.43%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4032|ppo_ep: 1|act_loss: -0.0113372802734375|cri_loss: -0.004917144775390625|unsuper_loss: 0.0
-average reward score: 4.6640625
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.03%) |Training time=0.47s (21.40%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4033|ppo_ep: 1|act_loss: 0.027557373046875|cri_loss: 0.0144195556640625|unsuper_loss: 0.0
-average reward score: 5.30859375
--------------------------------------------------------------------------------------
-|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.60%) |Training time=0.47s (20.97%) |Others=0.10 (4.43%)|CurSamplesPerSec=14.13 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4034|ppo_ep: 1|act_loss: -0.0010023117065429688|cri_loss: -0.00019073486328125|unsuper_loss: 0.0
-average reward score: 5.515625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.51%) |Training time=0.47s (21.86%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4035|ppo_ep: 1|act_loss: -0.023345947265625|cri_loss: -0.0111083984375|unsuper_loss: 0.0
-average reward score: 5.25390625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.57%) |Training time=0.47s (21.81%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4036|ppo_ep: 1|act_loss: 0.03778076171875|cri_loss: 0.0196075439453125|unsuper_loss: 0.0
-average reward score: 4.6953125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.35%) |Training time=0.48s (22.04%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4037|ppo_ep: 1|act_loss: 0.00446319580078125|cri_loss: 0.0024566650390625|unsuper_loss: 0.0
-average reward score: 5.546875
--------------------------------------------------------------------------------------
-|E2E latency=2.63s |Gather latency=0.00s (0.00%) |Generate time=1.59s (60.51%) |Training time=0.47s (17.72%) |Others=0.57 (21.77%)|CurSamplesPerSec=12.18 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4038|ppo_ep: 1|act_loss: -0.009307861328125|cri_loss: -0.0042724609375|unsuper_loss: 0.0
-average reward score: 4.84765625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.91%) |Training time=0.46s (21.45%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46
-[2023-04-14 11:16:30,361] [INFO] [logging.py:96:log_dist] [Rank 0] step=4040, skipped=56, lr=[5.188748210857804e-06, 5.188748210857804e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:16:30,379] [INFO] [timer.py:199:stop] epoch=0/micro_step=4040/global_step=4040, RunningAvgSamplesPerSec=105.28961572115509, CurrSamplesPerSec=100.88236204918982, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:16:30,472] [INFO] [logging.py:96:log_dist] [Rank 0] step=4040, skipped=64, lr=[2.696147739319613e-06, 2.696147739319613e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4039|ppo_ep: 1|act_loss: 0.0245361328125|cri_loss: 0.01309967041015625|unsuper_loss: 0.0
-average reward score: 5.0078125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.21%) |Training time=0.48s (22.19%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4040|ppo_ep: 1|act_loss: -0.0211181640625|cri_loss: -0.01006317138671875|unsuper_loss: 0.0
-average reward score: 5.4453125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.63%) |Training time=0.47s (21.73%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4041|ppo_ep: 1|act_loss: -0.00572967529296875|cri_loss: -0.0024394989013671875|unsuper_loss: 0.0
-average reward score: 6.11328125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.39%) |Training time=0.48s (22.00%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4042|ppo_ep: 1|act_loss: 6.723403930664062e-05|cri_loss: 0.0003829002380371094|unsuper_loss: 0.0
-average reward score: 4.8515625
--------------------------------------------------------------------------------------
-|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.59s (71.70%) |Training time=0.51s (23.09%) |Others=0.12 (5.22%)|CurSamplesPerSec=14.45 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4043|ppo_ep: 1|act_loss: 0.01611328125|cri_loss: 0.0083465576171875|unsuper_loss: 0.0
-average reward score: 5.78125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.65%) |Training time=0.47s (21.72%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4044|ppo_ep: 1|act_loss: 0.017364501953125|cri_loss: 0.009002685546875|unsuper_loss: 0.0
-average reward score: 5.78515625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.86%) |Training time=0.47s (21.47%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4045|ppo_ep: 1|act_loss: 0.04150390625|cri_loss: 0.021636962890625|unsuper_loss: 0.0
-average reward score: 6.04296875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.49%) |Training time=0.44s (20.25%) |Others=0.11 (5.25%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4046|ppo_ep: 1|act_loss: 0.03741455078125|cri_loss: 0.0192413330078125|unsuper_loss: 0.0
-average reward score: 5.203125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.58%) |Training time=0.47s (21.79%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4047|ppo_ep: 1|act_loss: 0.017913818359375|cri_loss: 0.009490966796875|unsuper_loss: 0.0
-average reward score: 5.4765625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.54%) |Training time=0.47s (21.71%) |Others=0.10 (4.76%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4048|ppo_ep: 1|act_loss: 0.023406982421875|cri_loss: 0.0120697021484375|unsuper_loss: 0.0
-average reward score: 7.00390625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.51%) |Training time=0.47s (21.87%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
-[2023-04-14 11:16:52,062] [INFO] [logging.py:96:log_dist] [Rank 0] step=4050, skipped=56, lr=[5.170222217330791e-06, 5.170222217330791e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:16:52,081] [INFO] [timer.py:199:stop] epoch=0/micro_step=4050/global_step=4050, RunningAvgSamplesPerSec=105.28449685090004, CurrSamplesPerSec=100.30200816658396, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:16:52,173] [INFO] [logging.py:96:log_dist] [Rank 0] step=4050, skipped=64, lr=[2.686551002433372e-06, 2.686551002433372e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4049|ppo_ep: 1|act_loss: 0.0283660888671875|cri_loss: 0.01503753662109375|unsuper_loss: 0.0
-average reward score: 4.09375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.19%) |Training time=0.48s (22.21%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4050|ppo_ep: 1|act_loss: -0.0017862319946289062|cri_loss: -0.00014972686767578125|unsuper_loss: 0.0
-average reward score: 4.4296875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.76%) |Training time=0.47s (21.60%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4051|ppo_ep: 1|act_loss: -0.0023956298828125|cri_loss: -0.0001583099365234375|unsuper_loss: 0.0
-average reward score: 4.73828125
--------------------------------------------------------------------------------------
-|E2E latency=2.80s |Gather latency=0.00s (0.00%) |Generate time=1.59s (56.88%) |Training time=0.47s (16.86%) |Others=0.73 (26.26%)|CurSamplesPerSec=11.44 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4052|ppo_ep: 1|act_loss: -0.0013427734375|cri_loss: 0.0010967254638671875|unsuper_loss: 0.0
-average reward score: 4.5390625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.39%) |Training time=0.47s (21.94%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4053|ppo_ep: 1|act_loss: -0.03289794921875|cri_loss: -0.015869140625|unsuper_loss: 0.0
-average reward score: 5.0703125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.40%) |Training time=0.47s (21.95%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4054|ppo_ep: 1|act_loss: -0.0076904296875|cri_loss: -0.0036258697509765625|unsuper_loss: 0.0
-average reward score: 4.6484375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.32%) |Training time=0.48s (22.05%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4055|ppo_ep: 1|act_loss: 0.014862060546875|cri_loss: 0.007785797119140625|unsuper_loss: 0.0
-average reward score: 5.0703125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.07%) |Training time=0.48s (22.29%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4056|ppo_ep: 1|act_loss: 0.003627777099609375|cri_loss: 0.00479888916015625|unsuper_loss: 0.0
-average reward score: 5.81640625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.41%) |Training time=0.47s (21.91%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4057|ppo_ep: 1|act_loss: -0.003406524658203125|cri_loss: -0.0008668899536132812|unsuper_loss: 0.0
-average reward score: 5.44921875
--------------------------------------------------------------------------------------
-|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.02%) |Training time=0.48s (20.69%) |Others=0.10 (4.29%)|CurSamplesPerSec=13.72 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4058|ppo_ep: 1|act_loss: 0.0282440185546875|cri_loss: 0.0143585205078125|unsuper_loss: 0.0
-average reward score: 5.64453125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.25%) |Training time=0.48s (22.11%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
-[2023-04-14 11:17:14,465] [INFO] [logging.py:96:log_dist] [Rank 0] step=4060, skipped=56, lr=[5.151691106777714e-06, 5.151691106777714e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:17:14,483] [INFO] [timer.py:199:stop] epoch=0/micro_step=4060/global_step=4060, RunningAvgSamplesPerSec=105.27758348616538, CurrSamplesPerSec=104.85874689254413, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:17:14,576] [INFO] [logging.py:96:log_dist] [Rank 0] step=4060, skipped=64, lr=[2.6769515004112453e-06, 2.6769515004112453e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4059|ppo_ep: 1|act_loss: 0.018707275390625|cri_loss: 0.00998687744140625|unsuper_loss: 0.0
-average reward score: 5.734375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.66%) |Training time=0.47s (21.67%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4060|ppo_ep: 1|act_loss: 0.01556396484375|cri_loss: 0.0082855224609375|unsuper_loss: 0.0
-average reward score: 5.63671875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.35%) |Training time=0.48s (22.00%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4061|ppo_ep: 1|act_loss: -0.0537109375|cri_loss: -0.0259552001953125|unsuper_loss: 0.0
-average reward score: 5.68359375
--------------------------------------------------------------------------------------
-|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.63s (73.27%) |Training time=0.49s (22.15%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.41 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4062|ppo_ep: 1|act_loss: -0.0125885009765625|cri_loss: -0.005950927734375|unsuper_loss: 0.0
-average reward score: 5.3359375
--------------------------------------------------------------------------------------
-|E2E latency=2.30s |Gather latency=0.00s (0.00%) |Generate time=1.74s (75.58%) |Training time=0.46s (19.99%) |Others=0.10 (4.43%)|CurSamplesPerSec=13.93 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4063|ppo_ep: 1|act_loss: -0.029144287109375|cri_loss: -0.01389312744140625|unsuper_loss: 0.0
-average reward score: 4.890625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.42%) |Training time=0.43s (19.88%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4064|ppo_ep: 1|act_loss: -0.018829345703125|cri_loss: -0.0090179443359375|unsuper_loss: 0.0
-average reward score: 4.671875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.28%) |Training time=0.44s (20.06%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4065|ppo_ep: 1|act_loss: -0.03253173828125|cri_loss: -0.0152435302734375|unsuper_loss: 0.0
-average reward score: 5.1640625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.20%) |Training time=0.46s (21.06%) |Others=0.10 (4.74%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4066|ppo_ep: 1|act_loss: -0.001811981201171875|cri_loss: -0.0007963180541992188|unsuper_loss: 0.0
-average reward score: 4.7578125
--------------------------------------------------------------------------------------
-|E2E latency=2.40s |Gather latency=0.00s (0.00%) |Generate time=1.61s (67.20%) |Training time=0.46s (19.05%) |Others=0.33 (13.75%)|CurSamplesPerSec=13.34 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4067|ppo_ep: 1|act_loss: -0.01019287109375|cri_loss: -0.004711151123046875|unsuper_loss: 0.0
-average reward score: 4.640625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.31%) |Training time=0.46s (21.04%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4068|ppo_ep: 1|act_loss: -0.00948333740234375|cri_loss: -0.004062652587890625|unsuper_loss: 0.0
-average reward score: 5.5625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.09%) |Training time=0.44s (20.23%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
-[2023-04-14 11:17:36,585] [INFO] [logging.py:96:log_dist] [Rank 0] step=4070, skipped=56, lr=[5.133155153874335e-06, 5.133155153874335e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:17:36,603] [INFO] [timer.py:199:stop] epoch=0/micro_step=4070/global_step=4070, RunningAvgSamplesPerSec=105.28821586996683, CurrSamplesPerSec=112.36743147915281, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:17:36,695] [INFO] [logging.py:96:log_dist] [Rank 0] step=4070, skipped=64, lr=[2.6673493755410096e-06, 2.6673493755410096e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4069|ppo_ep: 1|act_loss: -0.03155517578125|cri_loss: -0.015045166015625|unsuper_loss: 0.0
-average reward score: 4.50390625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.79%) |Training time=0.45s (20.58%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4070|ppo_ep: 1|act_loss: 0.026214599609375|cri_loss: 0.01456451416015625|unsuper_loss: 0.0
-average reward score: 4.82421875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.29%) |Training time=0.46s (21.07%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4071|ppo_ep: 1|act_loss: 0.01337432861328125|cri_loss: 0.00728607177734375|unsuper_loss: 0.0
-average reward score: 5.1171875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.57%) |Training time=0.45s (20.78%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4072|ppo_ep: 1|act_loss: -0.022918701171875|cri_loss: -0.0107879638671875|unsuper_loss: 0.0
-average reward score: 4.2734375
--------------------------------------------------------------------------------------
-|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.78s (74.87%) |Training time=0.49s (20.81%) |Others=0.10 (4.32%)|CurSamplesPerSec=13.49 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4073|ppo_ep: 1|act_loss: -0.0123291015625|cri_loss: -0.00556182861328125|unsuper_loss: 0.0
-average reward score: 4.3125
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.18%) |Training time=0.46s (21.21%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4074|ppo_ep: 1|act_loss: 0.0379638671875|cri_loss: 0.021881103515625|unsuper_loss: 0.0
-average reward score: 4.9921875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.81%) |Training time=0.47s (21.55%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4075|ppo_ep: 1|act_loss: -0.0139617919921875|cri_loss: -0.006725311279296875|unsuper_loss: 0.0
-average reward score: 5.1953125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.31%) |Training time=0.48s (22.07%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4076|ppo_ep: 1|act_loss: -0.0021991729736328125|cri_loss: -0.0007162094116210938|unsuper_loss: 0.0
-average reward score: 5.4921875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.03%) |Training time=0.46s (21.33%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4077|ppo_ep: 1|act_loss: 0.04083251953125|cri_loss: 0.0215606689453125|unsuper_loss: 0.0
-average reward score: 4.984375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.46%) |Training time=0.45s (20.91%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4078|ppo_ep: 1|act_loss: -0.03167724609375|cri_loss: -0.014312744140625|unsuper_loss: 0.0
-average reward score: 5.1171875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.30%) |Training time=0.46s (21.06%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
-[2023-04-14 11:17:58,512] [INFO] [logging.py:96:log_dist] [Rank 0] step=4080, skipped=56, lr=[5.114614633368193e-06, 5.114614633368193e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:17:58,530] [INFO] [timer.py:199:stop] epoch=0/micro_step=4080/global_step=4080, RunningAvgSamplesPerSec=105.29089003351561, CurrSamplesPerSec=111.21455442164469, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:17:58,622] [INFO] [logging.py:96:log_dist] [Rank 0] step=4080, skipped=64, lr=[2.657744770149321e-06, 2.657744770149321e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4079|ppo_ep: 1|act_loss: -0.0562744140625|cri_loss: -0.0266876220703125|unsuper_loss: 0.0
-average reward score: 5.36328125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.57%) |Training time=0.45s (20.80%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4080|ppo_ep: 1|act_loss: -0.01227569580078125|cri_loss: -0.00440216064453125|unsuper_loss: 0.0
-average reward score: 5.19921875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.23%) |Training time=0.46s (21.12%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4081|ppo_ep: 1|act_loss: 0.013671875|cri_loss: 0.00727081298828125|unsuper_loss: 0.0
-average reward score: 5.0625
--------------------------------------------------------------------------------------
-|E2E latency=3.27s |Gather latency=0.00s (0.00%) |Generate time=1.60s (48.92%) |Training time=0.47s (14.48%) |Others=1.20 (36.60%)|CurSamplesPerSec=9.78 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4082|ppo_ep: 1|act_loss: -0.0014829635620117188|cri_loss: 5.340576171875e-05|unsuper_loss: 0.0
-average reward score: 5.01171875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.77%) |Training time=0.47s (21.59%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4083|ppo_ep: 1|act_loss: -0.007236480712890625|cri_loss: -0.002948760986328125|unsuper_loss: 0.0
-average reward score: 5.0703125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.79%) |Training time=0.47s (21.58%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4084|ppo_ep: 1|act_loss: 0.002590179443359375|cri_loss: 0.001953125|unsuper_loss: 0.0
-average reward score: 5.2578125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.64%) |Training time=0.47s (21.73%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4085|ppo_ep: 1|act_loss: 0.0026874542236328125|cri_loss: 0.0015869140625|unsuper_loss: 0.0
-average reward score: 4.46875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.73%) |Training time=0.47s (21.65%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4086|ppo_ep: 1|act_loss: 0.030548095703125|cri_loss: 0.018157958984375|unsuper_loss: 0.0
-average reward score: 4.9609375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.81%) |Training time=0.47s (21.55%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4087|ppo_ep: 1|act_loss: -0.02142333984375|cri_loss: -0.01007080078125|unsuper_loss: 0.0
-average reward score: 5.02734375
--------------------------------------------------------------------------------------
-|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.77s (74.70%) |Training time=0.50s (21.09%) |Others=0.10 (4.21%)|CurSamplesPerSec=13.49 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4088|ppo_ep: 1|act_loss: 0.0472412109375|cri_loss: 0.025299072265625|unsuper_loss: 0.0
-average reward score: 4.94921875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.81%) |Training time=0.47s (21.56%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
-[2023-04-14 11:18:21,553] [INFO] [logging.py:96:log_dist] [Rank 0] step=4090, skipped=56, lr=[5.096069820074525e-06, 5.096069820074525e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:18:21,571] [INFO] [timer.py:199:stop] epoch=0/micro_step=4090/global_step=4090, RunningAvgSamplesPerSec=105.28498538237922, CurrSamplesPerSec=98.45610699342147, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:18:21,664] [INFO] [logging.py:96:log_dist] [Rank 0] step=4090, skipped=64, lr=[2.6481378265995993e-06, 2.6481378265995993e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4089|ppo_ep: 1|act_loss: -0.0207672119140625|cri_loss: -0.0098876953125|unsuper_loss: 0.0
-average reward score: 5.0625
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.24%) |Training time=0.49s (22.18%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4090|ppo_ep: 1|act_loss: -0.00774383544921875|cri_loss: -0.0029888153076171875|unsuper_loss: 0.0
-average reward score: 5.19921875
--------------------------------------------------------------------------------------
-|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.63s (73.26%) |Training time=0.50s (22.23%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.34 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4091|ppo_ep: 1|act_loss: -0.01397705078125|cri_loss: -0.006793975830078125|unsuper_loss: 0.0
-average reward score: 5.12890625
--------------------------------------------------------------------------------------
-|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.63%) |Training time=0.48s (20.95%) |Others=0.10 (4.41%)|CurSamplesPerSec=14.10 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4092|ppo_ep: 1|act_loss: 0.02203369140625|cri_loss: 0.0111541748046875|unsuper_loss: 0.0
-average reward score: 4.75390625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.15%) |Training time=0.45s (21.18%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4093|ppo_ep: 1|act_loss: 0.019866943359375|cri_loss: 0.01026153564453125|unsuper_loss: 0.0
-average reward score: 5.515625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.45%) |Training time=0.45s (20.86%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4094|ppo_ep: 1|act_loss: 0.034454345703125|cri_loss: 0.018218994140625|unsuper_loss: 0.0
-average reward score: 5.3828125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.39%) |Training time=0.45s (20.93%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4095|ppo_ep: 1|act_loss: 0.031097412109375|cri_loss: 0.0160675048828125|unsuper_loss: 0.0
-average reward score: 4.54296875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.29%) |Training time=0.45s (21.02%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4096|ppo_ep: 1|act_loss: 0.00621795654296875|cri_loss: 0.0036296844482421875|unsuper_loss: 0.0
-average reward score: 5.5
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.18%) |Training time=0.46s (21.17%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4097|ppo_ep: 1|act_loss: -0.00027561187744140625|cri_loss: 0.0006251335144042969|unsuper_loss: 0.0
-average reward score: 5.3828125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.20%) |Training time=0.46s (21.13%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4098|ppo_ep: 1|act_loss: 0.01508331298828125|cri_loss: 0.00829315185546875|unsuper_loss: 0.0
-average reward score: 5.44921875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.48%) |Training time=0.45s (20.86%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46
-[2023-04-14 11:18:43,269] [INFO] [logging.py:96:log_dist] [Rank 0] step=4100, skipped=56, lr=[5.077520988872205e-06, 5.077520988872205e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:18:43,287] [INFO] [timer.py:199:stop] epoch=0/micro_step=4100/global_step=4100, RunningAvgSamplesPerSec=105.28956076846497, CurrSamplesPerSec=102.20793903058447, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:18:43,379] [INFO] [logging.py:96:log_dist] [Rank 0] step=4100, skipped=64, lr=[2.638528687289925e-06, 2.638528687289925e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4099|ppo_ep: 1|act_loss: -0.001491546630859375|cri_loss: -0.0004725456237792969|unsuper_loss: 0.0
-average reward score: 5.9921875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.50%) |Training time=0.48s (21.91%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4100|ppo_ep: 1|act_loss: 0.002899169921875|cri_loss: 0.002117156982421875|unsuper_loss: 0.0
-average reward score: 5.15625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.23%) |Training time=0.46s (21.15%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4101|ppo_ep: 1|act_loss: 0.00829315185546875|cri_loss: 0.004711151123046875|unsuper_loss: 0.0
-average reward score: 5.30859375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.13%) |Training time=0.44s (20.12%) |Others=0.10 (4.75%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4102|ppo_ep: 1|act_loss: -0.0052642822265625|cri_loss: -0.00102996826171875|unsuper_loss: 0.0
-average reward score: 4.90234375
--------------------------------------------------------------------------------------
-|E2E latency=2.61s |Gather latency=0.00s (0.00%) |Generate time=1.78s (68.45%) |Training time=0.46s (17.74%) |Others=0.36 (13.81%)|CurSamplesPerSec=12.27 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4103|ppo_ep: 1|act_loss: 0.014617919921875|cri_loss: 0.00801849365234375|unsuper_loss: 0.0
-average reward score: 5.3046875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.52%) |Training time=0.43s (19.75%) |Others=0.10 (4.73%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4104|ppo_ep: 1|act_loss: 0.0107421875|cri_loss: 0.005863189697265625|unsuper_loss: 0.0
-average reward score: 4.99609375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.66%) |Training time=0.42s (19.66%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4105|ppo_ep: 1|act_loss: 0.0217437744140625|cri_loss: 0.01155853271484375|unsuper_loss: 0.0
-average reward score: 5.71875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.80%) |Training time=0.44s (20.54%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4106|ppo_ep: 1|act_loss: 0.020599365234375|cri_loss: 0.01052093505859375|unsuper_loss: 0.0
-average reward score: 5.765625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.45%) |Training time=0.45s (20.88%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4107|ppo_ep: 1|act_loss: 0.05413818359375|cri_loss: 0.0306396484375|unsuper_loss: 0.0
-average reward score: 4.96484375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.07%) |Training time=0.46s (21.29%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4108|ppo_ep: 1|act_loss: 0.0389404296875|cri_loss: 0.0228271484375|unsuper_loss: 0.0
-average reward score: 4.6796875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.82%) |Training time=0.44s (20.49%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
-[2023-04-14 11:19:05,339] [INFO] [logging.py:96:log_dist] [Rank 0] step=4110, skipped=56, lr=[5.058968414699655e-06, 5.058968414699655e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:19:05,358] [INFO] [timer.py:199:stop] epoch=0/micro_step=4110/global_step=4110, RunningAvgSamplesPerSec=105.3072645068091, CurrSamplesPerSec=110.64967844108436, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:19:05,450] [INFO] [logging.py:96:log_dist] [Rank 0] step=4110, skipped=64, lr=[2.628917494650923e-06, 2.628917494650923e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4109|ppo_ep: 1|act_loss: -0.0223541259765625|cri_loss: -0.01024627685546875|unsuper_loss: 0.0
-average reward score: 5.34765625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.41%) |Training time=0.45s (20.94%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4110|ppo_ep: 1|act_loss: -0.004241943359375|cri_loss: -0.0017843246459960938|unsuper_loss: 0.0
-average reward score: 5.33203125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.65%) |Training time=0.47s (21.60%) |Others=0.10 (4.75%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4111|ppo_ep: 1|act_loss: -0.0271453857421875|cri_loss: -0.0132598876953125|unsuper_loss: 0.0
-average reward score: 6.109375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.88%) |Training time=0.46s (21.46%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4112|ppo_ep: 1|act_loss: -0.0033969879150390625|cri_loss: -0.001392364501953125|unsuper_loss: 0.0
-average reward score: 4.546875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.45%) |Training time=0.47s (21.89%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4113|ppo_ep: 1|act_loss: -0.0269622802734375|cri_loss: -0.01311492919921875|unsuper_loss: 0.0
-average reward score: 5.859375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.69%) |Training time=0.47s (21.65%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4114|ppo_ep: 1|act_loss: 0.013763427734375|cri_loss: 0.007221221923828125|unsuper_loss: 0.0
-average reward score: 4.4296875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.56%) |Training time=0.47s (21.76%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4115|ppo_ep: 1|act_loss: 0.00577545166015625|cri_loss: 0.0037689208984375|unsuper_loss: 0.0
-average reward score: 4.8515625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.61%) |Training time=0.47s (21.72%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4116|ppo_ep: 1|act_loss: 0.0099029541015625|cri_loss: 0.0052490234375|unsuper_loss: 0.0
-average reward score: 4.92578125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.77%) |Training time=0.46s (21.30%) |Others=0.11 (4.92%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4117|ppo_ep: 1|act_loss: 0.0099945068359375|cri_loss: 0.005489349365234375|unsuper_loss: 0.0
-average reward score: 5.71484375
--------------------------------------------------------------------------------------
-|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.74s (74.40%) |Training time=0.48s (20.70%) |Others=0.11 (4.90%)|CurSamplesPerSec=13.66 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4118|ppo_ep: 1|act_loss: 0.04541015625|cri_loss: 0.023223876953125|unsuper_loss: 0.0
-average reward score: 5.296875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.95%) |Training time=0.46s (21.08%) |Others=0.11 (4.98%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
-[2023-04-14 11:19:27,136] [INFO] [logging.py:96:log_dist] [Rank 0] step=4120, skipped=56, lr=[5.040412372550783e-06, 5.040412372550783e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:19:27,154] [INFO] [timer.py:199:stop] epoch=0/micro_step=4120/global_step=4120, RunningAvgSamplesPerSec=105.30798613236172, CurrSamplesPerSec=111.58476661744949, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:19:27,247] [INFO] [logging.py:96:log_dist] [Rank 0] step=4120, skipped=64, lr=[2.6193043911436534e-06, 2.6193043911436534e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4119|ppo_ep: 1|act_loss: -0.01499176025390625|cri_loss: -0.007236480712890625|unsuper_loss: 0.0
-average reward score: 4.42578125
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.81%) |Training time=0.45s (20.59%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4120|ppo_ep: 1|act_loss: -0.0572509765625|cri_loss: -0.027008056640625|unsuper_loss: 0.0
-average reward score: 4.7578125
--------------------------------------------------------------------------------------
-|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.70s (74.84%) |Training time=0.47s (20.74%) |Others=0.10 (4.42%)|CurSamplesPerSec=14.06 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4121|ppo_ep: 1|act_loss: -0.0028820037841796875|cri_loss: -0.0010728836059570312|unsuper_loss: 0.0
-average reward score: 5.9375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.32%) |Training time=0.45s (20.98%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4122|ppo_ep: 1|act_loss: 0.009918212890625|cri_loss: 0.005207061767578125|unsuper_loss: 0.0
-average reward score: 5.60546875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.30%) |Training time=0.45s (21.02%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4123|ppo_ep: 1|act_loss: 0.0007719993591308594|cri_loss: 0.0010890960693359375|unsuper_loss: 0.0
-average reward score: 5.359375
--------------------------------------------------------------------------------------
-|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.59s (67.95%) |Training time=0.45s (19.46%) |Others=0.29 (12.59%)|CurSamplesPerSec=13.70 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4124|ppo_ep: 1|act_loss: -0.02691650390625|cri_loss: -0.013092041015625|unsuper_loss: 0.0
-average reward score: 5.3984375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.03%) |Training time=0.46s (21.29%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4125|ppo_ep: 1|act_loss: -0.0169830322265625|cri_loss: -0.00821685791015625|unsuper_loss: 0.0
-average reward score: 4.98828125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.77%) |Training time=0.46s (21.57%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4126|ppo_ep: 1|act_loss: 0.0174407958984375|cri_loss: 0.0089874267578125|unsuper_loss: 0.0
-average reward score: 6.0703125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.02%) |Training time=0.46s (21.29%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4127|ppo_ep: 1|act_loss: 0.0179443359375|cri_loss: 0.00942230224609375|unsuper_loss: 0.0
-average reward score: 5.984375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.67%) |Training time=0.47s (21.70%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4128|ppo_ep: 1|act_loss: 0.0059051513671875|cri_loss: 0.0035343170166015625|unsuper_loss: 0.0
-average reward score: 4.87109375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.78%) |Training time=0.46s (21.55%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.46
-[2023-04-14 11:19:48,896] [INFO] [logging.py:96:log_dist] [Rank 0] step=4130, skipped=56, lr=[5.021853137470893e-06, 5.021853137470893e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:19:48,914] [INFO] [timer.py:199:stop] epoch=0/micro_step=4130/global_step=4130, RunningAvgSamplesPerSec=105.31352380297785, CurrSamplesPerSec=104.6573610334602, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:19:49,007] [INFO] [logging.py:96:log_dist] [Rank 0] step=4130, skipped=64, lr=[2.6096895192575005e-06, 2.6096895192575005e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4129|ppo_ep: 1|act_loss: 0.0205078125|cri_loss: 0.01119232177734375|unsuper_loss: 0.0
-average reward score: 5.18359375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.53%) |Training time=0.47s (21.82%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46
-[2023-04-14 11:19:51,138] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
-epoch: 0|step: 4130|ppo_ep: 1|act_loss: -0.0105438232421875|cri_loss: -0.00447845458984375|unsuper_loss: 0.0
-average reward score: 5.0234375
--------------------------------------------------------------------------------------
-|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.18%) |Training time=0.46s (21.63%) |Others=0.09 (4.19%)|CurSamplesPerSec=15.03 |AvgSamplesPerSec=14.46
-[2023-04-14 11:19:53,458] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
-epoch: 0|step: 4131|ppo_ep: 1|act_loss: 0.0074615478515625|cri_loss: 0.0038928985595703125|unsuper_loss: 0.0
-average reward score: 5.30859375
--------------------------------------------------------------------------------------
-|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.67s (71.92%) |Training time=0.56s (24.20%) |Others=0.09 (3.87%)|CurSamplesPerSec=13.79 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4132|ppo_ep: 1|act_loss: -0.0083770751953125|cri_loss: -0.0036468505859375|unsuper_loss: 0.0
-average reward score: 4.953125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.01%) |Training time=0.46s (21.31%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4133|ppo_ep: 1|act_loss: -0.0003032684326171875|cri_loss: 0.001373291015625|unsuper_loss: 0.0
-average reward score: 4.80859375
--------------------------------------------------------------------------------------
-|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.54%) |Training time=0.44s (20.76%) |Others=0.10 (4.70%)|CurSamplesPerSec=15.00 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4134|ppo_ep: 1|act_loss: -0.030120849609375|cri_loss: -0.013824462890625|unsuper_loss: 0.0
-average reward score: 5.36328125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.81%) |Training time=0.46s (21.50%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4135|ppo_ep: 1|act_loss: 0.003726959228515625|cri_loss: 0.002178192138671875|unsuper_loss: 0.0
-average reward score: 5.06640625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.19%) |Training time=0.46s (21.17%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4136|ppo_ep: 1|act_loss: -0.03802490234375|cri_loss: -0.0180511474609375|unsuper_loss: 0.0
-average reward score: 4.85546875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.01%) |Training time=0.46s (21.32%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4137|ppo_ep: 1|act_loss: 0.008544921875|cri_loss: 0.00445556640625|unsuper_loss: 0.0
-average reward score: 5.66796875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.35%) |Training time=0.45s (20.98%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4138|ppo_ep: 1|act_loss: 0.0252685546875|cri_loss: 0.01383209228515625|unsuper_loss: 0.0
-average reward score: 5.7421875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.82%) |Training time=0.46s (21.51%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46
-[2023-04-14 11:20:10,518] [INFO] [logging.py:96:log_dist] [Rank 0] step=4140, skipped=56, lr=[5.003290984552626e-06, 5.003290984552626e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:20:10,536] [INFO] [timer.py:199:stop] epoch=0/micro_step=4140/global_step=4140, RunningAvgSamplesPerSec=105.31356977645355, CurrSamplesPerSec=108.1466958779957, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:20:10,629] [INFO] [logging.py:96:log_dist] [Rank 0] step=4140, skipped=66, lr=[2.6019964442854366e-06, 2.6019964442854366e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4139|ppo_ep: 1|act_loss: 0.03631591796875|cri_loss: 0.0190582275390625|unsuper_loss: 0.0
-average reward score: 6.0703125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.93%) |Training time=0.46s (21.40%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4140|ppo_ep: 1|act_loss: 0.0304107666015625|cri_loss: 0.0156097412109375|unsuper_loss: 0.0
-average reward score: 5.5703125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.68%) |Training time=0.47s (21.65%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4141|ppo_ep: 1|act_loss: 0.02288818359375|cri_loss: 0.01279449462890625|unsuper_loss: 0.0
-average reward score: 4.734375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.59%) |Training time=0.47s (21.76%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4142|ppo_ep: 1|act_loss: -0.0200042724609375|cri_loss: -0.00955963134765625|unsuper_loss: 0.0
-average reward score: 5.38671875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.58%) |Training time=0.47s (21.75%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4143|ppo_ep: 1|act_loss: -0.025634765625|cri_loss: -0.0119476318359375|unsuper_loss: 0.0
-average reward score: 5.26171875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.52%) |Training time=0.47s (21.82%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4144|ppo_ep: 1|act_loss: -0.01433563232421875|cri_loss: -0.006839752197265625|unsuper_loss: 0.0
-average reward score: 5.3203125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.59%) |Training time=0.47s (21.81%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4145|ppo_ep: 1|act_loss: -0.00942230224609375|cri_loss: -0.0035762786865234375|unsuper_loss: 0.0
-average reward score: 5.3984375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.47%) |Training time=0.47s (21.87%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4146|ppo_ep: 1|act_loss: 0.019073486328125|cri_loss: 0.01001739501953125|unsuper_loss: 0.0
-average reward score: 4.6171875
--------------------------------------------------------------------------------------
-|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.61s (69.23%) |Training time=0.61s (26.14%) |Others=0.11 (4.63%)|CurSamplesPerSec=13.75 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4147|ppo_ep: 1|act_loss: 0.0037994384765625|cri_loss: 0.002201080322265625|unsuper_loss: 0.0
-average reward score: 5.17578125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.52%) |Training time=0.47s (21.82%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4148|ppo_ep: 1|act_loss: -0.037200927734375|cri_loss: -0.0171966552734375|unsuper_loss: 0.0
-average reward score: 5.0546875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.39%) |Training time=0.48s (21.98%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
-[2023-04-14 11:20:32,261] [INFO] [logging.py:96:log_dist] [Rank 0] step=4150, skipped=56, lr=[4.984726188931862e-06, 4.984726188931862e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:20:32,296] [INFO] [timer.py:199:stop] epoch=0/micro_step=4150/global_step=4150, RunningAvgSamplesPerSec=105.30342576879355, CurrSamplesPerSec=100.15022639020299, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:20:32,388] [INFO] [logging.py:96:log_dist] [Rank 0] step=4150, skipped=66, lr=[2.592378748472863e-06, 2.592378748472863e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4149|ppo_ep: 1|act_loss: -0.03155517578125|cri_loss: -0.0149993896484375|unsuper_loss: 0.0
-average reward score: 5.51171875
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.63s (73.65%) |Training time=0.48s (21.83%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.47 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4150|ppo_ep: 1|act_loss: 0.03875732421875|cri_loss: 0.0203857421875|unsuper_loss: 0.0
-average reward score: 5.96484375
--------------------------------------------------------------------------------------
-|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.71s (75.10%) |Training time=0.46s (20.47%) |Others=0.10 (4.43%)|CurSamplesPerSec=14.09 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4151|ppo_ep: 1|act_loss: 0.056396484375|cri_loss: 0.029937744140625|unsuper_loss: 0.0
-average reward score: 6.0234375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.16%) |Training time=0.45s (21.19%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4152|ppo_ep: 1|act_loss: 0.032470703125|cri_loss: 0.0168304443359375|unsuper_loss: 0.0
-average reward score: 6.14453125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.99%) |Training time=0.46s (21.34%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4153|ppo_ep: 1|act_loss: 0.056732177734375|cri_loss: 0.0311279296875|unsuper_loss: 0.0
-average reward score: 4.90234375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.96%) |Training time=0.46s (21.40%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4154|ppo_ep: 1|act_loss: -0.00653839111328125|cri_loss: -0.001399993896484375|unsuper_loss: 0.0
-average reward score: 5.4375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.98%) |Training time=0.46s (21.39%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4155|ppo_ep: 1|act_loss: 0.0716552734375|cri_loss: 0.03912353515625|unsuper_loss: 0.0
-average reward score: 5.58984375
--------------------------------------------------------------------------------------
-|E2E latency=2.38s |Gather latency=0.00s (0.00%) |Generate time=1.61s (67.83%) |Training time=0.45s (19.08%) |Others=0.31 (13.09%)|CurSamplesPerSec=13.47 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4156|ppo_ep: 1|act_loss: -0.0033702850341796875|cri_loss: -0.000652313232421875|unsuper_loss: 0.0
-average reward score: 6.1640625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.96%) |Training time=0.46s (21.38%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4157|ppo_ep: 1|act_loss: -0.047393798828125|cri_loss: -0.022369384765625|unsuper_loss: 0.0
-average reward score: 4.6484375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.03%) |Training time=0.46s (21.34%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4158|ppo_ep: 1|act_loss: -0.033660888671875|cri_loss: -0.0152130126953125|unsuper_loss: 0.0
-average reward score: 5.66796875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.25%) |Training time=0.45s (21.10%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46
-[2023-04-14 11:20:54,125] [INFO] [logging.py:96:log_dist] [Rank 0] step=4160, skipped=56, lr=[4.966159025783661e-06, 4.966159025783661e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:20:54,143] [INFO] [timer.py:199:stop] epoch=0/micro_step=4160/global_step=4160, RunningAvgSamplesPerSec=105.31034590166539, CurrSamplesPerSec=109.2526283915123, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:20:54,235] [INFO] [logging.py:96:log_dist] [Rank 0] step=4160, skipped=66, lr=[2.5827596833843844e-06, 2.5827596833843844e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4159|ppo_ep: 1|act_loss: -0.035186767578125|cri_loss: -0.016571044921875|unsuper_loss: 0.0
-average reward score: 5.31640625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.20%) |Training time=0.46s (21.16%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4160|ppo_ep: 1|act_loss: 0.023956298828125|cri_loss: 0.0150146484375|unsuper_loss: 0.0
-average reward score: 4.5390625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.21%) |Training time=0.45s (21.13%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4161|ppo_ep: 1|act_loss: 0.032257080078125|cri_loss: 0.0177764892578125|unsuper_loss: 0.0
-average reward score: 5.26953125
--------------------------------------------------------------------------------------
-|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.77s (75.55%) |Training time=0.47s (20.15%) |Others=0.10 (4.30%)|CurSamplesPerSec=13.69 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4162|ppo_ep: 1|act_loss: 0.0159454345703125|cri_loss: 0.00836181640625|unsuper_loss: 0.0
-average reward score: 5.22265625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.52%) |Training time=0.45s (20.79%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4163|ppo_ep: 1|act_loss: 0.042205810546875|cri_loss: 0.0225067138671875|unsuper_loss: 0.0
-average reward score: 6.37109375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.00%) |Training time=0.46s (21.35%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4164|ppo_ep: 1|act_loss: 0.018768310546875|cri_loss: 0.0106201171875|unsuper_loss: 0.0
-average reward score: 5.2109375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.17%) |Training time=0.45s (21.15%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4165|ppo_ep: 1|act_loss: 0.022857666015625|cri_loss: 0.01219940185546875|unsuper_loss: 0.0
-average reward score: 4.62109375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.74%) |Training time=0.47s (21.61%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4166|ppo_ep: 1|act_loss: -0.009429931640625|cri_loss: -0.003875732421875|unsuper_loss: 0.0
-average reward score: 5.7734375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.43%) |Training time=0.45s (20.90%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4167|ppo_ep: 1|act_loss: 0.03271484375|cri_loss: 0.018402099609375|unsuper_loss: 0.0
-average reward score: 5.53515625
--------------------------------------------------------------------------------------
-|E2E latency=2.67s |Gather latency=0.00s (0.00%) |Generate time=1.59s (59.66%) |Training time=0.44s (16.65%) |Others=0.63 (23.69%)|CurSamplesPerSec=11.98 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4168|ppo_ep: 1|act_loss: -0.02325439453125|cri_loss: -0.01129150390625|unsuper_loss: 0.0
-average reward score: 4.9296875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.65%) |Training time=0.44s (20.65%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46
-[2023-04-14 11:21:16,351] [INFO] [logging.py:96:log_dist] [Rank 0] step=4170, skipped=56, lr=[4.94758977031817e-06, 4.94758977031817e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:21:16,369] [INFO] [timer.py:199:stop] epoch=0/micro_step=4170/global_step=4170, RunningAvgSamplesPerSec=105.3199038682373, CurrSamplesPerSec=108.86511188022202, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:21:16,461] [INFO] [logging.py:96:log_dist] [Rank 0] step=4170, skipped=66, lr=[2.5731393915977522e-06, 2.5731393915977522e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4169|ppo_ep: 1|act_loss: -0.0391845703125|cri_loss: -0.0189056396484375|unsuper_loss: 0.0
-average reward score: 5.53125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.16%) |Training time=0.46s (21.22%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4170|ppo_ep: 1|act_loss: -0.0283660888671875|cri_loss: -0.0117950439453125|unsuper_loss: 0.0
-average reward score: 6.078125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.33%) |Training time=0.45s (20.99%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4171|ppo_ep: 1|act_loss: 0.000972747802734375|cri_loss: 0.0014219284057617188|unsuper_loss: 0.0
-average reward score: 4.6171875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.62%) |Training time=0.44s (20.69%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4172|ppo_ep: 1|act_loss: -0.00641632080078125|cri_loss: -0.00286102294921875|unsuper_loss: 0.0
-average reward score: 4.84375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.36%) |Training time=0.45s (20.99%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4173|ppo_ep: 1|act_loss: 0.0224609375|cri_loss: 0.01314544677734375|unsuper_loss: 0.0
-average reward score: 5.78515625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.95%) |Training time=0.46s (21.40%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4174|ppo_ep: 1|act_loss: -0.035736083984375|cri_loss: -0.0157012939453125|unsuper_loss: 0.0
-average reward score: 5.8359375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.41%) |Training time=0.45s (20.92%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4175|ppo_ep: 1|act_loss: 0.0092315673828125|cri_loss: 0.005367279052734375|unsuper_loss: 0.0
-average reward score: 5.22265625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.33%) |Training time=0.45s (21.03%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4176|ppo_ep: 1|act_loss: 0.028106689453125|cri_loss: 0.0146331787109375|unsuper_loss: 0.0
-average reward score: 4.40234375
--------------------------------------------------------------------------------------
-|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.77s (75.41%) |Training time=0.48s (20.31%) |Others=0.10 (4.27%)|CurSamplesPerSec=13.66 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4177|ppo_ep: 1|act_loss: 0.022796630859375|cri_loss: 0.0117950439453125|unsuper_loss: 0.0
-average reward score: 6.27734375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.42%) |Training time=0.45s (20.89%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4178|ppo_ep: 1|act_loss: 0.0141754150390625|cri_loss: 0.007419586181640625|unsuper_loss: 0.0
-average reward score: 6.25390625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.00%) |Training time=0.47s (21.39%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.46
-[2023-04-14 11:21:38,140] [INFO] [logging.py:96:log_dist] [Rank 0] step=4180, skipped=56, lr=[4.92901869777655e-06, 4.92901869777655e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:21:39,028] [INFO] [timer.py:199:stop] epoch=0/micro_step=4180/global_step=4180, RunningAvgSamplesPerSec=105.2561419271204, CurrSamplesPerSec=27.181817887243163, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:21:39,122] [INFO] [logging.py:96:log_dist] [Rank 0] step=4180, skipped=66, lr=[2.563518015708896e-06, 2.563518015708896e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4179|ppo_ep: 1|act_loss: 0.0024871826171875|cri_loss: 0.0018558502197265625|unsuper_loss: 0.0
-average reward score: 5.4375
--------------------------------------------------------------------------------------
-|E2E latency=3.13s |Gather latency=0.00s (0.00%) |Generate time=1.69s (54.01%) |Training time=1.34s (42.79%) |Others=0.10 (3.20%)|CurSamplesPerSec=10.21 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4180|ppo_ep: 1|act_loss: 0.01177215576171875|cri_loss: 0.00606536865234375|unsuper_loss: 0.0
-average reward score: 5.9375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.97%) |Training time=0.44s (20.36%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4181|ppo_ep: 1|act_loss: -0.051971435546875|cri_loss: -0.025054931640625|unsuper_loss: 0.0
-average reward score: 4.88671875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.38%) |Training time=0.43s (19.87%) |Others=0.10 (4.75%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4182|ppo_ep: 1|act_loss: -0.03253173828125|cri_loss: -0.015716552734375|unsuper_loss: 0.0
-average reward score: 5.7109375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.21%) |Training time=0.44s (20.16%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4183|ppo_ep: 1|act_loss: 0.00038909912109375|cri_loss: 0.0009660720825195312|unsuper_loss: 0.0
-average reward score: 6.40625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.74%) |Training time=0.44s (20.58%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4184|ppo_ep: 1|act_loss: -0.037841796875|cri_loss: -0.0181884765625|unsuper_loss: 0.0
-average reward score: 4.83984375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.50%) |Training time=0.45s (20.82%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4185|ppo_ep: 1|act_loss: -0.006397247314453125|cri_loss: -0.002197265625|unsuper_loss: 0.0
-average reward score: 5.51171875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.35%) |Training time=0.45s (20.99%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4186|ppo_ep: 1|act_loss: -0.02880859375|cri_loss: -0.01381683349609375|unsuper_loss: 0.0
-average reward score: 4.609375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.18%) |Training time=0.45s (21.15%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4187|ppo_ep: 1|act_loss: -0.026763916015625|cri_loss: -0.0127716064453125|unsuper_loss: 0.0
-average reward score: 5.296875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.53%) |Training time=0.45s (20.78%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4188|ppo_ep: 1|act_loss: -0.01885986328125|cri_loss: -0.00865936279296875|unsuper_loss: 0.0
-average reward score: 4.49609375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.26%) |Training time=0.45s (21.09%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46
-[2023-04-14 11:22:00,467] [INFO] [logging.py:96:log_dist] [Rank 0] step=4190, skipped=56, lr=[4.910446083426898e-06, 4.910446083426898e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:22:00,728] [INFO] [timer.py:199:stop] epoch=0/micro_step=4190/global_step=4190, RunningAvgSamplesPerSec=105.25487124469956, CurrSamplesPerSec=59.8330459467797, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:22:00,820] [INFO] [logging.py:96:log_dist] [Rank 0] step=4190, skipped=66, lr=[2.5538956983298186e-06, 2.5538956983298186e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4189|ppo_ep: 1|act_loss: -0.0031604766845703125|cri_loss: -0.000560760498046875|unsuper_loss: 0.0
-average reward score: 5.26171875
--------------------------------------------------------------------------------------
-|E2E latency=2.39s |Gather latency=0.00s (0.00%) |Generate time=1.59s (66.67%) |Training time=0.70s (29.16%) |Others=0.10 (4.17%)|CurSamplesPerSec=13.38 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4190|ppo_ep: 1|act_loss: 0.0625|cri_loss: 0.032318115234375|unsuper_loss: 0.0
-average reward score: 5.0625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.55%) |Training time=0.45s (20.77%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4191|ppo_ep: 1|act_loss: 0.029144287109375|cri_loss: 0.014923095703125|unsuper_loss: 0.0
-average reward score: 4.51171875
--------------------------------------------------------------------------------------
-|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.77s (75.74%) |Training time=0.47s (19.96%) |Others=0.10 (4.30%)|CurSamplesPerSec=13.69 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4192|ppo_ep: 1|act_loss: 0.008636474609375|cri_loss: 0.0048980712890625|unsuper_loss: 0.0
-average reward score: 5.68359375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.19%) |Training time=0.45s (21.16%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4193|ppo_ep: 1|act_loss: -0.030487060546875|cri_loss: -0.01329803466796875|unsuper_loss: 0.0
-average reward score: 6.06640625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.44%) |Training time=0.45s (20.90%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4194|ppo_ep: 1|act_loss: 0.003936767578125|cri_loss: 0.0020847320556640625|unsuper_loss: 0.0
-average reward score: 5.24609375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.68%) |Training time=0.44s (20.63%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4195|ppo_ep: 1|act_loss: -0.036102294921875|cri_loss: -0.01715087890625|unsuper_loss: 0.0
-average reward score: 6.40625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.57%) |Training time=0.45s (20.77%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4196|ppo_ep: 1|act_loss: 6.031990051269531e-05|cri_loss: 0.00016546249389648438|unsuper_loss: 0.0
-average reward score: 5.21484375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.30%) |Training time=0.45s (21.03%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4197|ppo_ep: 1|act_loss: 0.0220947265625|cri_loss: 0.01141357421875|unsuper_loss: 0.0
-average reward score: 5.03125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.87%) |Training time=0.44s (20.42%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4198|ppo_ep: 1|act_loss: 0.03314208984375|cri_loss: 0.017913818359375|unsuper_loss: 0.0
-average reward score: 5.17578125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.64%) |Training time=0.44s (20.68%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.46
-[2023-04-14 11:22:22,354] [INFO] [logging.py:96:log_dist] [Rank 0] step=4200, skipped=56, lr=[4.8918722025601635e-06, 4.8918722025601635e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:22:22,373] [INFO] [timer.py:199:stop] epoch=0/micro_step=4200/global_step=4200, RunningAvgSamplesPerSec=105.26922859440923, CurrSamplesPerSec=108.52489355130318, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:22:22,465] [INFO] [logging.py:96:log_dist] [Rank 0] step=4200, skipped=66, lr=[2.5442725820864755e-06, 2.5442725820864755e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4199|ppo_ep: 1|act_loss: 0.003414154052734375|cri_loss: 0.00225067138671875|unsuper_loss: 0.0
-average reward score: 5.47265625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.07%) |Training time=0.46s (21.29%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4200|ppo_ep: 1|act_loss: -0.00171661376953125|cri_loss: -0.0006322860717773438|unsuper_loss: 0.0
-average reward score: 5.171875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.28%) |Training time=0.45s (21.04%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4201|ppo_ep: 1|act_loss: -0.006732940673828125|cri_loss: -0.0029163360595703125|unsuper_loss: 0.0
-average reward score: 5.734375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.19%) |Training time=0.45s (21.16%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4202|ppo_ep: 1|act_loss: 0.0506591796875|cri_loss: 0.0276336669921875|unsuper_loss: 0.0
-average reward score: 6.171875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.26%) |Training time=0.45s (21.07%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4203|ppo_ep: 1|act_loss: 0.038818359375|cri_loss: 0.0203399658203125|unsuper_loss: 0.0
-average reward score: 5.9375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.00%) |Training time=0.45s (20.84%) |Others=0.11 (5.16%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4204|ppo_ep: 1|act_loss: -0.0139923095703125|cri_loss: -0.00634002685546875|unsuper_loss: 0.0
-average reward score: 4.6484375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.08%) |Training time=0.46s (21.26%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4205|ppo_ep: 1|act_loss: 0.00275421142578125|cri_loss: 0.0015354156494140625|unsuper_loss: 0.0
-average reward score: 5.109375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.80%) |Training time=0.46s (21.54%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4206|ppo_ep: 1|act_loss: -0.010833740234375|cri_loss: -0.0052947998046875|unsuper_loss: 0.0
-average reward score: 5.41796875
--------------------------------------------------------------------------------------
-|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.21%) |Training time=0.48s (20.51%) |Others=0.10 (4.28%)|CurSamplesPerSec=13.67 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4207|ppo_ep: 1|act_loss: 0.00415802001953125|cri_loss: 0.00371551513671875|unsuper_loss: 0.0
-average reward score: 4.78125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.03%) |Training time=0.46s (21.31%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4208|ppo_ep: 1|act_loss: 0.00266265869140625|cri_loss: 0.0019474029541015625|unsuper_loss: 0.0
-average reward score: 4.9140625
--------------------------------------------------------------------------------------
-|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.70s (74.45%) |Training time=0.48s (21.17%) |Others=0.10 (4.38%)|CurSamplesPerSec=14.02 |AvgSamplesPerSec=14.46
-[2023-04-14 11:22:44,152] [INFO] [logging.py:96:log_dist] [Rank 0] step=4210, skipped=56, lr=[4.8732973304860655e-06, 4.8732973304860655e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:22:44,170] [INFO] [timer.py:199:stop] epoch=0/micro_step=4210/global_step=4210, RunningAvgSamplesPerSec=105.27366239794104, CurrSamplesPerSec=105.68341682427847, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:22:44,263] [INFO] [logging.py:96:log_dist] [Rank 0] step=4210, skipped=66, lr=[2.5346488096166647e-06, 2.5346488096166647e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4209|ppo_ep: 1|act_loss: 0.004459381103515625|cri_loss: 0.0029392242431640625|unsuper_loss: 0.0
-average reward score: 5.3046875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.70%) |Training time=0.47s (21.66%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4210|ppo_ep: 1|act_loss: -0.007663726806640625|cri_loss: -0.0016937255859375|unsuper_loss: 0.0
-average reward score: 5.87890625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.40%) |Training time=0.43s (19.88%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4211|ppo_ep: 1|act_loss: 0.0038623809814453125|cri_loss: 0.002246856689453125|unsuper_loss: 0.0
-average reward score: 4.28515625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.37%) |Training time=0.48s (22.00%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4212|ppo_ep: 1|act_loss: -0.0183563232421875|cri_loss: -0.00844573974609375|unsuper_loss: 0.0
-average reward score: 4.8671875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.41%) |Training time=0.47s (21.94%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4213|ppo_ep: 1|act_loss: -0.01000213623046875|cri_loss: -0.004795074462890625|unsuper_loss: 0.0
-average reward score: 4.96875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.37%) |Training time=0.47s (21.99%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4214|ppo_ep: 1|act_loss: 0.051055908203125|cri_loss: 0.0279693603515625|unsuper_loss: 0.0
-average reward score: 6.16796875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.85%) |Training time=0.46s (21.52%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4215|ppo_ep: 1|act_loss: -0.01861572265625|cri_loss: -0.00881195068359375|unsuper_loss: 0.0
-average reward score: 5.0390625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.08%) |Training time=0.46s (21.27%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4216|ppo_ep: 1|act_loss: 0.003650665283203125|cri_loss: 0.0027332305908203125|unsuper_loss: 0.0
-average reward score: 5.94140625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.01%) |Training time=0.46s (21.32%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4217|ppo_ep: 1|act_loss: -0.02685546875|cri_loss: -0.0128631591796875|unsuper_loss: 0.0
-average reward score: 4.828125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.15%) |Training time=0.46s (21.18%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4218|ppo_ep: 1|act_loss: -0.01708984375|cri_loss: -0.007843017578125|unsuper_loss: 0.0
-average reward score: 5.98828125
--------------------------------------------------------------------------------------
-|E2E latency=3.50s |Gather latency=0.00s (0.00%) |Generate time=1.60s (45.66%) |Training time=0.45s (12.92%) |Others=1.45 (41.42%)|CurSamplesPerSec=9.14 |AvgSamplesPerSec=14.46
-[2023-04-14 11:23:07,071] [INFO] [logging.py:96:log_dist] [Rank 0] step=4220, skipped=56, lr=[4.85472174252902e-06, 4.85472174252902e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:23:07,089] [INFO] [timer.py:199:stop] epoch=0/micro_step=4220/global_step=4220, RunningAvgSamplesPerSec=105.27928430346253, CurrSamplesPerSec=109.89310827053387, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:23:07,182] [INFO] [logging.py:96:log_dist] [Rank 0] step=4220, skipped=66, lr=[2.5250245235679094e-06, 2.5250245235679094e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4219|ppo_ep: 1|act_loss: 0.005771636962890625|cri_loss: 0.0032749176025390625|unsuper_loss: 0.0
-average reward score: 5.828125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.26%) |Training time=0.45s (21.08%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4220|ppo_ep: 1|act_loss: -0.00738525390625|cri_loss: -0.003170013427734375|unsuper_loss: 0.0
-average reward score: 5.22265625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.93%) |Training time=0.46s (21.40%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4221|ppo_ep: 1|act_loss: -0.004085540771484375|cri_loss: -0.0013275146484375|unsuper_loss: 0.0
-average reward score: 5.140625
--------------------------------------------------------------------------------------
-|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.23%) |Training time=0.48s (20.52%) |Others=0.10 (4.26%)|CurSamplesPerSec=13.64 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4222|ppo_ep: 1|act_loss: -0.0155792236328125|cri_loss: -0.00717926025390625|unsuper_loss: 0.0
-average reward score: 4.3359375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.80%) |Training time=0.46s (21.56%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4223|ppo_ep: 1|act_loss: -0.0067138671875|cri_loss: -0.003093719482421875|unsuper_loss: 0.0
-average reward score: 5.1875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.20%) |Training time=0.45s (21.13%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4224|ppo_ep: 1|act_loss: -0.0264434814453125|cri_loss: -0.012542724609375|unsuper_loss: 0.0
-average reward score: 5.44140625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.68%) |Training time=0.44s (20.64%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4225|ppo_ep: 1|act_loss: -0.0164642333984375|cri_loss: -0.005649566650390625|unsuper_loss: 0.0
-average reward score: 5.203125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.92%) |Training time=0.44s (20.37%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4226|ppo_ep: 1|act_loss: -0.0190277099609375|cri_loss: -0.0092315673828125|unsuper_loss: 0.0
-average reward score: 6.03125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.59%) |Training time=0.44s (20.70%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4227|ppo_ep: 1|act_loss: 0.0005841255187988281|cri_loss: 0.0010204315185546875|unsuper_loss: 0.0
-average reward score: 5.6015625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.05%) |Training time=0.46s (21.32%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4228|ppo_ep: 1|act_loss: 0.0234222412109375|cri_loss: 0.01303863525390625|unsuper_loss: 0.0
-average reward score: 5.36328125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.14%) |Training time=0.46s (21.20%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46
-[2023-04-14 11:23:28,778] [INFO] [logging.py:96:log_dist] [Rank 0] step=4230, skipped=56, lr=[4.836145714024048e-06, 4.836145714024048e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:23:28,796] [INFO] [timer.py:199:stop] epoch=0/micro_step=4230/global_step=4230, RunningAvgSamplesPerSec=105.28801567814266, CurrSamplesPerSec=109.88582062485825, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:23:28,889] [INFO] [logging.py:96:log_dist] [Rank 0] step=4230, skipped=66, lr=[2.515399866595347e-06, 2.515399866595347e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4229|ppo_ep: 1|act_loss: 0.04559326171875|cri_loss: 0.0240631103515625|unsuper_loss: 0.0
-average reward score: 6.4765625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.19%) |Training time=0.45s (21.15%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4230|ppo_ep: 1|act_loss: 0.021453857421875|cri_loss: 0.011810302734375|unsuper_loss: 0.0
-average reward score: 5.09765625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.94%) |Training time=0.46s (21.42%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4231|ppo_ep: 1|act_loss: 0.0029163360595703125|cri_loss: 0.001949310302734375|unsuper_loss: 0.0
-average reward score: 4.72265625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.57%) |Training time=0.44s (20.74%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46
-[2023-04-14 11:23:35,324] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
-epoch: 0|step: 4232|ppo_ep: 1|act_loss: 0.0218048095703125|cri_loss: 0.011627197265625|unsuper_loss: 0.0
-average reward score: 5.7265625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.62%) |Training time=0.45s (21.19%) |Others=0.09 (4.18%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.46
-[2023-04-14 11:23:37,455] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
-epoch: 0|step: 4233|ppo_ep: 1|act_loss: -0.028045654296875|cri_loss: -0.0132598876953125|unsuper_loss: 0.0
-average reward score: 5.2890625
--------------------------------------------------------------------------------------
-|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.71%) |Training time=0.45s (21.10%) |Others=0.09 (4.19%)|CurSamplesPerSec=15.02 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4234|ppo_ep: 1|act_loss: 0.005527496337890625|cri_loss: 0.002880096435546875|unsuper_loss: 0.0
-average reward score: 5.890625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.29%) |Training time=0.45s (21.04%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4235|ppo_ep: 1|act_loss: 0.05145263671875|cri_loss: 0.0276641845703125|unsuper_loss: 0.0
-average reward score: 6.6015625
--------------------------------------------------------------------------------------
-|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.76s (76.17%) |Training time=0.45s (19.49%) |Others=0.10 (4.34%)|CurSamplesPerSec=13.83 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4236|ppo_ep: 1|act_loss: -0.0341796875|cri_loss: -0.015838623046875|unsuper_loss: 0.0
-average reward score: 4.73828125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.71%) |Training time=0.44s (20.62%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4237|ppo_ep: 1|act_loss: -0.017791748046875|cri_loss: -0.0085601806640625|unsuper_loss: 0.0
-average reward score: 4.92578125
--------------------------------------------------------------------------------------
-|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.74s (75.86%) |Training time=0.45s (19.78%) |Others=0.10 (4.36%)|CurSamplesPerSec=13.98 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4238|ppo_ep: 1|act_loss: -0.032806396484375|cri_loss: -0.01560211181640625|unsuper_loss: 0.0
-average reward score: 5.3359375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.90%) |Training time=0.44s (20.60%) |Others=0.12 (5.50%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46
-[2023-04-14 11:23:50,554] [INFO] [logging.py:96:log_dist] [Rank 0] step=4240, skipped=56, lr=[4.817569520312709e-06, 4.817569520312709e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:23:50,572] [INFO] [timer.py:199:stop] epoch=0/micro_step=4240/global_step=4240, RunningAvgSamplesPerSec=105.30150071014239, CurrSamplesPerSec=111.66534771424102, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:23:50,665] [INFO] [logging.py:96:log_dist] [Rank 0] step=4240, skipped=68, lr=[2.5076999698199706e-06, 2.5076999698199706e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4239|ppo_ep: 1|act_loss: -0.001766204833984375|cri_loss: 0.0019130706787109375|unsuper_loss: 0.0
-average reward score: 6.078125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.41%) |Training time=0.45s (20.94%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4240|ppo_ep: 1|act_loss: 0.0226898193359375|cri_loss: 0.0124053955078125|unsuper_loss: 0.0
-average reward score: 4.7734375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.84%) |Training time=0.44s (20.49%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4241|ppo_ep: 1|act_loss: 0.002574920654296875|cri_loss: 0.0016956329345703125|unsuper_loss: 0.0
-average reward score: 6.20703125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.00%) |Training time=0.49s (22.40%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4242|ppo_ep: 1|act_loss: 0.0186767578125|cri_loss: 0.01000213623046875|unsuper_loss: 0.0
-average reward score: 5.8984375
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.43%) |Training time=0.50s (22.87%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4243|ppo_ep: 1|act_loss: 0.030120849609375|cri_loss: 0.0162506103515625|unsuper_loss: 0.0
-average reward score: 5.34765625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.65%) |Training time=0.47s (21.59%) |Others=0.10 (4.77%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4244|ppo_ep: 1|act_loss: 0.0592041015625|cri_loss: 0.031158447265625|unsuper_loss: 0.0
-average reward score: 5.484375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.98%) |Training time=0.42s (19.12%) |Others=0.11 (4.90%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4245|ppo_ep: 1|act_loss: 0.0345458984375|cri_loss: 0.0182037353515625|unsuper_loss: 0.0
-average reward score: 4.79296875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.99%) |Training time=0.46s (21.33%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4246|ppo_ep: 1|act_loss: 0.00125885009765625|cri_loss: 0.0011882781982421875|unsuper_loss: 0.0
-average reward score: 4.9765625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.47%) |Training time=0.45s (20.88%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4247|ppo_ep: 1|act_loss: -0.0139312744140625|cri_loss: -0.00653076171875|unsuper_loss: 0.0
-average reward score: 6.15234375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.18%) |Training time=0.46s (21.17%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4248|ppo_ep: 1|act_loss: -0.0172271728515625|cri_loss: -0.0080413818359375|unsuper_loss: 0.0
-average reward score: 7.1796875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.60%) |Training time=0.45s (20.74%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
-[2023-04-14 11:24:12,290] [INFO] [logging.py:96:log_dist] [Rank 0] step=4250, skipped=56, lr=[4.798993436739004e-06, 4.798993436739004e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:24:12,308] [INFO] [timer.py:199:stop] epoch=0/micro_step=4250/global_step=4250, RunningAvgSamplesPerSec=105.30792594664827, CurrSamplesPerSec=114.12237540451974, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:24:12,401] [INFO] [logging.py:96:log_dist] [Rank 0] step=4250, skipped=68, lr=[2.4980750046916983e-06, 2.4980750046916983e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4249|ppo_ep: 1|act_loss: -0.02313232421875|cri_loss: -0.01113128662109375|unsuper_loss: 0.0
-average reward score: 6.88671875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.95%) |Training time=0.44s (20.41%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4250|ppo_ep: 1|act_loss: -0.0238037109375|cri_loss: -0.01103973388671875|unsuper_loss: 0.0
-average reward score: 5.0234375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.39%) |Training time=0.45s (20.96%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4251|ppo_ep: 1|act_loss: -0.03948974609375|cri_loss: -0.0182342529296875|unsuper_loss: 0.0
-average reward score: 4.28125
--------------------------------------------------------------------------------------
-|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.78s (75.28%) |Training time=0.48s (20.47%) |Others=0.10 (4.25%)|CurSamplesPerSec=13.51 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4252|ppo_ep: 1|act_loss: 0.0126495361328125|cri_loss: 0.006908416748046875|unsuper_loss: 0.0
-average reward score: 4.6328125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.88%) |Training time=0.47s (21.50%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4253|ppo_ep: 1|act_loss: 0.024200439453125|cri_loss: 0.0125732421875|unsuper_loss: 0.0
-average reward score: 7.3359375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.98%) |Training time=0.46s (21.40%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4254|ppo_ep: 1|act_loss: -0.021636962890625|cri_loss: -0.0095977783203125|unsuper_loss: 0.0
-average reward score: 5.4765625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.67%) |Training time=0.47s (21.71%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4255|ppo_ep: 1|act_loss: -0.01812744140625|cri_loss: -0.008758544921875|unsuper_loss: 0.0
-average reward score: 6.01171875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.73%) |Training time=0.47s (21.62%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4256|ppo_ep: 1|act_loss: -0.004058837890625|cri_loss: -0.0011882781982421875|unsuper_loss: 0.0
-average reward score: 4.5703125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.89%) |Training time=0.47s (21.46%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4257|ppo_ep: 1|act_loss: 0.021209716796875|cri_loss: 0.01186370849609375|unsuper_loss: 0.0
-average reward score: 5.91796875
--------------------------------------------------------------------------------------
-|E2E latency=2.86s |Gather latency=0.00s (0.00%) |Generate time=1.61s (56.22%) |Training time=0.47s (16.46%) |Others=0.78 (27.32%)|CurSamplesPerSec=11.18 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4258|ppo_ep: 1|act_loss: 0.06622314453125|cri_loss: 0.0384521484375|unsuper_loss: 0.0
-average reward score: 5.46484375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.79%) |Training time=0.44s (20.55%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
-[2023-04-14 11:24:34,891] [INFO] [logging.py:96:log_dist] [Rank 0] step=4260, skipped=56, lr=[4.7804177386453025e-06, 4.7804177386453025e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:24:34,909] [INFO] [timer.py:199:stop] epoch=0/micro_step=4260/global_step=4260, RunningAvgSamplesPerSec=105.30860582605034, CurrSamplesPerSec=107.67247823369864, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:24:35,002] [INFO] [logging.py:96:log_dist] [Rank 0] step=4260, skipped=68, lr=[2.488450068096499e-06, 2.488450068096499e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4259|ppo_ep: 1|act_loss: 0.04168701171875|cri_loss: 0.021820068359375|unsuper_loss: 0.0
-average reward score: 4.89453125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.92%) |Training time=0.46s (21.16%) |Others=0.11 (4.93%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4260|ppo_ep: 1|act_loss: -0.031890869140625|cri_loss: -0.0142669677734375|unsuper_loss: 0.0
-average reward score: 5.421875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.96%) |Training time=0.46s (21.40%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4261|ppo_ep: 1|act_loss: 0.018280029296875|cri_loss: 0.00974273681640625|unsuper_loss: 0.0
-average reward score: 6.0546875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.62%) |Training time=0.47s (21.76%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4262|ppo_ep: 1|act_loss: -0.0208587646484375|cri_loss: -0.00948333740234375|unsuper_loss: 0.0
-average reward score: 6.2421875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.35%) |Training time=0.48s (22.02%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4263|ppo_ep: 1|act_loss: -0.01275634765625|cri_loss: -0.0055389404296875|unsuper_loss: 0.0
-average reward score: 5.6796875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.70%) |Training time=0.47s (21.67%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4264|ppo_ep: 1|act_loss: -0.0343017578125|cri_loss: -0.01505279541015625|unsuper_loss: 0.0
-average reward score: 5.1875
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.60%) |Training time=0.48s (21.81%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4265|ppo_ep: 1|act_loss: -0.0079498291015625|cri_loss: -0.0037689208984375|unsuper_loss: 0.0
-average reward score: 4.6640625
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.25%) |Training time=0.48s (22.13%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4266|ppo_ep: 1|act_loss: -0.0089263916015625|cri_loss: -0.0040740966796875|unsuper_loss: 0.0
-average reward score: 5.4609375
--------------------------------------------------------------------------------------
-|E2E latency=2.50s |Gather latency=0.00s (0.00%) |Generate time=1.79s (71.52%) |Training time=0.61s (24.32%) |Others=0.10 (4.15%)|CurSamplesPerSec=12.78 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4267|ppo_ep: 1|act_loss: -0.00508880615234375|cri_loss: -0.002323150634765625|unsuper_loss: 0.0
-average reward score: 5.50390625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.44%) |Training time=0.48s (21.92%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4268|ppo_ep: 1|act_loss: 0.021148681640625|cri_loss: 0.01126861572265625|unsuper_loss: 0.0
-average reward score: 5.125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.31%) |Training time=0.48s (22.07%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
-[2023-04-14 11:24:56,993] [INFO] [logging.py:96:log_dist] [Rank 0] step=4270, skipped=56, lr=[4.761842701368264e-06, 4.761842701368264e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:24:57,011] [INFO] [timer.py:199:stop] epoch=0/micro_step=4270/global_step=4270, RunningAvgSamplesPerSec=105.29193536234465, CurrSamplesPerSec=100.81037834200097, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:24:57,104] [INFO] [logging.py:96:log_dist] [Rank 0] step=4270, skipped=68, lr=[2.4788253026991545e-06, 2.4788253026991545e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4269|ppo_ep: 1|act_loss: 0.0455322265625|cri_loss: 0.023406982421875|unsuper_loss: 0.0
-average reward score: 6.078125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.40%) |Training time=0.48s (22.01%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4270|ppo_ep: 1|act_loss: 0.0024509429931640625|cri_loss: 0.0016841888427734375|unsuper_loss: 0.0
-average reward score: 5.0859375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.35%) |Training time=0.46s (21.00%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4271|ppo_ep: 1|act_loss: 0.010040283203125|cri_loss: 0.006195068359375|unsuper_loss: 0.0
-average reward score: 4.34765625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.49%) |Training time=0.48s (21.90%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4272|ppo_ep: 1|act_loss: 0.022003173828125|cri_loss: 0.01129150390625|unsuper_loss: 0.0
-average reward score: 5.5078125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.43%) |Training time=0.48s (21.93%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4273|ppo_ep: 1|act_loss: 0.018524169921875|cri_loss: 0.0123443603515625|unsuper_loss: 0.0
-average reward score: 6.09375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.55%) |Training time=0.47s (21.78%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4274|ppo_ep: 1|act_loss: -0.00841522216796875|cri_loss: -0.0037631988525390625|unsuper_loss: 0.0
-average reward score: 5.1953125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.23%) |Training time=0.48s (22.16%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4275|ppo_ep: 1|act_loss: -0.0285491943359375|cri_loss: -0.0128021240234375|unsuper_loss: 0.0
-average reward score: 6.06640625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.45%) |Training time=0.48s (21.92%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4276|ppo_ep: 1|act_loss: -0.0015773773193359375|cri_loss: -0.0004177093505859375|unsuper_loss: 0.0
-average reward score: 5.4765625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.69%) |Training time=0.47s (21.67%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4277|ppo_ep: 1|act_loss: -0.031494140625|cri_loss: -0.01528167724609375|unsuper_loss: 0.0
-average reward score: 5.55859375
--------------------------------------------------------------------------------------
-|E2E latency=2.71s |Gather latency=0.00s (0.00%) |Generate time=1.60s (58.98%) |Training time=0.48s (17.68%) |Others=0.63 (23.34%)|CurSamplesPerSec=11.82 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4278|ppo_ep: 1|act_loss: -0.00934600830078125|cri_loss: -0.004230499267578125|unsuper_loss: 0.0
-average reward score: 5.5390625
--------------------------------------------------------------------------------------
-|E2E latency=2.08s |Gather latency=0.00s (0.00%) |Generate time=1.59s (76.59%) |Training time=0.39s (18.58%) |Others=0.10 (4.82%)|CurSamplesPerSec=15.40 |AvgSamplesPerSec=14.46
-[2023-04-14 11:25:19,142] [INFO] [logging.py:96:log_dist] [Rank 0] step=4280, skipped=56, lr=[4.74326860023475e-06, 4.74326860023475e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:25:19,161] [INFO] [timer.py:199:stop] epoch=0/micro_step=4280/global_step=4280, RunningAvgSamplesPerSec=105.29278448645617, CurrSamplesPerSec=100.23129910087523, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:25:19,253] [INFO] [logging.py:96:log_dist] [Rank 0] step=4280, skipped=68, lr=[2.4692008511619042e-06, 2.4692008511619042e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4279|ppo_ep: 1|act_loss: 0.0067291259765625|cri_loss: 0.0035915374755859375|unsuper_loss: 0.0
-average reward score: 6.64453125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.25%) |Training time=0.48s (22.15%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4280|ppo_ep: 1|act_loss: -0.0108489990234375|cri_loss: -0.005157470703125|unsuper_loss: 0.0
-average reward score: 7.859375
--------------------------------------------------------------------------------------
-|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.77s (75.08%) |Training time=0.48s (20.30%) |Others=0.11 (4.62%)|CurSamplesPerSec=13.58 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4281|ppo_ep: 1|act_loss: 0.0028228759765625|cri_loss: 0.0021076202392578125|unsuper_loss: 0.0
-average reward score: 4.87890625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.37%) |Training time=0.48s (21.99%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4282|ppo_ep: 1|act_loss: -0.0103912353515625|cri_loss: -0.004909515380859375|unsuper_loss: 0.0
-average reward score: 5.7734375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.10%) |Training time=0.49s (22.32%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4283|ppo_ep: 1|act_loss: 0.004169464111328125|cri_loss: 0.0023479461669921875|unsuper_loss: 0.0
-average reward score: 5.7109375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.29%) |Training time=0.48s (22.07%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4284|ppo_ep: 1|act_loss: 0.01311492919921875|cri_loss: 0.00726318359375|unsuper_loss: 0.0
-average reward score: 5.77734375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.35%) |Training time=0.48s (22.04%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4285|ppo_ep: 1|act_loss: 0.0207977294921875|cri_loss: 0.0112457275390625|unsuper_loss: 0.0
-average reward score: 5.87890625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.40%) |Training time=0.48s (21.95%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4286|ppo_ep: 1|act_loss: 0.0085296630859375|cri_loss: 0.004547119140625|unsuper_loss: 0.0
-average reward score: 4.7734375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.35%) |Training time=0.48s (22.04%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4287|ppo_ep: 1|act_loss: -0.0113525390625|cri_loss: -0.005161285400390625|unsuper_loss: 0.0
-average reward score: 6.25390625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.48%) |Training time=0.47s (21.90%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4288|ppo_ep: 1|act_loss: 0.027923583984375|cri_loss: 0.0167694091796875|unsuper_loss: 0.0
-average reward score: 4.68359375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.57%) |Training time=0.47s (21.83%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
-[2023-04-14 11:25:41,026] [INFO] [logging.py:96:log_dist] [Rank 0] step=4290, skipped=56, lr=[4.724695710557746e-06, 4.724695710557746e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:25:41,044] [INFO] [timer.py:199:stop] epoch=0/micro_step=4290/global_step=4290, RunningAvgSamplesPerSec=105.28350097959824, CurrSamplesPerSec=101.11827769046826, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:25:41,137] [INFO] [logging.py:96:log_dist] [Rank 0] step=4290, skipped=68, lr=[2.4595768561423393e-06, 2.4595768561423393e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4289|ppo_ep: 1|act_loss: -0.03314208984375|cri_loss: -0.015716552734375|unsuper_loss: 0.0
-average reward score: 4.859375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.36%) |Training time=0.48s (22.05%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4290|ppo_ep: 1|act_loss: -0.02056884765625|cri_loss: -0.0088958740234375|unsuper_loss: 0.0
-average reward score: 5.39453125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.54%) |Training time=0.47s (21.84%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4291|ppo_ep: 1|act_loss: -0.023284912109375|cri_loss: -0.01110076904296875|unsuper_loss: 0.0
-average reward score: 5.453125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.37%) |Training time=0.48s (22.02%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4292|ppo_ep: 1|act_loss: 0.0148468017578125|cri_loss: 0.008026123046875|unsuper_loss: 0.0
-average reward score: 5.296875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.37%) |Training time=0.48s (22.01%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4293|ppo_ep: 1|act_loss: 0.01361846923828125|cri_loss: 0.0087890625|unsuper_loss: 0.0
-average reward score: 5.26171875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.47%) |Training time=0.48s (21.92%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4294|ppo_ep: 1|act_loss: 0.004306793212890625|cri_loss: 0.002872467041015625|unsuper_loss: 0.0
-average reward score: 4.5390625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.29%) |Training time=0.48s (22.14%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4295|ppo_ep: 1|act_loss: 0.0026702880859375|cri_loss: 0.0014410018920898438|unsuper_loss: 0.0
-average reward score: 5.55859375
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.76%) |Training time=0.48s (21.63%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.54 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4296|ppo_ep: 1|act_loss: 0.00958251953125|cri_loss: 0.005100250244140625|unsuper_loss: 0.0
-average reward score: 4.62109375
--------------------------------------------------------------------------------------
-|E2E latency=3.13s |Gather latency=0.00s (0.00%) |Generate time=1.78s (56.75%) |Training time=0.48s (15.22%) |Others=0.88 (28.02%)|CurSamplesPerSec=10.23 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4297|ppo_ep: 1|act_loss: 0.03570556640625|cri_loss: 0.018829345703125|unsuper_loss: 0.0
-average reward score: 5.22265625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.34%) |Training time=0.48s (22.03%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4298|ppo_ep: 1|act_loss: 0.03399658203125|cri_loss: 0.019012451171875|unsuper_loss: 0.0
-average reward score: 5.3828125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.42%) |Training time=0.48s (21.97%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
-[2023-04-14 11:26:03,707] [INFO] [logging.py:96:log_dist] [Rank 0] step=4300, skipped=56, lr=[4.706124307632283e-06, 4.706124307632283e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:26:03,726] [INFO] [timer.py:199:stop] epoch=0/micro_step=4300/global_step=4300, RunningAvgSamplesPerSec=105.27549328885391, CurrSamplesPerSec=102.87540000751152, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:26:03,819] [INFO] [logging.py:96:log_dist] [Rank 0] step=4300, skipped=68, lr=[2.4499534602912815e-06, 2.4499534602912815e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4299|ppo_ep: 1|act_loss: 0.0301055908203125|cri_loss: 0.0162200927734375|unsuper_loss: 0.0
-average reward score: 5.390625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.53%) |Training time=0.47s (21.86%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4300|ppo_ep: 1|act_loss: -0.0199432373046875|cri_loss: -0.009765625|unsuper_loss: 0.0
-average reward score: 4.8671875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.84%) |Training time=0.49s (22.54%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4301|ppo_ep: 1|act_loss: 0.02093505859375|cri_loss: 0.01099395751953125|unsuper_loss: 0.0
-average reward score: 6.29296875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.15%) |Training time=0.48s (22.23%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4302|ppo_ep: 1|act_loss: 0.001983642578125|cri_loss: 0.0019588470458984375|unsuper_loss: 0.0
-average reward score: 5.96875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.14%) |Training time=0.48s (22.22%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4303|ppo_ep: 1|act_loss: -0.041778564453125|cri_loss: -0.0200042724609375|unsuper_loss: 0.0
-average reward score: 5.1796875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.48%) |Training time=0.47s (21.90%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4304|ppo_ep: 1|act_loss: -0.0122833251953125|cri_loss: -0.00597381591796875|unsuper_loss: 0.0
-average reward score: 5.25390625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.24%) |Training time=0.48s (22.15%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4305|ppo_ep: 1|act_loss: 0.006443023681640625|cri_loss: 0.0036296844482421875|unsuper_loss: 0.0
-average reward score: 4.6796875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.39%) |Training time=0.47s (21.97%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4306|ppo_ep: 1|act_loss: 0.0103607177734375|cri_loss: 0.0058746337890625|unsuper_loss: 0.0
-average reward score: 5.63671875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.47%) |Training time=0.47s (21.89%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4307|ppo_ep: 1|act_loss: 0.037078857421875|cri_loss: 0.0198211669921875|unsuper_loss: 0.0
-average reward score: 6.58203125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.18%) |Training time=0.48s (22.21%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4308|ppo_ep: 1|act_loss: 0.00476837158203125|cri_loss: 0.0025424957275390625|unsuper_loss: 0.0
-average reward score: 4.515625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.24%) |Training time=0.48s (22.12%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
-[2023-04-14 11:26:25,365] [INFO] [logging.py:96:log_dist] [Rank 0] step=4310, skipped=56, lr=[4.687554666731353e-06, 4.687554666731353e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:26:25,383] [INFO] [timer.py:199:stop] epoch=0/micro_step=4310/global_step=4310, RunningAvgSamplesPerSec=105.26517600866048, CurrSamplesPerSec=101.96257966198779, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:26:25,476] [INFO] [logging.py:96:log_dist] [Rank 0] step=4310, skipped=68, lr=[2.440330806250673e-06, 2.440330806250673e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4309|ppo_ep: 1|act_loss: 0.052459716796875|cri_loss: 0.0281829833984375|unsuper_loss: 0.0
-average reward score: 6.38671875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.38%) |Training time=0.48s (21.99%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4310|ppo_ep: 1|act_loss: -0.01421356201171875|cri_loss: -0.00677490234375|unsuper_loss: 0.0
-average reward score: 5.734375
--------------------------------------------------------------------------------------
-|E2E latency=3.60s |Gather latency=0.00s (0.00%) |Generate time=1.59s (44.04%) |Training time=0.50s (14.00%) |Others=1.51 (41.96%)|CurSamplesPerSec=8.88 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4311|ppo_ep: 1|act_loss: 0.0005640983581542969|cri_loss: 0.000530242919921875|unsuper_loss: 0.0
-average reward score: 5.70703125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.16%) |Training time=0.48s (22.21%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4312|ppo_ep: 1|act_loss: -0.001445770263671875|cri_loss: -0.0002770423889160156|unsuper_loss: 0.0
-average reward score: 4.609375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.25%) |Training time=0.48s (22.12%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4313|ppo_ep: 1|act_loss: -0.0284423828125|cri_loss: -0.0135040283203125|unsuper_loss: 0.0
-average reward score: 5.65625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.81%) |Training time=0.49s (22.58%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4314|ppo_ep: 1|act_loss: -0.0107269287109375|cri_loss: -0.005100250244140625|unsuper_loss: 0.0
-average reward score: 6.0703125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.47%) |Training time=0.47s (21.86%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4315|ppo_ep: 1|act_loss: -0.0107421875|cri_loss: -0.004486083984375|unsuper_loss: 0.0
-average reward score: 5.67578125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.90%) |Training time=0.49s (22.50%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4316|ppo_ep: 1|act_loss: -0.00214385986328125|cri_loss: -0.00075531005859375|unsuper_loss: 0.0
-average reward score: 5.27734375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.10%) |Training time=0.48s (22.28%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4317|ppo_ep: 1|act_loss: -0.015869140625|cri_loss: -0.007343292236328125|unsuper_loss: 0.0
-average reward score: 6.36328125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.08%) |Training time=0.48s (22.30%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4318|ppo_ep: 1|act_loss: 0.00504302978515625|cri_loss: 0.0028839111328125|unsuper_loss: 0.0
-average reward score: 5.78125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.34%) |Training time=0.48s (22.07%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.46
-[2023-04-14 11:26:48,503] [INFO] [logging.py:96:log_dist] [Rank 0] step=4320, skipped=56, lr=[4.66898706310183e-06, 4.66898706310183e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:26:48,521] [INFO] [timer.py:199:stop] epoch=0/micro_step=4320/global_step=4320, RunningAvgSamplesPerSec=105.2507615935386, CurrSamplesPerSec=100.87576558125552, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:26:48,614] [INFO] [logging.py:96:log_dist] [Rank 0] step=4320, skipped=68, lr=[2.4307090366514594e-06, 2.4307090366514594e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4319|ppo_ep: 1|act_loss: 0.004791259765625|cri_loss: 0.002666473388671875|unsuper_loss: 0.0
-average reward score: 6.296875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.22%) |Training time=0.48s (22.18%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4320|ppo_ep: 1|act_loss: 0.02197265625|cri_loss: 0.0123138427734375|unsuper_loss: 0.0
-average reward score: 5.37109375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.06%) |Training time=0.48s (22.32%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4321|ppo_ep: 1|act_loss: 0.017242431640625|cri_loss: 0.00876617431640625|unsuper_loss: 0.0
-average reward score: 4.6484375
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.59s (71.99%) |Training time=0.48s (21.82%) |Others=0.14 (6.20%)|CurSamplesPerSec=14.52 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4322|ppo_ep: 1|act_loss: -0.002384185791015625|cri_loss: -0.0009288787841796875|unsuper_loss: 0.0
-average reward score: 4.65625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.31%) |Training time=0.48s (22.05%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4323|ppo_ep: 1|act_loss: -0.0127716064453125|cri_loss: -0.00608062744140625|unsuper_loss: 0.0
-average reward score: 4.53125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.13%) |Training time=0.48s (22.26%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4324|ppo_ep: 1|act_loss: -0.003589630126953125|cri_loss: -0.000904083251953125|unsuper_loss: 0.0
-average reward score: 5.62890625
--------------------------------------------------------------------------------------
-|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.70s (74.50%) |Training time=0.48s (21.10%) |Others=0.10 (4.40%)|CurSamplesPerSec=14.01 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4325|ppo_ep: 1|act_loss: -0.06109619140625|cri_loss: -0.0297698974609375|unsuper_loss: 0.0
-average reward score: 4.8984375
--------------------------------------------------------------------------------------
-|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.75s (74.89%) |Training time=0.49s (20.82%) |Others=0.10 (4.30%)|CurSamplesPerSec=13.69 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4326|ppo_ep: 1|act_loss: -0.0284423828125|cri_loss: -0.01348114013671875|unsuper_loss: 0.0
-average reward score: 5.02734375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.90%) |Training time=0.49s (22.50%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4327|ppo_ep: 1|act_loss: 0.00945281982421875|cri_loss: 0.00511932373046875|unsuper_loss: 0.0
-average reward score: 5.19921875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.12%) |Training time=0.48s (22.26%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4328|ppo_ep: 1|act_loss: -0.01401519775390625|cri_loss: -0.006587982177734375|unsuper_loss: 0.0
-average reward score: 5.3203125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.22%) |Training time=0.48s (22.13%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
-[2023-04-14 11:27:10,534] [INFO] [logging.py:96:log_dist] [Rank 0] step=4330, skipped=56, lr=[4.650421771960395e-06, 4.650421771960395e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:27:10,551] [INFO] [timer.py:199:stop] epoch=0/micro_step=4330/global_step=4330, RunningAvgSamplesPerSec=105.23655293591679, CurrSamplesPerSec=96.47581163393487, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:27:10,644] [INFO] [logging.py:96:log_dist] [Rank 0] step=4330, skipped=68, lr=[2.421088294111479e-06, 2.421088294111479e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4329|ppo_ep: 1|act_loss: -0.0030517578125|cri_loss: -0.001354217529296875|unsuper_loss: 0.0
-average reward score: 5.578125
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.79%) |Training time=0.49s (22.62%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4330|ppo_ep: 1|act_loss: 0.00681304931640625|cri_loss: 0.003574371337890625|unsuper_loss: 0.0
-average reward score: 4.86328125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.85%) |Training time=0.49s (22.53%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4331|ppo_ep: 1|act_loss: 0.0313720703125|cri_loss: 0.016265869140625|unsuper_loss: 0.0
-average reward score: 4.59765625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.17%) |Training time=0.48s (22.19%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4332|ppo_ep: 1|act_loss: 0.050048828125|cri_loss: 0.02886962890625|unsuper_loss: 0.0
-average reward score: 5.9609375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.03%) |Training time=0.48s (22.36%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4333|ppo_ep: 1|act_loss: 0.0006165504455566406|cri_loss: 0.0004563331604003906|unsuper_loss: 0.0
-average reward score: 4.64453125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.79%) |Training time=0.49s (22.61%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46
-[2023-04-14 11:27:21,487] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
-epoch: 0|step: 4334|ppo_ep: 1|act_loss: 0.007572174072265625|cri_loss: 0.00405120849609375|unsuper_loss: 0.0
-average reward score: 5.3984375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.49%) |Training time=0.48s (22.33%) |Others=0.09 (4.18%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
-[2023-04-14 11:27:23,649] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
-epoch: 0|step: 4335|ppo_ep: 1|act_loss: 0.09259033203125|cri_loss: 0.0550537109375|unsuper_loss: 0.0
-average reward score: 5.6171875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.20%) |Training time=0.49s (22.67%) |Others=0.09 (4.13%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4336|ppo_ep: 1|act_loss: 0.0041046142578125|cri_loss: 0.00220489501953125|unsuper_loss: 0.0
-average reward score: 5.99609375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.03%) |Training time=0.48s (22.35%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4337|ppo_ep: 1|act_loss: -0.0007777214050292969|cri_loss: -1.049041748046875e-05|unsuper_loss: 0.0
-average reward score: 4.23046875
--------------------------------------------------------------------------------------
-|E2E latency=3.05s |Gather latency=0.00s (0.00%) |Generate time=1.58s (51.88%) |Training time=0.49s (15.97%) |Others=0.98 (32.15%)|CurSamplesPerSec=10.49 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4338|ppo_ep: 1|act_loss: 0.0126495361328125|cri_loss: 0.00670623779296875|unsuper_loss: 0.0
-average reward score: 6.1796875
--------------------------------------------------------------------------------------
-|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.58s (70.00%) |Training time=0.49s (21.83%) |Others=0.19 (8.17%)|CurSamplesPerSec=14.14 |AvgSamplesPerSec=14.46
-[2023-04-14 11:27:33,216] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
-[2023-04-14 11:27:33,216] [INFO] [logging.py:96:log_dist] [Rank 0] step=4340, skipped=57, lr=[4.633715214549443e-06, 4.633715214549443e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:27:33,217] [INFO] [timer.py:199:stop] epoch=0/micro_step=4340/global_step=4340, RunningAvgSamplesPerSec=105.22239416119598, CurrSamplesPerSec=108.74004434905967, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:27:33,309] [INFO] [logging.py:96:log_dist] [Rank 0] step=4340, skipped=70, lr=[2.413392535391663e-06, 2.413392535391663e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4339|ppo_ep: 1|act_loss: -0.0102386474609375|cri_loss: -0.00473785400390625|unsuper_loss: 0.0
-average reward score: 6.28125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.45%) |Training time=0.46s (20.93%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.46
-[2023-04-14 11:27:35,354] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
-epoch: 0|step: 4340|ppo_ep: 1|act_loss: -0.00921630859375|cri_loss: -0.00395965576171875|unsuper_loss: 0.0
-average reward score: 5.2265625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.07%) |Training time=0.45s (21.25%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4341|ppo_ep: 1|act_loss: -0.0147705078125|cri_loss: -0.0072479248046875|unsuper_loss: 0.0
-average reward score: 6.125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.96%) |Training time=0.49s (22.43%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4342|ppo_ep: 1|act_loss: -0.0070648193359375|cri_loss: -0.003284454345703125|unsuper_loss: 0.0
-average reward score: 6.96484375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.02%) |Training time=0.48s (22.35%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4343|ppo_ep: 1|act_loss: -0.040374755859375|cri_loss: -0.0195770263671875|unsuper_loss: 0.0
-average reward score: 5.09765625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.98%) |Training time=0.49s (22.39%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4344|ppo_ep: 1|act_loss: -0.0029506683349609375|cri_loss: -0.00118255615234375|unsuper_loss: 0.0
-average reward score: 4.73828125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.79%) |Training time=0.49s (22.60%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4345|ppo_ep: 1|act_loss: 0.01107025146484375|cri_loss: 0.00582122802734375|unsuper_loss: 0.0
-average reward score: 5.76171875
--------------------------------------------------------------------------------------
-|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.64s (73.22%) |Training time=0.50s (22.30%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.32 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4346|ppo_ep: 1|act_loss: 0.037841796875|cri_loss: 0.0195465087890625|unsuper_loss: 0.0
-average reward score: 6.234375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.05%) |Training time=0.48s (22.31%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4347|ppo_ep: 1|act_loss: 0.010650634765625|cri_loss: 0.00545501708984375|unsuper_loss: 0.0
-average reward score: 4.9296875
--------------------------------------------------------------------------------------
-|E2E latency=2.58s |Gather latency=0.00s (0.00%) |Generate time=1.58s (61.26%) |Training time=0.49s (18.85%) |Others=0.51 (19.88%)|CurSamplesPerSec=12.40 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4348|ppo_ep: 1|act_loss: -0.01806640625|cri_loss: -0.00841522216796875|unsuper_loss: 0.0
-average reward score: 5.7890625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.95%) |Training time=0.48s (22.41%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
-[2023-04-14 11:27:55,334] [INFO] [logging.py:96:log_dist] [Rank 0] step=4350, skipped=58, lr=[4.617010953733625e-06, 4.617010953733625e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:27:55,352] [INFO] [timer.py:199:stop] epoch=0/micro_step=4350/global_step=4350, RunningAvgSamplesPerSec=105.20829371375697, CurrSamplesPerSec=99.3526850550957, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:27:55,445] [INFO] [logging.py:96:log_dist] [Rank 0] step=4350, skipped=70, lr=[2.4037740009053053e-06, 2.4037740009053053e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4349|ppo_ep: 1|act_loss: 0.014923095703125|cri_loss: 0.00814056396484375|unsuper_loss: 0.0
-average reward score: 5.59375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.05%) |Training time=0.48s (22.35%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4350|ppo_ep: 1|act_loss: -0.0151824951171875|cri_loss: -0.00742340087890625|unsuper_loss: 0.0
-average reward score: 4.83984375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.96%) |Training time=0.49s (22.42%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4351|ppo_ep: 1|act_loss: -0.00119781494140625|cri_loss: -0.0004775524139404297|unsuper_loss: 0.0
-average reward score: 5.14453125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.04%) |Training time=0.48s (22.32%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4352|ppo_ep: 1|act_loss: 0.00487518310546875|cri_loss: 0.0028095245361328125|unsuper_loss: 0.0
-average reward score: 5.4296875
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.78%) |Training time=0.50s (22.63%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4353|ppo_ep: 1|act_loss: 0.022918701171875|cri_loss: 0.0121917724609375|unsuper_loss: 0.0
-average reward score: 5.7421875
--------------------------------------------------------------------------------------
-|E2E latency=3.49s |Gather latency=0.00s (0.00%) |Generate time=1.68s (48.11%) |Training time=0.56s (16.00%) |Others=1.25 (35.89%)|CurSamplesPerSec=9.16 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4354|ppo_ep: 1|act_loss: 0.0035247802734375|cri_loss: 0.0018749237060546875|unsuper_loss: 0.0
-average reward score: 5.70703125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.83%) |Training time=0.47s (21.51%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4355|ppo_ep: 1|act_loss: -0.0120697021484375|cri_loss: -0.004734039306640625|unsuper_loss: 0.0
-average reward score: 5.90625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.34%) |Training time=0.48s (22.03%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4356|ppo_ep: 1|act_loss: 0.02142333984375|cri_loss: 0.0111846923828125|unsuper_loss: 0.0
-average reward score: 5.4296875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.35%) |Training time=0.48s (22.01%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4357|ppo_ep: 1|act_loss: -0.0118560791015625|cri_loss: -0.00458526611328125|unsuper_loss: 0.0
-average reward score: 6.09765625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.27%) |Training time=0.48s (22.10%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4358|ppo_ep: 1|act_loss: 0.0582275390625|cri_loss: 0.0309295654296875|unsuper_loss: 0.0
-average reward score: 5.0390625
--------------------------------------------------------------------------------------
-|E2E latency=2.51s |Gather latency=0.00s (0.00%) |Generate time=1.60s (63.54%) |Training time=0.47s (18.86%) |Others=0.44 (17.60%)|CurSamplesPerSec=12.73 |AvgSamplesPerSec=14.46
-[2023-04-14 11:28:18,696] [INFO] [logging.py:96:log_dist] [Rank 0] step=4360, skipped=58, lr=[4.5984536014041675e-06, 4.5984536014041675e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:28:18,714] [INFO] [timer.py:199:stop] epoch=0/micro_step=4360/global_step=4360, RunningAvgSamplesPerSec=105.19267954139451, CurrSamplesPerSec=105.13316387876034, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:28:18,807] [INFO] [logging.py:96:log_dist] [Rank 0] step=4360, skipped=70, lr=[2.3941568927203878e-06, 2.3941568927203878e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4359|ppo_ep: 1|act_loss: -0.001995086669921875|cri_loss: -0.000576019287109375|unsuper_loss: 0.0
-average reward score: 5.63671875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.77%) |Training time=0.47s (21.59%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4360|ppo_ep: 1|act_loss: -0.010589599609375|cri_loss: -0.005023956298828125|unsuper_loss: 0.0
-average reward score: 5.0625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.93%) |Training time=0.46s (21.42%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4361|ppo_ep: 1|act_loss: -0.0277099609375|cri_loss: -0.01325225830078125|unsuper_loss: 0.0
-average reward score: 4.890625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.24%) |Training time=0.45s (21.07%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4362|ppo_ep: 1|act_loss: -0.0044097900390625|cri_loss: -0.001445770263671875|unsuper_loss: 0.0
-average reward score: 5.00390625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.20%) |Training time=0.45s (21.10%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4363|ppo_ep: 1|act_loss: 0.0423583984375|cri_loss: 0.0220489501953125|unsuper_loss: 0.0
-average reward score: 5.6484375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.14%) |Training time=0.45s (21.20%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4364|ppo_ep: 1|act_loss: 0.0306396484375|cri_loss: 0.015960693359375|unsuper_loss: 0.0
-average reward score: 5.4765625
--------------------------------------------------------------------------------------
-|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.32%) |Training time=0.45s (20.97%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4365|ppo_ep: 1|act_loss: 0.003692626953125|cri_loss: 0.0020427703857421875|unsuper_loss: 0.0
-average reward score: 5.4609375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.49%) |Training time=0.44s (20.81%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4366|ppo_ep: 1|act_loss: -0.003925323486328125|cri_loss: -0.0013608932495117188|unsuper_loss: 0.0
-average reward score: 5.90625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.15%) |Training time=0.45s (21.19%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4367|ppo_ep: 1|act_loss: 0.03369140625|cri_loss: 0.0172882080078125|unsuper_loss: 0.0
-average reward score: 5.30859375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.04%) |Training time=0.46s (21.28%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4368|ppo_ep: 1|act_loss: 0.0003256797790527344|cri_loss: 0.0004138946533203125|unsuper_loss: 0.0
-average reward score: 5.4375
--------------------------------------------------------------------------------------
-|E2E latency=2.82s |Gather latency=0.00s (0.00%) |Generate time=1.66s (58.85%) |Training time=0.49s (17.54%) |Others=0.67 (23.62%)|CurSamplesPerSec=11.36 |AvgSamplesPerSec=14.46
-[2023-04-14 11:28:40,789] [INFO] [logging.py:96:log_dist] [Rank 0] step=4370, skipped=58, lr=[4.579899607038848e-06, 4.579899607038848e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:28:40,807] [INFO] [timer.py:199:stop] epoch=0/micro_step=4370/global_step=4370, RunningAvgSamplesPerSec=105.20610923662908, CurrSamplesPerSec=110.14616484290242, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:28:40,900] [INFO] [logging.py:96:log_dist] [Rank 0] step=4370, skipped=70, lr=[2.3845413533856517e-06, 2.3845413533856517e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4369|ppo_ep: 1|act_loss: 0.0188140869140625|cri_loss: 0.0099945068359375|unsuper_loss: 0.0
-average reward score: 5.8203125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.13%) |Training time=0.45s (21.20%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4370|ppo_ep: 1|act_loss: 0.0085601806640625|cri_loss: 0.0045623779296875|unsuper_loss: 0.0
-average reward score: 4.6640625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.40%) |Training time=0.45s (20.89%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4371|ppo_ep: 1|act_loss: -0.01934814453125|cri_loss: -0.00904083251953125|unsuper_loss: 0.0
-average reward score: 5.546875
--------------------------------------------------------------------------------------
-|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.30%) |Training time=0.45s (21.00%) |Others=0.10 (4.69%)|CurSamplesPerSec=15.00 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4372|ppo_ep: 1|act_loss: -0.006256103515625|cri_loss: -0.0027332305908203125|unsuper_loss: 0.0
-average reward score: 5.35546875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.17%) |Training time=0.46s (21.18%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4373|ppo_ep: 1|act_loss: -0.008270263671875|cri_loss: -0.004001617431640625|unsuper_loss: 0.0
-average reward score: 5.28515625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.20%) |Training time=0.45s (21.11%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4374|ppo_ep: 1|act_loss: 0.028411865234375|cri_loss: 0.01493072509765625|unsuper_loss: 0.0
-average reward score: 6.2890625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.15%) |Training time=0.45s (21.13%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4375|ppo_ep: 1|act_loss: -0.01084136962890625|cri_loss: -0.00421905517578125|unsuper_loss: 0.0
-average reward score: 5.91796875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.08%) |Training time=0.46s (21.24%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4376|ppo_ep: 1|act_loss: 0.0487060546875|cri_loss: 0.026031494140625|unsuper_loss: 0.0
-average reward score: 4.7734375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.25%) |Training time=0.45s (21.07%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4377|ppo_ep: 1|act_loss: 0.0104522705078125|cri_loss: 0.00604248046875|unsuper_loss: 0.0
-average reward score: 5.77734375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.04%) |Training time=0.46s (21.29%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4378|ppo_ep: 1|act_loss: -0.01346588134765625|cri_loss: -0.006237030029296875|unsuper_loss: 0.0
-average reward score: 6.1484375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.14%) |Training time=0.45s (21.20%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46
-[2023-04-14 11:29:02,241] [INFO] [logging.py:96:log_dist] [Rank 0] step=4380, skipped=58, lr=[4.561349245652627e-06, 4.561349245652627e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:29:02,259] [INFO] [timer.py:199:stop] epoch=0/micro_step=4380/global_step=4380, RunningAvgSamplesPerSec=105.21642532909777, CurrSamplesPerSec=108.15262171002004, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:29:02,352] [INFO] [logging.py:96:log_dist] [Rank 0] step=4380, skipped=70, lr=[2.374927525426587e-06, 2.374927525426587e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4379|ppo_ep: 1|act_loss: 0.0002435445785522461|cri_loss: 0.0004558563232421875|unsuper_loss: 0.0
-average reward score: 5.640625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.01%) |Training time=0.46s (21.34%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4380|ppo_ep: 1|act_loss: -0.0017757415771484375|cri_loss: -0.0005774497985839844|unsuper_loss: 0.0
-average reward score: 4.453125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.07%) |Training time=0.46s (21.27%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4381|ppo_ep: 1|act_loss: -0.008941650390625|cri_loss: -0.00415802001953125|unsuper_loss: 0.0
-average reward score: 5.8046875
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.32%) |Training time=0.46s (21.11%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4382|ppo_ep: 1|act_loss: -0.02496337890625|cri_loss: -0.0115814208984375|unsuper_loss: 0.0
-average reward score: 4.984375
--------------------------------------------------------------------------------------
-|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.69s (75.22%) |Training time=0.46s (20.31%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.23 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4383|ppo_ep: 1|act_loss: -0.018798828125|cri_loss: -0.0090789794921875|unsuper_loss: 0.0
-average reward score: 5.4765625
--------------------------------------------------------------------------------------
-|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.96%) |Training time=0.45s (19.61%) |Others=0.10 (4.43%)|CurSamplesPerSec=13.84 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4384|ppo_ep: 1|act_loss: 0.02191162109375|cri_loss: 0.01122283935546875|unsuper_loss: 0.0
-average reward score: 4.734375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.28%) |Training time=0.45s (21.05%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4385|ppo_ep: 1|act_loss: 0.01512908935546875|cri_loss: 0.00812530517578125|unsuper_loss: 0.0
-average reward score: 5.1796875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.93%) |Training time=0.46s (21.42%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4386|ppo_ep: 1|act_loss: 0.01531982421875|cri_loss: 0.0079345703125|unsuper_loss: 0.0
-average reward score: 5.35546875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.21%) |Training time=0.45s (21.07%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4387|ppo_ep: 1|act_loss: 0.0019092559814453125|cri_loss: 0.00247955322265625|unsuper_loss: 0.0
-average reward score: 6.2421875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.16%) |Training time=0.48s (21.93%) |Others=0.11 (4.91%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4388|ppo_ep: 1|act_loss: -0.018646240234375|cri_loss: -0.00868988037109375|unsuper_loss: 0.0
-average reward score: 4.13671875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.65%) |Training time=0.47s (21.71%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
-[2023-04-14 11:29:24,065] [INFO] [logging.py:96:log_dist] [Rank 0] step=4390, skipped=58, lr=[4.542802792206608e-06, 4.542802792206608e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:29:24,084] [INFO] [timer.py:199:stop] epoch=0/micro_step=4390/global_step=4390, RunningAvgSamplesPerSec=105.22203844777437, CurrSamplesPerSec=107.79432154766418, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:29:24,176] [INFO] [logging.py:96:log_dist] [Rank 0] step=4390, skipped=70, lr=[2.3653155513433175e-06, 2.3653155513433175e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4389|ppo_ep: 1|act_loss: -0.0149383544921875|cri_loss: -0.005939483642578125|unsuper_loss: 0.0
-average reward score: 5.04296875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.05%) |Training time=0.46s (21.29%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4390|ppo_ep: 1|act_loss: 0.05224609375|cri_loss: 0.0274200439453125|unsuper_loss: 0.0
-average reward score: 5.609375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.76%) |Training time=0.47s (21.61%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4391|ppo_ep: 1|act_loss: -0.0126495361328125|cri_loss: -0.005931854248046875|unsuper_loss: 0.0
-average reward score: 6.5390625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.84%) |Training time=0.46s (21.49%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4392|ppo_ep: 1|act_loss: -0.0054779052734375|cri_loss: -0.0024261474609375|unsuper_loss: 0.0
-average reward score: 5.03125
--------------------------------------------------------------------------------------
-|E2E latency=2.97s |Gather latency=0.00s (0.00%) |Generate time=1.59s (53.63%) |Training time=0.47s (15.71%) |Others=0.91 (30.66%)|CurSamplesPerSec=10.78 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4393|ppo_ep: 1|act_loss: 0.0209808349609375|cri_loss: 0.01116180419921875|unsuper_loss: 0.0
-average reward score: 4.79296875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.63%) |Training time=0.47s (21.62%) |Others=0.10 (4.75%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4394|ppo_ep: 1|act_loss: -0.013763427734375|cri_loss: -0.006748199462890625|unsuper_loss: 0.0
-average reward score: 6.87109375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.75%) |Training time=0.47s (21.60%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4395|ppo_ep: 1|act_loss: -0.0205078125|cri_loss: -0.0098724365234375|unsuper_loss: 0.0
-average reward score: 4.5703125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.83%) |Training time=0.46s (21.52%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4396|ppo_ep: 1|act_loss: -0.02789306640625|cri_loss: -0.013275146484375|unsuper_loss: 0.0
-average reward score: 5.46875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.64%) |Training time=0.47s (21.73%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4397|ppo_ep: 1|act_loss: 0.00476837158203125|cri_loss: 0.0038661956787109375|unsuper_loss: 0.0
-average reward score: 4.80078125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.56%) |Training time=0.47s (21.81%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4398|ppo_ep: 1|act_loss: 0.0108642578125|cri_loss: 0.00586700439453125|unsuper_loss: 0.0
-average reward score: 4.7578125
--------------------------------------------------------------------------------------
-|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.88%) |Training time=0.46s (19.70%) |Others=0.10 (4.42%)|CurSamplesPerSec=13.77 |AvgSamplesPerSec=14.46
-[2023-04-14 11:29:46,633] [INFO] [logging.py:96:log_dist] [Rank 0] step=4400, skipped=58, lr=[4.524260521603971e-06, 4.524260521603971e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:29:46,652] [INFO] [timer.py:199:stop] epoch=0/micro_step=4400/global_step=4400, RunningAvgSamplesPerSec=105.2229391910278, CurrSamplesPerSec=105.90347959812492, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:29:46,744] [INFO] [logging.py:96:log_dist] [Rank 0] step=4400, skipped=70, lr=[2.3557055736084847e-06, 2.3557055736084847e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4399|ppo_ep: 1|act_loss: -0.02545166015625|cri_loss: -0.01221466064453125|unsuper_loss: 0.0
-average reward score: 4.72265625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.99%) |Training time=0.46s (21.41%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4400|ppo_ep: 1|act_loss: -0.03961181640625|cri_loss: -0.0188751220703125|unsuper_loss: 0.0
-average reward score: 6.0703125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.74%) |Training time=0.47s (21.61%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4401|ppo_ep: 1|act_loss: 0.063720703125|cri_loss: 0.034637451171875|unsuper_loss: 0.0
-average reward score: 5.76171875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.77%) |Training time=0.47s (21.57%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4402|ppo_ep: 1|act_loss: -0.0141143798828125|cri_loss: -0.006595611572265625|unsuper_loss: 0.0
-average reward score: 5.3828125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.66%) |Training time=0.47s (21.69%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4403|ppo_ep: 1|act_loss: 0.03546142578125|cri_loss: 0.01824951171875|unsuper_loss: 0.0
-average reward score: 7.25
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.61%) |Training time=0.47s (21.76%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4404|ppo_ep: 1|act_loss: -0.0003781318664550781|cri_loss: 6.4849853515625e-05|unsuper_loss: 0.0
-average reward score: 5.69921875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.09%) |Training time=0.46s (21.24%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4405|ppo_ep: 1|act_loss: -0.0084228515625|cri_loss: -0.00386810302734375|unsuper_loss: 0.0
-average reward score: 6.1015625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.00%) |Training time=0.46s (21.32%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4406|ppo_ep: 1|act_loss: 0.03094482421875|cri_loss: 0.0161285400390625|unsuper_loss: 0.0
-average reward score: 4.640625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.17%) |Training time=0.45s (21.18%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4407|ppo_ep: 1|act_loss: 0.0733642578125|cri_loss: 0.039581298828125|unsuper_loss: 0.0
-average reward score: 5.56640625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.95%) |Training time=0.46s (21.38%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4408|ppo_ep: 1|act_loss: 0.0005645751953125|cri_loss: 0.0008716583251953125|unsuper_loss: 0.0
-average reward score: 4.87890625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.21%) |Training time=0.45s (21.09%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46
-[2023-04-14 11:30:08,176] [INFO] [logging.py:96:log_dist] [Rank 0] step=4410, skipped=58, lr=[4.505722708685901e-06, 4.505722708685901e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:30:08,194] [INFO] [timer.py:199:stop] epoch=0/micro_step=4410/global_step=4410, RunningAvgSamplesPerSec=105.22675621955904, CurrSamplesPerSec=107.51842912451345, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:30:08,287] [INFO] [logging.py:96:log_dist] [Rank 0] step=4410, skipped=70, lr=[2.346097734665143e-06, 2.346097734665143e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4409|ppo_ep: 1|act_loss: 0.009124755859375|cri_loss: 0.004863739013671875|unsuper_loss: 0.0
-average reward score: 5.6015625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.92%) |Training time=0.46s (21.44%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4410|ppo_ep: 1|act_loss: 0.00501251220703125|cri_loss: 0.0026264190673828125|unsuper_loss: 0.0
-average reward score: 4.6171875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.91%) |Training time=0.47s (21.47%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4411|ppo_ep: 1|act_loss: 0.004062652587890625|cri_loss: 0.00296783447265625|unsuper_loss: 0.0
-average reward score: 4.765625
--------------------------------------------------------------------------------------
-|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.70s (75.56%) |Training time=0.45s (19.98%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.25 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4412|ppo_ep: 1|act_loss: 0.01087188720703125|cri_loss: 0.00595855712890625|unsuper_loss: 0.0
-average reward score: 4.10546875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.96%) |Training time=0.46s (21.38%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46
-[2023-04-14 11:30:17,048] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, reducing to 16384
-epoch: 0|step: 4413|ppo_ep: 1|act_loss: 0.022308349609375|cri_loss: 0.0118408203125|unsuper_loss: 0.0
-average reward score: 5.53515625
--------------------------------------------------------------------------------------
-|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.76s (76.95%) |Training time=0.43s (18.68%) |Others=0.10 (4.37%)|CurSamplesPerSec=13.97 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4414|ppo_ep: 1|act_loss: -0.019378662109375|cri_loss: -0.00940704345703125|unsuper_loss: 0.0
-average reward score: 5.0
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.68%) |Training time=0.47s (21.67%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4415|ppo_ep: 1|act_loss: -0.0246429443359375|cri_loss: -0.0103607177734375|unsuper_loss: 0.0
-average reward score: 4.65234375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.75%) |Training time=0.47s (21.65%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4416|ppo_ep: 1|act_loss: -0.0114288330078125|cri_loss: -0.005565643310546875|unsuper_loss: 0.0
-average reward score: 4.99609375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.58%) |Training time=0.47s (21.70%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4417|ppo_ep: 1|act_loss: -0.00141143798828125|cri_loss: 0.0003528594970703125|unsuper_loss: 0.0
-average reward score: 5.421875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.96%) |Training time=0.48s (22.40%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4418|ppo_ep: 1|act_loss: 0.00626373291015625|cri_loss: 0.003574371337890625|unsuper_loss: 0.0
-average reward score: 5.484375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.69%) |Training time=0.47s (21.58%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
-[2023-04-14 11:30:30,012] [INFO] [logging.py:96:log_dist] [Rank 0] step=4420, skipped=59, lr=[4.489042715482996e-06, 4.489042715482996e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:30:30,030] [INFO] [timer.py:199:stop] epoch=0/micro_step=4420/global_step=4420, RunningAvgSamplesPerSec=105.22975983988711, CurrSamplesPerSec=105.75086433215567, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:30:30,123] [INFO] [logging.py:96:log_dist] [Rank 0] step=4420, skipped=70, lr=[2.3364921769246423e-06, 2.3364921769246423e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4419|ppo_ep: 1|act_loss: 0.015380859375|cri_loss: 0.00812530517578125|unsuper_loss: 0.0
-average reward score: 5.80859375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.85%) |Training time=0.47s (21.52%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4420|ppo_ep: 1|act_loss: 0.0207672119140625|cri_loss: 0.0114898681640625|unsuper_loss: 0.0
-average reward score: 5.3125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.53%) |Training time=0.47s (21.82%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4421|ppo_ep: 1|act_loss: 0.03228759765625|cri_loss: 0.0165252685546875|unsuper_loss: 0.0
-average reward score: 5.015625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.79%) |Training time=0.47s (21.56%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4422|ppo_ep: 1|act_loss: -0.0010213851928710938|cri_loss: -0.00040078163146972656|unsuper_loss: 0.0
-average reward score: 6.07421875
--------------------------------------------------------------------------------------
-|E2E latency=2.41s |Gather latency=0.00s (0.00%) |Generate time=1.59s (66.06%) |Training time=0.47s (19.46%) |Others=0.35 (14.49%)|CurSamplesPerSec=13.27 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4423|ppo_ep: 1|act_loss: 6.699562072753906e-05|cri_loss: 0.00020170211791992188|unsuper_loss: 0.0
-average reward score: 5.52734375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.75%) |Training time=0.47s (21.61%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4424|ppo_ep: 1|act_loss: -0.0081939697265625|cri_loss: -0.00389862060546875|unsuper_loss: 0.0
-average reward score: 5.8203125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.78%) |Training time=0.47s (21.57%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4425|ppo_ep: 1|act_loss: -0.0167236328125|cri_loss: -0.0081329345703125|unsuper_loss: 0.0
-average reward score: 5.1953125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.29%) |Training time=0.48s (22.06%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4426|ppo_ep: 1|act_loss: -0.0308990478515625|cri_loss: -0.014556884765625|unsuper_loss: 0.0
-average reward score: 5.23828125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.52%) |Training time=0.47s (21.82%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4427|ppo_ep: 1|act_loss: 0.00275421142578125|cri_loss: 0.0015478134155273438|unsuper_loss: 0.0
-average reward score: 6.15625
--------------------------------------------------------------------------------------
-|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.60s (70.08%) |Training time=0.58s (25.53%) |Others=0.10 (4.39%)|CurSamplesPerSec=14.01 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4428|ppo_ep: 1|act_loss: 0.0010356903076171875|cri_loss: 0.001148223876953125|unsuper_loss: 0.0
-average reward score: 4.86328125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.13%) |Training time=0.48s (22.23%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
-[2023-04-14 11:30:52,008] [INFO] [logging.py:96:log_dist] [Rank 0] step=4430, skipped=59, lr=[4.470514129113125e-06, 4.470514129113125e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:30:52,026] [INFO] [timer.py:199:stop] epoch=0/micro_step=4430/global_step=4430, RunningAvgSamplesPerSec=105.21941202542125, CurrSamplesPerSec=96.38726764544512, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:30:52,119] [INFO] [logging.py:96:log_dist] [Rank 0] step=4430, skipped=70, lr=[2.3268890427645213e-06, 2.3268890427645213e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4429|ppo_ep: 1|act_loss: 0.0251617431640625|cri_loss: 0.013397216796875|unsuper_loss: 0.0
-average reward score: 5.58203125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.70%) |Training time=0.49s (22.71%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4430|ppo_ep: 1|act_loss: -0.0066070556640625|cri_loss: -0.003173828125|unsuper_loss: 0.0
-average reward score: 5.51953125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.00%) |Training time=0.49s (22.38%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4431|ppo_ep: 1|act_loss: -0.024017333984375|cri_loss: -0.011444091796875|unsuper_loss: 0.0
-average reward score: 5.078125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.07%) |Training time=0.48s (22.29%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4432|ppo_ep: 1|act_loss: -0.0283203125|cri_loss: -0.01357269287109375|unsuper_loss: 0.0
-average reward score: 5.72265625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.91%) |Training time=0.49s (22.47%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4433|ppo_ep: 1|act_loss: -0.0015773773193359375|cri_loss: -0.0005788803100585938|unsuper_loss: 0.0
-average reward score: 5.55078125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.38%) |Training time=0.47s (21.91%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4434|ppo_ep: 1|act_loss: 0.0058746337890625|cri_loss: 0.00351715087890625|unsuper_loss: 0.0
-average reward score: 5.4921875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.25%) |Training time=0.48s (22.14%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4435|ppo_ep: 1|act_loss: -0.0010538101196289062|cri_loss: 8.96453857421875e-05|unsuper_loss: 0.0
-average reward score: 4.7421875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.05%) |Training time=0.48s (22.31%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
-[2023-04-14 11:31:07,251] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
-epoch: 0|step: 4436|ppo_ep: 1|act_loss: -0.00556182861328125|cri_loss: -0.002140045166015625|unsuper_loss: 0.0
-average reward score: 5.87890625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.79%) |Training time=0.47s (22.02%) |Others=0.09 (4.19%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46
-[2023-04-14 11:31:09,405] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
-epoch: 0|step: 4437|ppo_ep: 1|act_loss: -0.0242919921875|cri_loss: -0.01165008544921875|unsuper_loss: 0.0
-average reward score: 5.0
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.19%) |Training time=0.49s (22.66%) |Others=0.09 (4.15%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4438|ppo_ep: 1|act_loss: 0.00684356689453125|cri_loss: 0.0038604736328125|unsuper_loss: 0.0
-average reward score: 4.28515625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.05%) |Training time=0.48s (22.32%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
-[2023-04-14 11:31:13,619] [INFO] [logging.py:96:log_dist] [Rank 0] step=4440, skipped=59, lr=[4.451990797079012e-06, 4.451990797079012e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:31:13,637] [INFO] [timer.py:199:stop] epoch=0/micro_step=4440/global_step=4440, RunningAvgSamplesPerSec=105.20787125582682, CurrSamplesPerSec=100.67366538753491, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:31:13,729] [INFO] [logging.py:96:log_dist] [Rank 0] step=4440, skipped=72, lr=[2.3192083760691226e-06, 2.3192083760691226e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4439|ppo_ep: 1|act_loss: -0.00165557861328125|cri_loss: -0.0004649162292480469|unsuper_loss: 0.0
-average reward score: 4.8671875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.23%) |Training time=0.48s (22.17%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4440|ppo_ep: 1|act_loss: 0.0289306640625|cri_loss: 0.01500701904296875|unsuper_loss: 0.0
-average reward score: 6.4140625
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.31%) |Training time=0.48s (21.83%) |Others=0.11 (4.85%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4441|ppo_ep: 1|act_loss: 0.002044677734375|cri_loss: 0.001621246337890625|unsuper_loss: 0.0
-average reward score: 5.67578125
--------------------------------------------------------------------------------------
-|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.67s (74.29%) |Training time=0.48s (21.24%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.25 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4442|ppo_ep: 1|act_loss: 0.021697998046875|cri_loss: 0.011474609375|unsuper_loss: 0.0
-average reward score: 5.6328125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.91%) |Training time=0.49s (22.45%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4443|ppo_ep: 1|act_loss: 0.0056304931640625|cri_loss: 0.003143310546875|unsuper_loss: 0.0
-average reward score: 5.25
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.18%) |Training time=0.48s (22.19%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4444|ppo_ep: 1|act_loss: 0.027313232421875|cri_loss: 0.01392364501953125|unsuper_loss: 0.0
-average reward score: 4.96484375
--------------------------------------------------------------------------------------
-|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.74s (74.68%) |Training time=0.48s (20.67%) |Others=0.11 (4.65%)|CurSamplesPerSec=13.74 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4445|ppo_ep: 1|act_loss: 0.03533935546875|cri_loss: 0.01910400390625|unsuper_loss: 0.0
-average reward score: 4.9375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.47%) |Training time=0.47s (21.88%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4446|ppo_ep: 1|act_loss: 0.003787994384765625|cri_loss: 0.0020809173583984375|unsuper_loss: 0.0
-average reward score: 6.28125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.63%) |Training time=0.47s (21.70%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4447|ppo_ep: 1|act_loss: 0.0007276535034179688|cri_loss: 0.0010051727294921875|unsuper_loss: 0.0
-average reward score: 5.796875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.10%) |Training time=0.48s (22.28%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4448|ppo_ep: 1|act_loss: -0.0034160614013671875|cri_loss: -0.0006732940673828125|unsuper_loss: 0.0
-average reward score: 4.84375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.82%) |Training time=0.46s (21.49%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46
-[2023-04-14 11:31:35,512] [INFO] [logging.py:96:log_dist] [Rank 0] step=4450, skipped=59, lr=[4.433472993941121e-06, 4.433472993941121e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:31:35,530] [INFO] [timer.py:199:stop] epoch=0/micro_step=4450/global_step=4450, RunningAvgSamplesPerSec=105.20223601286291, CurrSamplesPerSec=111.97585245167195, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:31:35,623] [INFO] [logging.py:96:log_dist] [Rank 0] step=4450, skipped=72, lr=[2.309609963029065e-06, 2.309609963029065e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4449|ppo_ep: 1|act_loss: -0.01202392578125|cri_loss: -0.005767822265625|unsuper_loss: 0.0
-average reward score: 6.08203125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.59%) |Training time=0.45s (20.74%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4450|ppo_ep: 1|act_loss: -0.024658203125|cri_loss: -0.01102447509765625|unsuper_loss: 0.0
-average reward score: 5.53125
--------------------------------------------------------------------------------------
-|E2E latency=2.52s |Gather latency=0.00s (0.00%) |Generate time=1.60s (63.36%) |Training time=0.46s (18.39%) |Others=0.46 (18.25%)|CurSamplesPerSec=12.71 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4451|ppo_ep: 1|act_loss: -0.0023059844970703125|cri_loss: -0.0007138252258300781|unsuper_loss: 0.0
-average reward score: 4.796875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.69%) |Training time=0.47s (21.65%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4452|ppo_ep: 1|act_loss: -0.006069183349609375|cri_loss: -0.0021514892578125|unsuper_loss: 0.0
-average reward score: 5.28125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.91%) |Training time=0.46s (21.42%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4453|ppo_ep: 1|act_loss: -0.029815673828125|cri_loss: -0.01385498046875|unsuper_loss: 0.0
-average reward score: 6.00390625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.96%) |Training time=0.46s (21.36%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4454|ppo_ep: 1|act_loss: -0.016937255859375|cri_loss: -0.007965087890625|unsuper_loss: 0.0
-average reward score: 5.71484375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.40%) |Training time=0.48s (21.96%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4455|ppo_ep: 1|act_loss: -0.036224365234375|cri_loss: -0.0174713134765625|unsuper_loss: 0.0
-average reward score: 5.5703125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.86%) |Training time=0.46s (21.49%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4456|ppo_ep: 1|act_loss: 0.03314208984375|cri_loss: 0.0172271728515625|unsuper_loss: 0.0
-average reward score: 5.59765625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.80%) |Training time=0.46s (21.56%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4457|ppo_ep: 1|act_loss: 0.0027675628662109375|cri_loss: 0.0017824172973632812|unsuper_loss: 0.0
-average reward score: 5.87890625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.59%) |Training time=0.47s (21.76%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4458|ppo_ep: 1|act_loss: 0.026031494140625|cri_loss: 0.01338958740234375|unsuper_loss: 0.0
-average reward score: 4.76953125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.93%) |Training time=0.46s (21.39%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46
-[2023-04-14 11:31:57,598] [INFO] [logging.py:96:log_dist] [Rank 0] step=4460, skipped=59, lr=[4.41496099417797e-06, 4.41496099417797e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:31:57,616] [INFO] [timer.py:199:stop] epoch=0/micro_step=4460/global_step=4460, RunningAvgSamplesPerSec=105.20444311444572, CurrSamplesPerSec=108.08268602342073, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:31:57,709] [INFO] [logging.py:96:log_dist] [Rank 0] step=4460, skipped=72, lr=[2.3000143720286463e-06, 2.3000143720286463e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4459|ppo_ep: 1|act_loss: 0.06365966796875|cri_loss: 0.03680419921875|unsuper_loss: 0.0
-average reward score: 5.4375
--------------------------------------------------------------------------------------
-|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.77s (75.96%) |Training time=0.46s (19.74%) |Others=0.10 (4.30%)|CurSamplesPerSec=13.76 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4460|ppo_ep: 1|act_loss: 0.01898193359375|cri_loss: 0.00991058349609375|unsuper_loss: 0.0
-average reward score: 6.82421875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.76%) |Training time=0.47s (21.60%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4461|ppo_ep: 1|act_loss: -0.0066986083984375|cri_loss: -0.00315093994140625|unsuper_loss: 0.0
-average reward score: 4.9140625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.82%) |Training time=0.46s (21.53%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4462|ppo_ep: 1|act_loss: 0.0011196136474609375|cri_loss: 0.0011081695556640625|unsuper_loss: 0.0
-average reward score: 5.98046875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.75%) |Training time=0.47s (21.61%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4463|ppo_ep: 1|act_loss: -0.0222320556640625|cri_loss: -0.00909423828125|unsuper_loss: 0.0
-average reward score: 6.11328125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.06%) |Training time=0.46s (21.27%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4464|ppo_ep: 1|act_loss: -0.00608062744140625|cri_loss: 0.001983642578125|unsuper_loss: 0.0
-average reward score: 6.4453125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.27%) |Training time=0.45s (21.00%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4465|ppo_ep: 1|act_loss: -0.028961181640625|cri_loss: -0.0133514404296875|unsuper_loss: 0.0
-average reward score: 5.4765625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.13%) |Training time=0.46s (21.17%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4466|ppo_ep: 1|act_loss: 0.029052734375|cri_loss: 0.01490020751953125|unsuper_loss: 0.0
-average reward score: 4.953125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.65s (76.31%) |Training time=0.41s (18.96%) |Others=0.10 (4.74%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4467|ppo_ep: 1|act_loss: -0.0139923095703125|cri_loss: -0.005023956298828125|unsuper_loss: 0.0
-average reward score: 5.91015625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.66s (76.65%) |Training time=0.40s (18.63%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4468|ppo_ep: 1|act_loss: 0.006591796875|cri_loss: 0.0034809112548828125|unsuper_loss: 0.0
-average reward score: 6.4375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.92%) |Training time=0.46s (21.40%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
-[2023-04-14 11:32:19,212] [INFO] [logging.py:96:log_dist] [Rank 0] step=4470, skipped=59, lr=[4.3964550721820475e-06, 4.3964550721820475e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:32:19,230] [INFO] [timer.py:199:stop] epoch=0/micro_step=4470/global_step=4470, RunningAvgSamplesPerSec=105.21677106152741, CurrSamplesPerSec=103.61686894506175, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:32:19,323] [INFO] [logging.py:96:log_dist] [Rank 0] step=4470, skipped=72, lr=[2.2904217452976725e-06, 2.2904217452976725e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4469|ppo_ep: 1|act_loss: -0.00719451904296875|cri_loss: -0.002063751220703125|unsuper_loss: 0.0
-average reward score: 5.703125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.72%) |Training time=0.47s (21.67%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4470|ppo_ep: 1|act_loss: 0.01214599609375|cri_loss: 0.00646209716796875|unsuper_loss: 0.0
-average reward score: 6.7265625
--------------------------------------------------------------------------------------
-|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.99%) |Training time=0.46s (20.54%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.17 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4471|ppo_ep: 1|act_loss: 0.0025005340576171875|cri_loss: 0.00406646728515625|unsuper_loss: 0.0
-average reward score: 7.140625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.09%) |Training time=0.46s (21.23%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4472|ppo_ep: 1|act_loss: 0.004077911376953125|cri_loss: 0.002239227294921875|unsuper_loss: 0.0
-average reward score: 5.1171875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.67%) |Training time=0.45s (20.63%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4473|ppo_ep: 1|act_loss: -0.0126495361328125|cri_loss: -0.005893707275390625|unsuper_loss: 0.0
-average reward score: 5.78515625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.45%) |Training time=0.45s (20.88%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4474|ppo_ep: 1|act_loss: -0.0308990478515625|cri_loss: -0.0146636962890625|unsuper_loss: 0.0
-average reward score: 6.41015625
--------------------------------------------------------------------------------------
-|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.33%) |Training time=0.47s (20.34%) |Others=0.10 (4.33%)|CurSamplesPerSec=13.75 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4475|ppo_ep: 1|act_loss: -0.0252532958984375|cri_loss: -0.01204681396484375|unsuper_loss: 0.0
-average reward score: 4.92578125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.86%) |Training time=0.46s (21.47%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4476|ppo_ep: 1|act_loss: -0.0056610107421875|cri_loss: -0.002590179443359375|unsuper_loss: 0.0
-average reward score: 5.1171875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.50%) |Training time=0.47s (21.85%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4477|ppo_ep: 1|act_loss: 0.0009708404541015625|cri_loss: 0.0006566047668457031|unsuper_loss: 0.0
-average reward score: 5.7109375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.64%) |Training time=0.47s (21.64%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4478|ppo_ep: 1|act_loss: 0.04119873046875|cri_loss: 0.02197265625|unsuper_loss: 0.0
-average reward score: 5.94140625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.09%) |Training time=0.46s (21.23%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
-[2023-04-14 11:32:41,073] [INFO] [logging.py:96:log_dist] [Rank 0] step=4480, skipped=59, lr=[4.377955502255766e-06, 4.377955502255766e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:32:41,092] [INFO] [timer.py:199:stop] epoch=0/micro_step=4480/global_step=4480, RunningAvgSamplesPerSec=105.22041393282225, CurrSamplesPerSec=104.97159258882692, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:32:41,184] [INFO] [logging.py:96:log_dist] [Rank 0] step=4480, skipped=72, lr=[2.2808322250220145e-06, 2.2808322250220145e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4479|ppo_ep: 1|act_loss: -0.024658203125|cri_loss: -0.01195526123046875|unsuper_loss: 0.0
-average reward score: 5.55859375
--------------------------------------------------------------------------------------
-|E2E latency=3.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (50.62%) |Training time=0.47s (14.91%) |Others=1.08 (34.47%)|CurSamplesPerSec=10.19 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4480|ppo_ep: 1|act_loss: 0.01971435546875|cri_loss: 0.0106048583984375|unsuper_loss: 0.0
-average reward score: 5.31640625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.39%) |Training time=0.45s (20.92%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4481|ppo_ep: 1|act_loss: -0.017547607421875|cri_loss: -0.00855255126953125|unsuper_loss: 0.0
-average reward score: 5.8671875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.67%) |Training time=0.44s (20.64%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4482|ppo_ep: 1|act_loss: -0.00038242340087890625|cri_loss: 0.0005755424499511719|unsuper_loss: 0.0
-average reward score: 6.4375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.98%) |Training time=0.44s (20.38%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4483|ppo_ep: 1|act_loss: -0.0160369873046875|cri_loss: -0.007762908935546875|unsuper_loss: 0.0
-average reward score: 5.08984375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.64%) |Training time=0.44s (20.68%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4484|ppo_ep: 1|act_loss: 0.00464630126953125|cri_loss: 0.002918243408203125|unsuper_loss: 0.0
-average reward score: 5.56640625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.43%) |Training time=0.45s (20.91%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4485|ppo_ep: 1|act_loss: -0.023712158203125|cri_loss: -0.0107879638671875|unsuper_loss: 0.0
-average reward score: 5.23046875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.32%) |Training time=0.45s (20.99%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4486|ppo_ep: 1|act_loss: 0.007293701171875|cri_loss: 0.004192352294921875|unsuper_loss: 0.0
-average reward score: 5.14453125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.82%) |Training time=0.47s (21.54%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4487|ppo_ep: 1|act_loss: -0.0009126663208007812|cri_loss: -7.581710815429688e-05|unsuper_loss: 0.0
-average reward score: 6.16796875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.17%) |Training time=0.46s (21.17%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4488|ppo_ep: 1|act_loss: 0.013427734375|cri_loss: 0.007648468017578125|unsuper_loss: 0.0
-average reward score: 5.72265625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.45%) |Training time=0.45s (20.86%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46
-[2023-04-14 11:33:03,613] [INFO] [logging.py:96:log_dist] [Rank 0] step=4490, skipped=59, lr=[4.3594625586073746e-06, 4.3594625586073746e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:33:03,632] [INFO] [timer.py:199:stop] epoch=0/micro_step=4490/global_step=4490, RunningAvgSamplesPerSec=105.23280805486438, CurrSamplesPerSec=110.25764020668524, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:33:03,725] [INFO] [logging.py:96:log_dist] [Rank 0] step=4490, skipped=72, lr=[2.271245953341494e-06, 2.271245953341494e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4489|ppo_ep: 1|act_loss: 0.0073394775390625|cri_loss: 0.0040130615234375|unsuper_loss: 0.0
-average reward score: 4.859375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.31%) |Training time=0.45s (21.05%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4490|ppo_ep: 1|act_loss: -0.017333984375|cri_loss: -0.0079803466796875|unsuper_loss: 0.0
-average reward score: 5.0859375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.33%) |Training time=0.45s (21.01%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4491|ppo_ep: 1|act_loss: -0.015625|cri_loss: -0.0067596435546875|unsuper_loss: 0.0
-average reward score: 5.7265625
--------------------------------------------------------------------------------------
-|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.77s (76.40%) |Training time=0.45s (19.27%) |Others=0.10 (4.33%)|CurSamplesPerSec=13.80 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4492|ppo_ep: 1|act_loss: -0.046142578125|cri_loss: -0.0220184326171875|unsuper_loss: 0.0
-average reward score: 5.125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.27%) |Training time=0.45s (21.08%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4493|ppo_ep: 1|act_loss: -0.0009975433349609375|cri_loss: -0.000133514404296875|unsuper_loss: 0.0
-average reward score: 5.25390625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.25%) |Training time=0.46s (21.10%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4494|ppo_ep: 1|act_loss: -0.0071868896484375|cri_loss: -0.0030841827392578125|unsuper_loss: 0.0
-average reward score: 5.46484375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.56%) |Training time=0.45s (20.77%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4495|ppo_ep: 1|act_loss: -0.0185546875|cri_loss: -0.00876617431640625|unsuper_loss: 0.0
-average reward score: 5.984375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.60%) |Training time=0.45s (20.73%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4496|ppo_ep: 1|act_loss: -0.00152587890625|cri_loss: 0.002399444580078125|unsuper_loss: 0.0
-average reward score: 5.3359375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.33%) |Training time=0.45s (21.01%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4497|ppo_ep: 1|act_loss: -0.002407073974609375|cri_loss: -0.0009021759033203125|unsuper_loss: 0.0
-average reward score: 5.28515625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.64%) |Training time=0.44s (20.70%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4498|ppo_ep: 1|act_loss: 0.01605224609375|cri_loss: 0.00878143310546875|unsuper_loss: 0.0
-average reward score: 5.14453125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.60%) |Training time=0.44s (20.18%) |Others=0.11 (5.22%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.46
-[2023-04-14 11:33:25,426] [INFO] [logging.py:96:log_dist] [Rank 0] step=4500, skipped=59, lr=[4.340976515346915e-06, 4.340976515346915e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:33:25,444] [INFO] [timer.py:199:stop] epoch=0/micro_step=4500/global_step=4500, RunningAvgSamplesPerSec=105.24786998748107, CurrSamplesPerSec=114.88214409214008, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:33:25,537] [INFO] [logging.py:96:log_dist] [Rank 0] step=4500, skipped=72, lr=[2.261663072347785e-06, 2.261663072347785e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4499|ppo_ep: 1|act_loss: 0.0015306472778320312|cri_loss: 0.0017213821411132812|unsuper_loss: 0.0
-average reward score: 5.4140625
--------------------------------------------------------------------------------------
-|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.71s (75.93%) |Training time=0.44s (19.62%) |Others=0.10 (4.45%)|CurSamplesPerSec=14.22 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4500|ppo_ep: 1|act_loss: -0.002105712890625|cri_loss: -6.4849853515625e-05|unsuper_loss: 0.0
-average reward score: 5.05078125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.59%) |Training time=0.45s (20.74%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4501|ppo_ep: 1|act_loss: 0.0303955078125|cri_loss: 0.0157470703125|unsuper_loss: 0.0
-average reward score: 6.1015625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.45%) |Training time=0.45s (20.91%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4502|ppo_ep: 1|act_loss: 0.02105712890625|cri_loss: 0.01132965087890625|unsuper_loss: 0.0
-average reward score: 4.98828125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.54%) |Training time=0.45s (20.79%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4503|ppo_ep: 1|act_loss: -0.03997802734375|cri_loss: -0.0192108154296875|unsuper_loss: 0.0
-average reward score: 5.0390625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.77%) |Training time=0.44s (20.55%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4504|ppo_ep: 1|act_loss: 0.013336181640625|cri_loss: 0.007686614990234375|unsuper_loss: 0.0
-average reward score: 5.51171875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.46%) |Training time=0.45s (20.86%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4505|ppo_ep: 1|act_loss: -0.0149078369140625|cri_loss: -0.0070953369140625|unsuper_loss: 0.0
-average reward score: 6.26171875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.42%) |Training time=0.45s (20.92%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4506|ppo_ep: 1|act_loss: 0.00888824462890625|cri_loss: 0.004749298095703125|unsuper_loss: 0.0
-average reward score: 4.53125
--------------------------------------------------------------------------------------
-|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.64%) |Training time=0.46s (19.69%) |Others=0.11 (4.66%)|CurSamplesPerSec=13.80 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4507|ppo_ep: 1|act_loss: 0.0194854736328125|cri_loss: 0.01007843017578125|unsuper_loss: 0.0
-average reward score: 5.5703125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.20%) |Training time=0.45s (21.12%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4508|ppo_ep: 1|act_loss: 0.06427001953125|cri_loss: 0.034271240234375|unsuper_loss: 0.0
-average reward score: 5.046875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.32%) |Training time=0.45s (21.01%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46
-[2023-04-14 11:33:47,136] [INFO] [logging.py:96:log_dist] [Rank 0] step=4510, skipped=59, lr=[4.32249764648214e-06, 4.32249764648214e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:33:48,297] [INFO] [timer.py:199:stop] epoch=0/micro_step=4510/global_step=4510, RunningAvgSamplesPerSec=105.17105176081999, CurrSamplesPerSec=21.96928961487455, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:33:48,390] [INFO] [logging.py:96:log_dist] [Rank 0] step=4510, skipped=72, lr=[2.2520837240822997e-06, 2.2520837240822997e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4509|ppo_ep: 1|act_loss: -0.017822265625|cri_loss: -0.00855255126953125|unsuper_loss: 0.0
-average reward score: 5.32421875
--------------------------------------------------------------------------------------
-|E2E latency=3.32s |Gather latency=0.00s (0.00%) |Generate time=1.60s (48.26%) |Training time=1.62s (48.71%) |Others=0.10 (3.03%)|CurSamplesPerSec=9.63 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4510|ppo_ep: 1|act_loss: 0.010223388671875|cri_loss: 0.005870819091796875|unsuper_loss: 0.0
-average reward score: 6.625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.11%) |Training time=0.48s (22.28%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4511|ppo_ep: 1|act_loss: 0.033660888671875|cri_loss: 0.017913818359375|unsuper_loss: 0.0
-average reward score: 4.37109375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.62%) |Training time=0.47s (21.72%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4512|ppo_ep: 1|act_loss: 0.00102996826171875|cri_loss: 0.00168609619140625|unsuper_loss: 0.0
-average reward score: 4.7890625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.32%) |Training time=0.48s (22.06%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4513|ppo_ep: 1|act_loss: 0.00452423095703125|cri_loss: 0.0026302337646484375|unsuper_loss: 0.0
-average reward score: 5.8515625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.55%) |Training time=0.47s (21.80%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4514|ppo_ep: 1|act_loss: 0.01007080078125|cri_loss: 0.005413055419921875|unsuper_loss: 0.0
-average reward score: 5.81640625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.41%) |Training time=0.48s (21.95%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4515|ppo_ep: 1|act_loss: 0.0038013458251953125|cri_loss: 0.00213623046875|unsuper_loss: 0.0
-average reward score: 6.515625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.90%) |Training time=0.49s (22.49%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4516|ppo_ep: 1|act_loss: -0.00154876708984375|cri_loss: -0.00052642822265625|unsuper_loss: 0.0
-average reward score: 5.73046875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.18%) |Training time=0.48s (22.19%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4517|ppo_ep: 1|act_loss: 0.007537841796875|cri_loss: 0.003902435302734375|unsuper_loss: 0.0
-average reward score: 5.9765625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.24%) |Training time=0.48s (22.12%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4518|ppo_ep: 1|act_loss: -0.0220947265625|cri_loss: -0.0106658935546875|unsuper_loss: 0.0
-average reward score: 5.01171875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.18%) |Training time=0.48s (22.20%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
-[2023-04-14 11:34:09,955] [INFO] [logging.py:96:log_dist] [Rank 0] step=4520, skipped=59, lr=[4.304026225914465e-06, 4.304026225914465e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:34:09,973] [INFO] [timer.py:199:stop] epoch=0/micro_step=4520/global_step=4520, RunningAvgSamplesPerSec=105.16232246066636, CurrSamplesPerSec=103.59375617466904, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:34:10,066] [INFO] [logging.py:96:log_dist] [Rank 0] step=4520, skipped=72, lr=[2.2425080505340897e-06, 2.2425080505340897e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4519|ppo_ep: 1|act_loss: -0.02069091796875|cri_loss: -0.00975799560546875|unsuper_loss: 0.0
-average reward score: 6.0703125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.67%) |Training time=0.47s (21.72%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4520|ppo_ep: 1|act_loss: 0.0130767822265625|cri_loss: 0.006988525390625|unsuper_loss: 0.0
-average reward score: 6.4375
--------------------------------------------------------------------------------------
-|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.75s (74.39%) |Training time=0.50s (21.33%) |Others=0.10 (4.27%)|CurSamplesPerSec=13.57 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4521|ppo_ep: 1|act_loss: 0.042938232421875|cri_loss: 0.0226898193359375|unsuper_loss: 0.0
-average reward score: 4.76171875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.94%) |Training time=0.46s (21.41%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4522|ppo_ep: 1|act_loss: -0.0004248619079589844|cri_loss: 0.0001125335693359375|unsuper_loss: 0.0
-average reward score: 6.875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.65%) |Training time=0.47s (21.72%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4523|ppo_ep: 1|act_loss: 0.03631591796875|cri_loss: 0.01885986328125|unsuper_loss: 0.0
-average reward score: 5.453125
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.86%) |Training time=0.49s (22.51%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4524|ppo_ep: 1|act_loss: -0.006473541259765625|cri_loss: -0.003021240234375|unsuper_loss: 0.0
-average reward score: 5.91015625
--------------------------------------------------------------------------------------
-|E2E latency=3.23s |Gather latency=0.00s (0.00%) |Generate time=1.60s (49.51%) |Training time=0.47s (14.71%) |Others=1.16 (35.78%)|CurSamplesPerSec=9.91 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4525|ppo_ep: 1|act_loss: -0.0032958984375|cri_loss: -0.0013866424560546875|unsuper_loss: 0.0
-average reward score: 5.44921875
--------------------------------------------------------------------------------------
-|E2E latency=2.08s |Gather latency=0.00s (0.00%) |Generate time=1.59s (76.58%) |Training time=0.39s (18.58%) |Others=0.10 (4.84%)|CurSamplesPerSec=15.38 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4526|ppo_ep: 1|act_loss: 0.0804443359375|cri_loss: 0.042755126953125|unsuper_loss: 0.0
-average reward score: 5.18359375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.12%) |Training time=0.48s (22.10%) |Others=0.10 (4.78%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4527|ppo_ep: 1|act_loss: 0.0029087066650390625|cri_loss: 0.002635955810546875|unsuper_loss: 0.0
-average reward score: 6.203125
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.63s (73.66%) |Training time=0.48s (21.78%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.48 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4528|ppo_ep: 1|act_loss: -0.0214691162109375|cri_loss: -0.0103302001953125|unsuper_loss: 0.0
-average reward score: 6.0390625
--------------------------------------------------------------------------------------
-|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.68s (73.67%) |Training time=0.49s (21.59%) |Others=0.11 (4.74%)|CurSamplesPerSec=14.01 |AvgSamplesPerSec=14.46
-[2023-04-14 11:34:33,000] [INFO] [logging.py:96:log_dist] [Rank 0] step=4530, skipped=59, lr=[4.285562527434903e-06, 4.285562527434903e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:34:33,018] [INFO] [timer.py:199:stop] epoch=0/micro_step=4530/global_step=4530, RunningAvgSamplesPerSec=105.15677733092694, CurrSamplesPerSec=97.51551941225235, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:34:33,111] [INFO] [logging.py:96:log_dist] [Rank 0] step=4530, skipped=72, lr=[2.2329361936377355e-06, 2.2329361936377355e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4529|ppo_ep: 1|act_loss: -0.037109375|cri_loss: -0.0172882080078125|unsuper_loss: 0.0
-average reward score: 4.9453125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.89%) |Training time=0.49s (22.53%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4530|ppo_ep: 1|act_loss: 0.021636962890625|cri_loss: 0.011260986328125|unsuper_loss: 0.0
-average reward score: 4.91796875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.95%) |Training time=0.49s (22.43%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4531|ppo_ep: 1|act_loss: -0.01393890380859375|cri_loss: -0.0066986083984375|unsuper_loss: 0.0
-average reward score: 4.734375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.17%) |Training time=0.48s (22.08%) |Others=0.10 (4.74%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4532|ppo_ep: 1|act_loss: 0.01395416259765625|cri_loss: 0.007312774658203125|unsuper_loss: 0.0
-average reward score: 5.16796875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.00%) |Training time=0.49s (22.38%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4533|ppo_ep: 1|act_loss: 0.015045166015625|cri_loss: 0.0084991455078125|unsuper_loss: 0.0
-average reward score: 5.73046875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.48%) |Training time=0.47s (21.86%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4534|ppo_ep: 1|act_loss: 0.0009570121765136719|cri_loss: 0.0007576942443847656|unsuper_loss: 0.0
-average reward score: 5.8203125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.10%) |Training time=0.48s (22.28%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4535|ppo_ep: 1|act_loss: -0.05126953125|cri_loss: -0.0240936279296875|unsuper_loss: 0.0
-average reward score: 5.32421875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.69%) |Training time=0.48s (21.86%) |Others=0.12 (5.45%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4536|ppo_ep: 1|act_loss: 0.00452423095703125|cri_loss: 0.0024509429931640625|unsuper_loss: 0.0
-average reward score: 5.0390625
--------------------------------------------------------------------------------------
-|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.73s (74.96%) |Training time=0.47s (20.37%) |Others=0.11 (4.67%)|CurSamplesPerSec=13.84 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4537|ppo_ep: 1|act_loss: -0.0042266845703125|cri_loss: -0.0018224716186523438|unsuper_loss: 0.0
-average reward score: 5.3671875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.19%) |Training time=0.48s (22.18%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
-[2023-04-14 11:34:52,784] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
-epoch: 0|step: 4538|ppo_ep: 1|act_loss: 0.03857421875|cri_loss: 0.019989013671875|unsuper_loss: 0.0
-average reward score: 5.02734375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.59%) |Training time=0.48s (22.22%) |Others=0.09 (4.19%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
-[2023-04-14 11:34:54,843] [INFO] [logging.py:96:log_dist] [Rank 0] step=4540, skipped=59, lr=[4.267106824720006e-06, 4.267106824720006e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:34:54,862] [INFO] [timer.py:199:stop] epoch=0/micro_step=4540/global_step=4540, RunningAvgSamplesPerSec=105.14597895413151, CurrSamplesPerSec=97.63909587372902, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:34:54,945] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
-[2023-04-14 11:34:54,946] [INFO] [logging.py:96:log_dist] [Rank 0] step=4540, skipped=74, lr=[2.2252815514539662e-06, 2.2252815514539662e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4539|ppo_ep: 1|act_loss: -0.02557373046875|cri_loss: -0.0123748779296875|unsuper_loss: 0.0
-average reward score: 5.3984375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.18%) |Training time=0.49s (22.68%) |Others=0.09 (4.14%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4540|ppo_ep: 1|act_loss: -0.005657196044921875|cri_loss: -0.0024356842041015625|unsuper_loss: 0.0
-average reward score: 5.46875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.71%) |Training time=0.49s (22.44%) |Others=0.11 (4.85%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4541|ppo_ep: 1|act_loss: 0.0125885009765625|cri_loss: 0.006679534912109375|unsuper_loss: 0.0
-average reward score: 5.93359375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.06%) |Training time=0.48s (22.31%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4542|ppo_ep: 1|act_loss: -0.0191192626953125|cri_loss: -0.0088653564453125|unsuper_loss: 0.0
-average reward score: 5.3515625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.99%) |Training time=0.48s (22.17%) |Others=0.10 (4.84%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4543|ppo_ep: 1|act_loss: -0.011566162109375|cri_loss: -0.005367279052734375|unsuper_loss: 0.0
-average reward score: 5.796875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.13%) |Training time=0.48s (22.24%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4544|ppo_ep: 1|act_loss: 0.010528564453125|cri_loss: 0.00630950927734375|unsuper_loss: 0.0
-average reward score: 4.859375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.27%) |Training time=0.48s (22.11%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4545|ppo_ep: 1|act_loss: -0.0128631591796875|cri_loss: -0.005611419677734375|unsuper_loss: 0.0
-average reward score: 5.4140625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.16%) |Training time=0.48s (22.21%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4546|ppo_ep: 1|act_loss: 0.047515869140625|cri_loss: 0.02423095703125|unsuper_loss: 0.0
-average reward score: 5.83203125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.14%) |Training time=0.48s (22.23%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4547|ppo_ep: 1|act_loss: 0.0244140625|cri_loss: 0.01294708251953125|unsuper_loss: 0.0
-average reward score: 4.8515625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.12%) |Training time=0.48s (22.26%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4548|ppo_ep: 1|act_loss: 0.0213470458984375|cri_loss: 0.0111083984375|unsuper_loss: 0.0
-average reward score: 4.62890625
--------------------------------------------------------------------------------------
-|E2E latency=2.84s |Gather latency=0.00s (0.00%) |Generate time=1.58s (55.63%) |Training time=0.48s (16.89%) |Others=0.78 (27.49%)|CurSamplesPerSec=11.25 |AvgSamplesPerSec=14.46
-[2023-04-14 11:35:17,159] [INFO] [logging.py:96:log_dist] [Rank 0] step=4550, skipped=59, lr=[4.248659391327812e-06, 4.248659391327812e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:35:17,177] [INFO] [timer.py:199:stop] epoch=0/micro_step=4550/global_step=4550, RunningAvgSamplesPerSec=105.13486238929258, CurrSamplesPerSec=100.88547102857268, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:35:17,270] [INFO] [logging.py:96:log_dist] [Rank 0] step=4550, skipped=74, lr=[2.2157169220241807e-06, 2.2157169220241807e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4549|ppo_ep: 1|act_loss: -0.029541015625|cri_loss: -0.01416015625|unsuper_loss: 0.0
-average reward score: 4.4453125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.24%) |Training time=0.48s (22.14%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4550|ppo_ep: 1|act_loss: 0.0008153915405273438|cri_loss: 0.000988006591796875|unsuper_loss: 0.0
-average reward score: 5.0234375
--------------------------------------------------------------------------------------
-|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.12%) |Training time=0.48s (20.57%) |Others=0.10 (4.30%)|CurSamplesPerSec=13.72 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4551|ppo_ep: 1|act_loss: 0.0382080078125|cri_loss: 0.0198822021484375|unsuper_loss: 0.0
-average reward score: 4.80859375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.52%) |Training time=0.48s (21.87%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4552|ppo_ep: 1|act_loss: 0.005889892578125|cri_loss: 0.00328826904296875|unsuper_loss: 0.0
-average reward score: 5.94921875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.40%) |Training time=0.48s (21.94%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4553|ppo_ep: 1|act_loss: 0.007633209228515625|cri_loss: 0.00445556640625|unsuper_loss: 0.0
-average reward score: 5.875
--------------------------------------------------------------------------------------
-|E2E latency=2.51s |Gather latency=0.00s (0.00%) |Generate time=1.60s (63.76%) |Training time=0.46s (18.25%) |Others=0.45 (17.99%)|CurSamplesPerSec=12.75 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4554|ppo_ep: 1|act_loss: -0.04290771484375|cri_loss: -0.01824951171875|unsuper_loss: 0.0
-average reward score: 4.640625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.18%) |Training time=0.48s (22.17%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4555|ppo_ep: 1|act_loss: -0.011505126953125|cri_loss: -0.00542449951171875|unsuper_loss: 0.0
-average reward score: 4.640625
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.93%) |Training time=0.49s (22.49%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4556|ppo_ep: 1|act_loss: 0.023284912109375|cri_loss: 0.01212310791015625|unsuper_loss: 0.0
-average reward score: 4.88671875
--------------------------------------------------------------------------------------
-|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.62s (71.91%) |Training time=0.49s (21.71%) |Others=0.14 (6.38%)|CurSamplesPerSec=14.20 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4557|ppo_ep: 1|act_loss: 0.02960205078125|cri_loss: 0.016326904296875|unsuper_loss: 0.0
-average reward score: 5.91015625
--------------------------------------------------------------------------------------
-|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.63s (73.21%) |Training time=0.50s (22.29%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.37 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4558|ppo_ep: 1|act_loss: 0.0092010498046875|cri_loss: 0.005279541015625|unsuper_loss: 0.0
-average reward score: 5.0625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.32%) |Training time=0.48s (22.02%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
-[2023-04-14 11:35:39,518] [INFO] [logging.py:96:log_dist] [Rank 0] step=4560, skipped=59, lr=[4.230220500693783e-06, 4.230220500693783e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:35:39,838] [INFO] [timer.py:199:stop] epoch=0/micro_step=4560/global_step=4560, RunningAvgSamplesPerSec=105.10160046034554, CurrSamplesPerSec=51.63946257422192, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:35:39,931] [INFO] [logging.py:96:log_dist] [Rank 0] step=4560, skipped=74, lr=[2.2061565063554063e-06, 2.2061565063554063e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4559|ppo_ep: 1|act_loss: -0.020111083984375|cri_loss: -0.00907135009765625|unsuper_loss: 0.0
-average reward score: 4.53125
--------------------------------------------------------------------------------------
-|E2E latency=2.47s |Gather latency=0.00s (0.00%) |Generate time=1.59s (64.27%) |Training time=0.78s (31.69%) |Others=0.10 (4.04%)|CurSamplesPerSec=12.95 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4560|ppo_ep: 1|act_loss: -0.028472900390625|cri_loss: -0.01371002197265625|unsuper_loss: 0.0
-average reward score: 5.25
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.82%) |Training time=0.49s (22.56%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4561|ppo_ep: 1|act_loss: -0.017730712890625|cri_loss: -0.00830078125|unsuper_loss: 0.0
-average reward score: 4.1875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.17%) |Training time=0.48s (22.20%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4562|ppo_ep: 1|act_loss: 0.00510406494140625|cri_loss: 0.0028438568115234375|unsuper_loss: 0.0
-average reward score: 4.01171875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.96%) |Training time=0.49s (22.42%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4563|ppo_ep: 1|act_loss: -0.00669097900390625|cri_loss: -0.003200531005859375|unsuper_loss: 0.0
-average reward score: 5.5625
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.18%) |Training time=0.49s (22.25%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4564|ppo_ep: 1|act_loss: 0.0183563232421875|cri_loss: 0.009552001953125|unsuper_loss: 0.0
-average reward score: 4.5859375
--------------------------------------------------------------------------------------
-|E2E latency=2.62s |Gather latency=0.00s (0.00%) |Generate time=1.75s (66.91%) |Training time=0.51s (19.64%) |Others=0.35 (13.45%)|CurSamplesPerSec=12.22 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4565|ppo_ep: 1|act_loss: 0.029815673828125|cri_loss: 0.0155181884765625|unsuper_loss: 0.0
-average reward score: 6.4609375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.13%) |Training time=0.48s (22.24%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4566|ppo_ep: 1|act_loss: 0.0841064453125|cri_loss: 0.044769287109375|unsuper_loss: 0.0
-average reward score: 4.203125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.97%) |Training time=0.49s (22.42%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4567|ppo_ep: 1|act_loss: 0.030364990234375|cri_loss: 0.01641845703125|unsuper_loss: 0.0
-average reward score: 5.890625
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.51%) |Training time=0.50s (22.93%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4568|ppo_ep: 1|act_loss: -0.068603515625|cri_loss: -0.032928466796875|unsuper_loss: 0.0
-average reward score: 4.90625
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.60%) |Training time=0.50s (22.81%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.46
-[2023-04-14 11:36:02,033] [INFO] [logging.py:96:log_dist] [Rank 0] step=4570, skipped=59, lr=[4.2117904261267626e-06, 4.2117904261267626e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:36:02,051] [INFO] [timer.py:199:stop] epoch=0/micro_step=4570/global_step=4570, RunningAvgSamplesPerSec=105.08257705936522, CurrSamplesPerSec=98.051808641592, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:36:02,144] [INFO] [logging.py:96:log_dist] [Rank 0] step=4570, skipped=74, lr=[2.1966004461560667e-06, 2.1966004461560667e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4569|ppo_ep: 1|act_loss: -0.0516357421875|cri_loss: -0.0239410400390625|unsuper_loss: 0.0
-average reward score: 4.5859375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.89%) |Training time=0.49s (22.52%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4570|ppo_ep: 1|act_loss: 0.005802154541015625|cri_loss: 0.0033092498779296875|unsuper_loss: 0.0
-average reward score: 6.0078125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.93%) |Training time=0.49s (22.46%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4571|ppo_ep: 1|act_loss: 0.0352783203125|cri_loss: 0.0202178955078125|unsuper_loss: 0.0
-average reward score: 4.08984375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.21%) |Training time=0.48s (22.15%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4572|ppo_ep: 1|act_loss: 0.0212554931640625|cri_loss: 0.01184844970703125|unsuper_loss: 0.0
-average reward score: 4.58203125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.10%) |Training time=0.48s (22.30%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4573|ppo_ep: 1|act_loss: 0.04534912109375|cri_loss: 0.0251312255859375|unsuper_loss: 0.0
-average reward score: 4.2265625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.18%) |Training time=0.48s (22.20%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4574|ppo_ep: 1|act_loss: -0.0034160614013671875|cri_loss: -0.0014438629150390625|unsuper_loss: 0.0
-average reward score: 6.7734375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.01%) |Training time=0.48s (22.35%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4575|ppo_ep: 1|act_loss: -0.0118560791015625|cri_loss: -0.002506256103515625|unsuper_loss: 0.0
-average reward score: 4.6484375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.02%) |Training time=0.49s (22.38%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4576|ppo_ep: 1|act_loss: -0.0187225341796875|cri_loss: -0.007053375244140625|unsuper_loss: 0.0
-average reward score: 5.078125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.75%) |Training time=0.48s (22.23%) |Others=0.11 (5.02%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4577|ppo_ep: 1|act_loss: 0.0162353515625|cri_loss: 0.0099639892578125|unsuper_loss: 0.0
-average reward score: 4.25
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.14%) |Training time=0.48s (22.24%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4578|ppo_ep: 1|act_loss: -0.002105712890625|cri_loss: -0.0005311965942382812|unsuper_loss: 0.0
-average reward score: 5.73046875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.26%) |Training time=0.48s (22.13%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
-[2023-04-14 11:36:23,920] [INFO] [logging.py:96:log_dist] [Rank 0] step=4580, skipped=59, lr=[4.193369440804912e-06, 4.193369440804912e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:36:23,938] [INFO] [timer.py:199:stop] epoch=0/micro_step=4580/global_step=4580, RunningAvgSamplesPerSec=105.06683239626007, CurrSamplesPerSec=86.84876269801265, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:36:24,031] [INFO] [logging.py:96:log_dist] [Rank 0] step=4580, skipped=74, lr=[2.1870488830700263e-06, 2.1870488830700263e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4579|ppo_ep: 1|act_loss: 0.02239990234375|cri_loss: 0.011810302734375|unsuper_loss: 0.0
-average reward score: 5.25390625
--------------------------------------------------------------------------------------
-|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.72s (72.70%) |Training time=0.54s (23.00%) |Others=0.10 (4.31%)|CurSamplesPerSec=13.55 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4580|ppo_ep: 1|act_loss: -0.0028533935546875|cri_loss: -0.0010128021240234375|unsuper_loss: 0.0
-average reward score: 5.0625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.18%) |Training time=0.48s (22.18%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4581|ppo_ep: 1|act_loss: -0.03582763671875|cri_loss: -0.017059326171875|unsuper_loss: 0.0
-average reward score: 4.87109375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.42%) |Training time=0.48s (21.94%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4582|ppo_ep: 1|act_loss: -0.0109100341796875|cri_loss: -0.0051116943359375|unsuper_loss: 0.0
-average reward score: 4.60546875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.93%) |Training time=0.49s (22.44%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4583|ppo_ep: 1|act_loss: -0.0345458984375|cri_loss: -0.015655517578125|unsuper_loss: 0.0
-average reward score: 5.1875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.24%) |Training time=0.46s (21.11%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4584|ppo_ep: 1|act_loss: 0.0039215087890625|cri_loss: 0.002899169921875|unsuper_loss: 0.0
-average reward score: 5.97265625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.88%) |Training time=0.47s (21.47%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4585|ppo_ep: 1|act_loss: 0.016510009765625|cri_loss: 0.008544921875|unsuper_loss: 0.0
-average reward score: 5.85546875
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.28%) |Training time=0.46s (21.07%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4586|ppo_ep: 1|act_loss: 0.052734375|cri_loss: 0.0289154052734375|unsuper_loss: 0.0
-average reward score: 5.7890625
--------------------------------------------------------------------------------------
-|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.90%) |Training time=0.47s (20.66%) |Others=0.10 (4.44%)|CurSamplesPerSec=14.16 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4587|ppo_ep: 1|act_loss: 0.012481689453125|cri_loss: 0.006565093994140625|unsuper_loss: 0.0
-average reward score: 5.6953125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.69%) |Training time=0.47s (21.69%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4588|ppo_ep: 1|act_loss: 0.00595855712890625|cri_loss: 0.00421905517578125|unsuper_loss: 0.0
-average reward score: 5.51953125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.59%) |Training time=0.47s (21.78%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46
-[2023-04-14 11:36:45,723] [INFO] [logging.py:96:log_dist] [Rank 0] step=4590, skipped=59, lr=[4.174957817771677e-06, 4.174957817771677e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:36:45,741] [INFO] [timer.py:199:stop] epoch=0/micro_step=4590/global_step=4590, RunningAvgSamplesPerSec=105.06351756370773, CurrSamplesPerSec=102.14213537144157, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:36:45,834] [INFO] [logging.py:96:log_dist] [Rank 0] step=4590, skipped=74, lr=[2.1775019586744924e-06, 2.1775019586744924e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4589|ppo_ep: 1|act_loss: -0.00743865966796875|cri_loss: -0.003204345703125|unsuper_loss: 0.0
-average reward score: 4.12890625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.41%) |Training time=0.48s (21.98%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4590|ppo_ep: 1|act_loss: 0.003932952880859375|cri_loss: 0.0020923614501953125|unsuper_loss: 0.0
-average reward score: 5.2578125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.87%) |Training time=0.47s (21.52%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4591|ppo_ep: 1|act_loss: -0.0245361328125|cri_loss: -0.01166534423828125|unsuper_loss: 0.0
-average reward score: 6.515625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.50%) |Training time=0.47s (21.87%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4592|ppo_ep: 1|act_loss: -0.0102691650390625|cri_loss: -0.0049285888671875|unsuper_loss: 0.0
-average reward score: 4.7734375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.40%) |Training time=0.48s (21.97%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4593|ppo_ep: 1|act_loss: -0.0169525146484375|cri_loss: -0.0081939697265625|unsuper_loss: 0.0
-average reward score: 5.5
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.69%) |Training time=0.47s (21.67%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4594|ppo_ep: 1|act_loss: -0.0111541748046875|cri_loss: -0.0048980712890625|unsuper_loss: 0.0
-average reward score: 5.796875
--------------------------------------------------------------------------------------
-|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.48%) |Training time=0.47s (20.22%) |Others=0.10 (4.30%)|CurSamplesPerSec=13.71 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4595|ppo_ep: 1|act_loss: -0.0110931396484375|cri_loss: -0.00519561767578125|unsuper_loss: 0.0
-average reward score: 5.21875
--------------------------------------------------------------------------------------
-|E2E latency=2.45s |Gather latency=0.00s (0.00%) |Generate time=1.59s (65.04%) |Training time=0.47s (19.26%) |Others=0.38 (15.71%)|CurSamplesPerSec=13.08 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4596|ppo_ep: 1|act_loss: 0.0110931396484375|cri_loss: 0.005840301513671875|unsuper_loss: 0.0
-average reward score: 6.8671875
--------------------------------------------------------------------------------------
-|E2E latency=2.07s |Gather latency=0.00s (0.00%) |Generate time=1.59s (76.53%) |Training time=0.39s (18.65%) |Others=0.10 (4.82%)|CurSamplesPerSec=15.44 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4597|ppo_ep: 1|act_loss: 0.009765625|cri_loss: 0.005664825439453125|unsuper_loss: 0.0
-average reward score: 5.7421875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.62%) |Training time=0.47s (21.73%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4598|ppo_ep: 1|act_loss: 0.00322723388671875|cri_loss: 0.0019207000732421875|unsuper_loss: 0.0
-average reward score: 5.421875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.90%) |Training time=0.47s (21.47%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46
-[2023-04-14 11:37:07,765] [INFO] [logging.py:96:log_dist] [Rank 0] step=4600, skipped=59, lr=[4.156555829931723e-06, 4.156555829931723e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:37:07,783] [INFO] [timer.py:199:stop] epoch=0/micro_step=4600/global_step=4600, RunningAvgSamplesPerSec=105.06512917590997, CurrSamplesPerSec=95.71679557963769, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:37:07,876] [INFO] [logging.py:96:log_dist] [Rank 0] step=4600, skipped=74, lr=[2.167959814477915e-06, 2.167959814477915e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4599|ppo_ep: 1|act_loss: 0.0211944580078125|cri_loss: 0.011505126953125|unsuper_loss: 0.0
-average reward score: 5.33203125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.69%) |Training time=0.50s (22.75%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4600|ppo_ep: 1|act_loss: 0.0211334228515625|cri_loss: 0.010833740234375|unsuper_loss: 0.0
-average reward score: 5.8984375
--------------------------------------------------------------------------------------
-|E2E latency=2.96s |Gather latency=0.00s (0.00%) |Generate time=1.58s (53.31%) |Training time=0.50s (16.89%) |Others=0.88 (29.80%)|CurSamplesPerSec=10.79 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4601|ppo_ep: 1|act_loss: -0.0022735595703125|cri_loss: -0.0004930496215820312|unsuper_loss: 0.0
-average reward score: 5.046875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.67%) |Training time=0.49s (22.71%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4602|ppo_ep: 1|act_loss: 0.00644683837890625|cri_loss: 0.0035648345947265625|unsuper_loss: 0.0
-average reward score: 5.68359375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.65%) |Training time=0.49s (22.73%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4603|ppo_ep: 1|act_loss: 0.0165252685546875|cri_loss: 0.0088958740234375|unsuper_loss: 0.0
-average reward score: 5.25390625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.97%) |Training time=0.49s (22.35%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4604|ppo_ep: 1|act_loss: -0.01346588134765625|cri_loss: -0.00623321533203125|unsuper_loss: 0.0
-average reward score: 5.3984375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.75%) |Training time=0.49s (22.65%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4605|ppo_ep: 1|act_loss: 0.021270751953125|cri_loss: 0.01096343994140625|unsuper_loss: 0.0
-average reward score: 5.01171875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.56%) |Training time=0.50s (22.83%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4606|ppo_ep: 1|act_loss: -0.000988006591796875|cri_loss: -0.00018930435180664062|unsuper_loss: 0.0
-average reward score: 4.8046875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.19%) |Training time=0.48s (22.17%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4607|ppo_ep: 1|act_loss: -0.0102691650390625|cri_loss: -0.0048065185546875|unsuper_loss: 0.0
-average reward score: 6.5546875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.78%) |Training time=0.49s (22.63%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4608|ppo_ep: 1|act_loss: -0.01401519775390625|cri_loss: -0.00673675537109375|unsuper_loss: 0.0
-average reward score: 5.94921875
--------------------------------------------------------------------------------------
-|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.58s (69.12%) |Training time=0.52s (22.89%) |Others=0.18 (7.99%)|CurSamplesPerSec=13.99 |AvgSamplesPerSec=14.46
-[2023-04-14 11:37:30,415] [INFO] [logging.py:96:log_dist] [Rank 0] step=4610, skipped=59, lr=[4.138163750046903e-06, 4.138163750046903e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:37:30,433] [INFO] [timer.py:199:stop] epoch=0/micro_step=4610/global_step=4610, RunningAvgSamplesPerSec=105.04432744786926, CurrSamplesPerSec=99.1981883570604, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:37:30,526] [INFO] [logging.py:96:log_dist] [Rank 0] step=4610, skipped=74, lr=[2.158422591917891e-06, 2.158422591917891e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4609|ppo_ep: 1|act_loss: -0.00725555419921875|cri_loss: -0.0033588409423828125|unsuper_loss: 0.0
-average reward score: 5.1484375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.07%) |Training time=0.49s (22.31%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4610|ppo_ep: 1|act_loss: -0.04510498046875|cri_loss: -0.021087646484375|unsuper_loss: 0.0
-average reward score: 5.828125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.76%) |Training time=0.49s (22.64%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4611|ppo_ep: 1|act_loss: 0.01213836669921875|cri_loss: 0.0068511962890625|unsuper_loss: 0.0
-average reward score: 6.0078125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.93%) |Training time=0.49s (22.43%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4612|ppo_ep: 1|act_loss: 0.027618408203125|cri_loss: 0.0142364501953125|unsuper_loss: 0.0
-average reward score: 4.88671875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.07%) |Training time=0.49s (22.30%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4613|ppo_ep: 1|act_loss: 0.021514892578125|cri_loss: 0.01171875|unsuper_loss: 0.0
-average reward score: 5.97265625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.97%) |Training time=0.49s (22.42%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.46
-[2023-04-14 11:37:41,322] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
-epoch: 0|step: 4614|ppo_ep: 1|act_loss: 0.03173828125|cri_loss: 0.0161895751953125|unsuper_loss: 0.0
-average reward score: 5.9921875
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.21%) |Training time=0.46s (20.90%) |Others=0.11 (4.88%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.46
-[2023-04-14 11:37:43,569] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
-epoch: 0|step: 4615|ppo_ep: 1|act_loss: -0.00678253173828125|cri_loss: -0.0028896331787109375|unsuper_loss: 0.0
-average reward score: 4.5859375
--------------------------------------------------------------------------------------
-|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.68s (75.08%) |Training time=0.46s (20.46%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.29 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4616|ppo_ep: 1|act_loss: -0.0020618438720703125|cri_loss: -0.0008721351623535156|unsuper_loss: 0.0
-average reward score: 5.4765625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.32%) |Training time=0.48s (22.07%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4617|ppo_ep: 1|act_loss: 0.01251220703125|cri_loss: 0.006763458251953125|unsuper_loss: 0.0
-average reward score: 5.57421875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.78%) |Training time=0.49s (22.64%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4618|ppo_ep: 1|act_loss: 0.0031261444091796875|cri_loss: 0.0018663406372070312|unsuper_loss: 0.0
-average reward score: 4.828125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.43%) |Training time=0.48s (21.94%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
-[2023-04-14 11:37:52,245] [INFO] [logging.py:96:log_dist] [Rank 0] step=4620, skipped=61, lr=[4.123457403069056e-06, 4.123457403069056e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:37:52,264] [INFO] [timer.py:199:stop] epoch=0/micro_step=4620/global_step=4620, RunningAvgSamplesPerSec=105.03470887489762, CurrSamplesPerSec=100.67064495708186, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:37:52,356] [INFO] [logging.py:96:log_dist] [Rank 0] step=4620, skipped=74, lr=[2.1488904323590644e-06, 2.1488904323590644e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4619|ppo_ep: 1|act_loss: -0.00931549072265625|cri_loss: -0.00424957275390625|unsuper_loss: 0.0
-average reward score: 5.91796875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.28%) |Training time=0.48s (22.14%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4620|ppo_ep: 1|act_loss: -0.026458740234375|cri_loss: -0.011993408203125|unsuper_loss: 0.0
-average reward score: 5.3515625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.18%) |Training time=0.48s (22.21%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4621|ppo_ep: 1|act_loss: -0.027191162109375|cri_loss: -0.01263427734375|unsuper_loss: 0.0
-average reward score: 5.39453125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.01%) |Training time=0.48s (22.38%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4622|ppo_ep: 1|act_loss: 0.015960693359375|cri_loss: 0.00853729248046875|unsuper_loss: 0.0
-average reward score: 5.46484375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.68%) |Training time=0.49s (22.72%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4623|ppo_ep: 1|act_loss: 0.032012939453125|cri_loss: 0.0174407958984375|unsuper_loss: 0.0
-average reward score: 6.78125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.01%) |Training time=0.48s (22.37%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4624|ppo_ep: 1|act_loss: 0.031524658203125|cri_loss: 0.0169219970703125|unsuper_loss: 0.0
-average reward score: 3.92578125
--------------------------------------------------------------------------------------
-|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.75s (74.88%) |Training time=0.49s (20.82%) |Others=0.10 (4.30%)|CurSamplesPerSec=13.68 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4625|ppo_ep: 1|act_loss: 0.0192718505859375|cri_loss: 0.0108184814453125|unsuper_loss: 0.0
-average reward score: 6.53125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.08%) |Training time=0.48s (22.30%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4626|ppo_ep: 1|act_loss: 0.014068603515625|cri_loss: 0.0085296630859375|unsuper_loss: 0.0
-average reward score: 6.578125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.86%) |Training time=0.49s (22.52%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4627|ppo_ep: 1|act_loss: 0.0254974365234375|cri_loss: 0.01367950439453125|unsuper_loss: 0.0
-average reward score: 5.01953125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.86%) |Training time=0.49s (22.53%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4628|ppo_ep: 1|act_loss: 0.06097412109375|cri_loss: 0.032196044921875|unsuper_loss: 0.0
-average reward score: 6.33203125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.01%) |Training time=0.46s (21.36%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46
-[2023-04-14 11:38:14,122] [INFO] [logging.py:96:log_dist] [Rank 0] step=4630, skipped=61, lr=[4.1050838443919875e-06, 4.1050838443919875e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:38:14,140] [INFO] [timer.py:199:stop] epoch=0/micro_step=4630/global_step=4630, RunningAvgSamplesPerSec=105.02180375590667, CurrSamplesPerSec=99.11819206302553, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:38:14,233] [INFO] [logging.py:96:log_dist] [Rank 0] step=4630, skipped=74, lr=[2.1393634770910363e-06, 2.1393634770910363e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4629|ppo_ep: 1|act_loss: 0.0362548828125|cri_loss: 0.0199737548828125|unsuper_loss: 0.0
-average reward score: 4.56640625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.10%) |Training time=0.49s (22.32%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4630|ppo_ep: 1|act_loss: 0.004764556884765625|cri_loss: 0.003452301025390625|unsuper_loss: 0.0
-average reward score: 4.5546875
--------------------------------------------------------------------------------------
-|E2E latency=2.11s |Gather latency=0.00s (0.00%) |Generate time=1.59s (75.43%) |Training time=0.42s (19.79%) |Others=0.10 (4.77%)|CurSamplesPerSec=15.19 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4631|ppo_ep: 1|act_loss: 0.002178192138671875|cri_loss: 0.0012903213500976562|unsuper_loss: 0.0
-average reward score: 5.15234375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.31%) |Training time=0.48s (22.08%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4632|ppo_ep: 1|act_loss: -0.0230865478515625|cri_loss: -0.01128387451171875|unsuper_loss: 0.0
-average reward score: 5.0703125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.25%) |Training time=0.48s (22.14%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4633|ppo_ep: 1|act_loss: -0.037933349609375|cri_loss: -0.0178375244140625|unsuper_loss: 0.0
-average reward score: 4.98828125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.42%) |Training time=0.48s (21.97%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4634|ppo_ep: 1|act_loss: 0.00516510009765625|cri_loss: 0.0031719207763671875|unsuper_loss: 0.0
-average reward score: 4.69140625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.52%) |Training time=0.47s (21.85%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4635|ppo_ep: 1|act_loss: -0.004116058349609375|cri_loss: -0.0011539459228515625|unsuper_loss: 0.0
-average reward score: 5.58984375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.42%) |Training time=0.48s (21.97%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4636|ppo_ep: 1|act_loss: -0.0214996337890625|cri_loss: -0.01045989990234375|unsuper_loss: 0.0
-average reward score: 5.44140625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.19%) |Training time=0.48s (22.20%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4637|ppo_ep: 1|act_loss: -0.0006070137023925781|cri_loss: -5.626678466796875e-05|unsuper_loss: 0.0
-average reward score: 5.19921875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.54%) |Training time=0.47s (21.84%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4638|ppo_ep: 1|act_loss: -0.007564544677734375|cri_loss: -0.00339508056640625|unsuper_loss: 0.0
-average reward score: 6.296875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.34%) |Training time=0.48s (22.05%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
-[2023-04-14 11:38:36,000] [INFO] [logging.py:96:log_dist] [Rank 0] step=4640, skipped=61, lr=[4.086720956609049e-06, 4.086720956609049e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:38:36,018] [INFO] [timer.py:199:stop] epoch=0/micro_step=4640/global_step=4640, RunningAvgSamplesPerSec=105.01773699493148, CurrSamplesPerSec=98.99807414433666, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:38:36,111] [INFO] [logging.py:96:log_dist] [Rank 0] step=4640, skipped=74, lr=[2.1298418673262655e-06, 2.1298418673262655e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4639|ppo_ep: 1|act_loss: 0.024658203125|cri_loss: 0.01282501220703125|unsuper_loss: 0.0
-average reward score: 4.68359375
--------------------------------------------------------------------------------------
-|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.75s (74.98%) |Training time=0.49s (20.77%) |Others=0.10 (4.25%)|CurSamplesPerSec=13.67 |AvgSamplesPerSec=14.46
-[2023-04-14 11:38:38,285] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
-epoch: 0|step: 4640|ppo_ep: 1|act_loss: 0.0004324913024902344|cri_loss: 0.0012264251708984375|unsuper_loss: 0.0
-average reward score: 4.83203125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.39%) |Training time=0.49s (22.46%) |Others=0.09 (4.16%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
-[2023-04-14 11:38:40,445] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
-epoch: 0|step: 4641|ppo_ep: 1|act_loss: 0.0021190643310546875|cri_loss: 0.0013751983642578125|unsuper_loss: 0.0
-average reward score: 5.32421875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.70%) |Training time=0.48s (22.16%) |Others=0.09 (4.14%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4642|ppo_ep: 1|act_loss: 0.04876708984375|cri_loss: 0.0262603759765625|unsuper_loss: 0.0
-average reward score: 5.46875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.14%) |Training time=0.46s (21.21%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4643|ppo_ep: 1|act_loss: 0.0234375|cri_loss: 0.011993408203125|unsuper_loss: 0.0
-average reward score: 6.7265625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.67%) |Training time=0.43s (19.68%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4644|ppo_ep: 1|act_loss: -0.04034423828125|cri_loss: -0.018829345703125|unsuper_loss: 0.0
-average reward score: 6.83203125
--------------------------------------------------------------------------------------
-|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.79%) |Training time=0.47s (20.79%) |Others=0.10 (4.42%)|CurSamplesPerSec=14.12 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4645|ppo_ep: 1|act_loss: -0.008087158203125|cri_loss: -0.0038051605224609375|unsuper_loss: 0.0
-average reward score: 4.734375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.84%) |Training time=0.47s (21.57%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4646|ppo_ep: 1|act_loss: 0.0191802978515625|cri_loss: 0.009796142578125|unsuper_loss: 0.0
-average reward score: 5.59375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.24%) |Training time=0.48s (22.15%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4647|ppo_ep: 1|act_loss: 0.001789093017578125|cri_loss: 0.0009784698486328125|unsuper_loss: 0.0
-average reward score: 5.765625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.61%) |Training time=0.47s (21.75%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4648|ppo_ep: 1|act_loss: 0.019317626953125|cri_loss: 0.010040283203125|unsuper_loss: 0.0
-average reward score: 5.015625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.44%) |Training time=0.47s (21.93%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
-[2023-04-14 11:38:57,764] [INFO] [logging.py:96:log_dist] [Rank 0] step=4650, skipped=61, lr=[4.068369011902537e-06, 4.068369011902537e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:38:57,782] [INFO] [timer.py:199:stop] epoch=0/micro_step=4650/global_step=4650, RunningAvgSamplesPerSec=105.01619755698859, CurrSamplesPerSec=101.73921361882245, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:38:57,875] [INFO] [logging.py:96:log_dist] [Rank 0] step=4650, skipped=76, lr=[2.122228523121039e-06, 2.122228523121039e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4649|ppo_ep: 1|act_loss: 0.0022907257080078125|cri_loss: 0.00200653076171875|unsuper_loss: 0.0
-average reward score: 5.609375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.38%) |Training time=0.48s (22.02%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4650|ppo_ep: 1|act_loss: -0.032958984375|cri_loss: -0.0160064697265625|unsuper_loss: 0.0
-average reward score: 5.38671875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.60%) |Training time=0.47s (21.75%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4651|ppo_ep: 1|act_loss: -0.0117034912109375|cri_loss: -0.00518798828125|unsuper_loss: 0.0
-average reward score: 5.9296875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.48%) |Training time=0.47s (21.89%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4652|ppo_ep: 1|act_loss: 0.003528594970703125|cri_loss: 0.002811431884765625|unsuper_loss: 0.0
-average reward score: 5.703125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.33%) |Training time=0.48s (22.04%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4653|ppo_ep: 1|act_loss: -0.0008487701416015625|cri_loss: -0.0002982616424560547|unsuper_loss: 0.0
-average reward score: 5.48046875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.57%) |Training time=0.47s (21.81%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4654|ppo_ep: 1|act_loss: 0.020538330078125|cri_loss: 0.010650634765625|unsuper_loss: 0.0
-average reward score: 4.80078125
--------------------------------------------------------------------------------------
-|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.59s (69.68%) |Training time=0.59s (25.94%) |Others=0.10 (4.38%)|CurSamplesPerSec=14.03 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4655|ppo_ep: 1|act_loss: 0.006198883056640625|cri_loss: 0.003208160400390625|unsuper_loss: 0.0
-average reward score: 5.26171875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.51%) |Training time=0.47s (21.86%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4656|ppo_ep: 1|act_loss: 0.007274627685546875|cri_loss: 0.003936767578125|unsuper_loss: 0.0
-average reward score: 4.94921875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.13%) |Training time=0.46s (21.21%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4657|ppo_ep: 1|act_loss: 0.002826690673828125|cri_loss: 0.0015611648559570312|unsuper_loss: 0.0
-average reward score: 5.0625
--------------------------------------------------------------------------------------
-|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.60s (67.47%) |Training time=0.47s (19.75%) |Others=0.30 (12.77%)|CurSamplesPerSec=13.54 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4658|ppo_ep: 1|act_loss: 0.003002166748046875|cri_loss: 0.0018138885498046875|unsuper_loss: 0.0
-average reward score: 5.171875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.78%) |Training time=0.44s (20.49%) |Others=0.10 (4.73%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46
-[2023-04-14 11:39:19,693] [INFO] [logging.py:96:log_dist] [Rank 0] step=4660, skipped=61, lr=[4.050028282292539e-06, 4.050028282292539e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:39:19,711] [INFO] [timer.py:199:stop] epoch=0/micro_step=4660/global_step=4660, RunningAvgSamplesPerSec=105.00860834725098, CurrSamplesPerSec=103.96174217485263, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:39:19,804] [INFO] [logging.py:96:log_dist] [Rank 0] step=4660, skipped=76, lr=[2.112716890863282e-06, 2.112716890863282e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4659|ppo_ep: 1|act_loss: 0.00225830078125|cri_loss: 0.001560211181640625|unsuper_loss: 0.0
-average reward score: 5.0859375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.66%) |Training time=0.47s (21.72%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4660|ppo_ep: 1|act_loss: 0.019073486328125|cri_loss: 0.010650634765625|unsuper_loss: 0.0
-average reward score: 6.0
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.79%) |Training time=0.47s (21.56%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4661|ppo_ep: 1|act_loss: -0.0088043212890625|cri_loss: -0.004238128662109375|unsuper_loss: 0.0
-average reward score: 5.2421875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.65%) |Training time=0.47s (21.73%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4662|ppo_ep: 1|act_loss: -0.019439697265625|cri_loss: -0.00907135009765625|unsuper_loss: 0.0
-average reward score: 5.453125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.73%) |Training time=0.46s (21.46%) |Others=0.10 (4.81%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4663|ppo_ep: 1|act_loss: 0.0100860595703125|cri_loss: 0.0054473876953125|unsuper_loss: 0.0
-average reward score: 5.2265625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.69%) |Training time=0.47s (21.66%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4664|ppo_ep: 1|act_loss: -0.0088348388671875|cri_loss: -0.00360870361328125|unsuper_loss: 0.0
-average reward score: 4.9765625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.68%) |Training time=0.47s (21.69%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4665|ppo_ep: 1|act_loss: -0.02349853515625|cri_loss: -0.01110076904296875|unsuper_loss: 0.0
-average reward score: 6.4375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.66%) |Training time=0.47s (21.70%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4666|ppo_ep: 1|act_loss: 0.011749267578125|cri_loss: 0.0064697265625|unsuper_loss: 0.0
-average reward score: 5.3984375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.54%) |Training time=0.47s (21.82%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4667|ppo_ep: 1|act_loss: 0.03271484375|cri_loss: 0.0168304443359375|unsuper_loss: 0.0
-average reward score: 4.91796875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.61%) |Training time=0.47s (21.76%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4668|ppo_ep: 1|act_loss: 0.068115234375|cri_loss: 0.036285400390625|unsuper_loss: 0.0
-average reward score: 5.5859375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.88%) |Training time=0.46s (21.45%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46
-[2023-04-14 11:39:41,490] [INFO] [logging.py:96:log_dist] [Rank 0] step=4670, skipped=61, lr=[4.031699039632916e-06, 4.031699039632916e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:39:41,509] [INFO] [timer.py:199:stop] epoch=0/micro_step=4670/global_step=4670, RunningAvgSamplesPerSec=105.0078285540475, CurrSamplesPerSec=105.11653158130927, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:39:41,603] [INFO] [logging.py:96:log_dist] [Rank 0] step=4670, skipped=76, lr=[2.1032109990754734e-06, 2.1032109990754734e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4669|ppo_ep: 1|act_loss: 0.0181427001953125|cri_loss: 0.00955963134765625|unsuper_loss: 0.0
-average reward score: 4.52734375
--------------------------------------------------------------------------------------
-|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.52%) |Training time=0.47s (20.09%) |Others=0.10 (4.39%)|CurSamplesPerSec=13.74 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4670|ppo_ep: 1|act_loss: 0.024658203125|cri_loss: 0.0130615234375|unsuper_loss: 0.0
-average reward score: 6.765625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.32%) |Training time=0.48s (22.07%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4671|ppo_ep: 1|act_loss: -0.014862060546875|cri_loss: -0.006984710693359375|unsuper_loss: 0.0
-average reward score: 5.3203125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.44%) |Training time=0.48s (21.93%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4672|ppo_ep: 1|act_loss: -0.0279693603515625|cri_loss: -0.01361083984375|unsuper_loss: 0.0
-average reward score: 5.20703125
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.96%) |Training time=0.48s (21.79%) |Others=0.12 (5.25%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4673|ppo_ep: 1|act_loss: -0.02764892578125|cri_loss: -0.01336669921875|unsuper_loss: 0.0
-average reward score: 4.77734375
--------------------------------------------------------------------------------------
-|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.64s (72.68%) |Training time=0.52s (22.83%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.18 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4674|ppo_ep: 1|act_loss: -0.03143310546875|cri_loss: -0.0149383544921875|unsuper_loss: 0.0
-average reward score: 5.03515625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.65%) |Training time=0.47s (21.69%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4675|ppo_ep: 1|act_loss: -0.027587890625|cri_loss: -0.01340484619140625|unsuper_loss: 0.0
-average reward score: 5.640625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.04%) |Training time=0.46s (21.28%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4676|ppo_ep: 1|act_loss: -0.04046630859375|cri_loss: -0.01983642578125|unsuper_loss: 0.0
-average reward score: 4.7109375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.84%) |Training time=0.46s (21.53%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4677|ppo_ep: 1|act_loss: -0.0153961181640625|cri_loss: -0.007396697998046875|unsuper_loss: 0.0
-average reward score: 5.234375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.87%) |Training time=0.46s (21.49%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4678|ppo_ep: 1|act_loss: -0.02301025390625|cri_loss: -0.0111541748046875|unsuper_loss: 0.0
-average reward score: 6.125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.76%) |Training time=0.46s (21.59%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46
-[2023-04-14 11:40:03,192] [INFO] [logging.py:96:log_dist] [Rank 0] step=4680, skipped=61, lr=[4.01338155560726e-06, 4.01338155560726e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:40:03,210] [INFO] [timer.py:199:stop] epoch=0/micro_step=4680/global_step=4680, RunningAvgSamplesPerSec=105.00428085351663, CurrSamplesPerSec=107.38758437052944, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:40:03,303] [INFO] [logging.py:96:log_dist] [Rank 0] step=4680, skipped=76, lr=[2.093710988657859e-06, 2.093710988657859e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4679|ppo_ep: 1|act_loss: -0.0083160400390625|cri_loss: -0.003635406494140625|unsuper_loss: 0.0
-average reward score: 6.09375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.93%) |Training time=0.46s (21.42%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4680|ppo_ep: 1|act_loss: -0.017486572265625|cri_loss: -0.00780487060546875|unsuper_loss: 0.0
-average reward score: 6.421875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.97%) |Training time=0.46s (21.37%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4681|ppo_ep: 1|act_loss: 0.015594482421875|cri_loss: 0.00809478759765625|unsuper_loss: 0.0
-average reward score: 5.171875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.94%) |Training time=0.46s (21.41%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4682|ppo_ep: 1|act_loss: 0.006862640380859375|cri_loss: 0.003787994384765625|unsuper_loss: 0.0
-average reward score: 4.71875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.07%) |Training time=0.46s (21.27%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4683|ppo_ep: 1|act_loss: 0.016693115234375|cri_loss: 0.00894927978515625|unsuper_loss: 0.0
-average reward score: 4.73046875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.91%) |Training time=0.46s (21.44%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4684|ppo_ep: 1|act_loss: 0.0419921875|cri_loss: 0.021636962890625|unsuper_loss: 0.0
-average reward score: 5.06640625
--------------------------------------------------------------------------------------
-|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.20%) |Training time=0.48s (20.51%) |Others=0.10 (4.29%)|CurSamplesPerSec=13.68 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4685|ppo_ep: 1|act_loss: 0.02496337890625|cri_loss: 0.01329803466796875|unsuper_loss: 0.0
-average reward score: 5.35546875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.23%) |Training time=0.45s (21.10%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4686|ppo_ep: 1|act_loss: -0.03363037109375|cri_loss: -0.01654052734375|unsuper_loss: 0.0
-average reward score: 5.23828125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.99%) |Training time=0.46s (21.37%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4687|ppo_ep: 1|act_loss: -0.0013904571533203125|cri_loss: -0.0002593994140625|unsuper_loss: 0.0
-average reward score: 5.140625
--------------------------------------------------------------------------------------
-|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.61s (70.13%) |Training time=0.46s (20.03%) |Others=0.23 (9.84%)|CurSamplesPerSec=13.97 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4688|ppo_ep: 1|act_loss: -0.010955810546875|cri_loss: -0.0045623779296875|unsuper_loss: 0.0
-average reward score: 6.203125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.19%) |Training time=0.48s (22.17%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.46
-[2023-04-14 11:40:25,038] [INFO] [logging.py:96:log_dist] [Rank 0] step=4690, skipped=61, lr=[3.995076101724873e-06, 3.995076101724873e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:40:25,057] [INFO] [timer.py:199:stop] epoch=0/micro_step=4690/global_step=4690, RunningAvgSamplesPerSec=105.00716011355068, CurrSamplesPerSec=106.80988441863944, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:40:25,149] [INFO] [logging.py:96:log_dist] [Rank 0] step=4690, skipped=76, lr=[2.084217000423512e-06, 2.084217000423512e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4689|ppo_ep: 1|act_loss: -0.0246734619140625|cri_loss: -0.01197052001953125|unsuper_loss: 0.0
-average reward score: 6.93359375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.89%) |Training time=0.46s (21.49%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4690|ppo_ep: 1|act_loss: -0.03106689453125|cri_loss: -0.014862060546875|unsuper_loss: 0.0
-average reward score: 5.22265625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.22%) |Training time=0.45s (21.10%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4691|ppo_ep: 1|act_loss: 0.003841400146484375|cri_loss: 0.002197265625|unsuper_loss: 0.0
-average reward score: 6.0078125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.97%) |Training time=0.46s (21.39%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4692|ppo_ep: 1|act_loss: 0.023040771484375|cri_loss: 0.01206207275390625|unsuper_loss: 0.0
-average reward score: 6.5234375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.03%) |Training time=0.46s (21.31%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4693|ppo_ep: 1|act_loss: 0.033447265625|cri_loss: 0.0173492431640625|unsuper_loss: 0.0
-average reward score: 5.86328125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.03%) |Training time=0.46s (21.31%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4694|ppo_ep: 1|act_loss: 0.0143585205078125|cri_loss: 0.0074462890625|unsuper_loss: 0.0
-average reward score: 5.1484375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.29%) |Training time=0.45s (21.02%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4695|ppo_ep: 1|act_loss: 0.026763916015625|cri_loss: 0.01410675048828125|unsuper_loss: 0.0
-average reward score: 4.625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.05%) |Training time=0.46s (21.30%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4696|ppo_ep: 1|act_loss: -0.000705718994140625|cri_loss: -0.0001049041748046875|unsuper_loss: 0.0
-average reward score: 5.35546875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.95%) |Training time=0.46s (21.40%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4697|ppo_ep: 1|act_loss: -0.001068115234375|cri_loss: -0.0003914833068847656|unsuper_loss: 0.0
-average reward score: 5.453125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.12%) |Training time=0.46s (21.21%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4698|ppo_ep: 1|act_loss: 0.015380859375|cri_loss: 0.00798797607421875|unsuper_loss: 0.0
-average reward score: 4.73828125
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.59s (71.99%) |Training time=0.46s (20.87%) |Others=0.16 (7.14%)|CurSamplesPerSec=14.49 |AvgSamplesPerSec=14.46
-[2023-04-14 11:40:46,698] [INFO] [logging.py:96:log_dist] [Rank 0] step=4700, skipped=61, lr=[3.97678294931674e-06, 3.97678294931674e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:40:46,716] [INFO] [timer.py:199:stop] epoch=0/micro_step=4700/global_step=4700, RunningAvgSamplesPerSec=105.01293390159111, CurrSamplesPerSec=100.83158391355171, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:40:46,810] [INFO] [logging.py:96:log_dist] [Rank 0] step=4700, skipped=76, lr=[2.07472917509624e-06, 2.07472917509624e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4699|ppo_ep: 1|act_loss: -0.00714874267578125|cri_loss: -0.0033435821533203125|unsuper_loss: 0.0
-average reward score: 4.5546875
--------------------------------------------------------------------------------------
-|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.68s (74.13%) |Training time=0.48s (21.13%) |Others=0.11 (4.73%)|CurSamplesPerSec=14.09 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4700|ppo_ep: 1|act_loss: -0.027618408203125|cri_loss: -0.01334381103515625|unsuper_loss: 0.0
-average reward score: 5.33984375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.81%) |Training time=0.47s (21.58%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4701|ppo_ep: 1|act_loss: -0.0284271240234375|cri_loss: -0.0135498046875|unsuper_loss: 0.0
-average reward score: 5.75390625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.81%) |Training time=0.46s (21.52%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4702|ppo_ep: 1|act_loss: 0.0157012939453125|cri_loss: 0.00835418701171875|unsuper_loss: 0.0
-average reward score: 5.9296875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.70%) |Training time=0.47s (21.69%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4703|ppo_ep: 1|act_loss: 0.0203857421875|cri_loss: 0.010467529296875|unsuper_loss: 0.0
-average reward score: 5.60546875
--------------------------------------------------------------------------------------
-|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.69s (73.58%) |Training time=0.51s (22.04%) |Others=0.10 (4.38%)|CurSamplesPerSec=13.95 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4704|ppo_ep: 1|act_loss: -0.0079803466796875|cri_loss: -0.003173828125|unsuper_loss: 0.0
-average reward score: 7.0
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.48%) |Training time=0.47s (21.87%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4705|ppo_ep: 1|act_loss: 0.034271240234375|cri_loss: 0.0179901123046875|unsuper_loss: 0.0
-average reward score: 5.671875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.10%) |Training time=0.46s (21.24%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4706|ppo_ep: 1|act_loss: 0.0245819091796875|cri_loss: 0.01285552978515625|unsuper_loss: 0.0
-average reward score: 6.3359375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.98%) |Training time=0.46s (21.37%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4707|ppo_ep: 1|act_loss: 0.0191192626953125|cri_loss: 0.010101318359375|unsuper_loss: 0.0
-average reward score: 6.9375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.20%) |Training time=0.45s (21.12%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4708|ppo_ep: 1|act_loss: -0.004428863525390625|cri_loss: -0.0017147064208984375|unsuper_loss: 0.0
-average reward score: 4.609375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.11%) |Training time=0.46s (21.23%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46
-[2023-04-14 11:41:08,383] [INFO] [logging.py:96:log_dist] [Rank 0] step=4710, skipped=61, lr=[3.9585023695315105e-06, 3.9585023695315105e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:41:08,402] [INFO] [timer.py:199:stop] epoch=0/micro_step=4710/global_step=4710, RunningAvgSamplesPerSec=105.01362133774731, CurrSamplesPerSec=109.07168687720736, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:41:08,494] [INFO] [logging.py:96:log_dist] [Rank 0] step=4710, skipped=76, lr=[2.0652476533085043e-06, 2.0652476533085043e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4709|ppo_ep: 1|act_loss: 0.0088348388671875|cri_loss: 0.00482940673828125|unsuper_loss: 0.0
-average reward score: 6.0703125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.09%) |Training time=0.46s (21.25%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4710|ppo_ep: 1|act_loss: -0.0226593017578125|cri_loss: -0.0111083984375|unsuper_loss: 0.0
-average reward score: 5.72265625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.81%) |Training time=0.46s (21.53%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4711|ppo_ep: 1|act_loss: -0.02349853515625|cri_loss: -0.01152801513671875|unsuper_loss: 0.0
-average reward score: 5.12109375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.79%) |Training time=0.46s (21.55%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4712|ppo_ep: 1|act_loss: 0.0088348388671875|cri_loss: 0.005279541015625|unsuper_loss: 0.0
-average reward score: 6.04296875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.98%) |Training time=0.46s (21.36%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4713|ppo_ep: 1|act_loss: -0.007274627685546875|cri_loss: -0.00310516357421875|unsuper_loss: 0.0
-average reward score: 4.375
--------------------------------------------------------------------------------------
-|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.75s (74.71%) |Training time=0.49s (20.99%) |Others=0.10 (4.31%)|CurSamplesPerSec=13.66 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4714|ppo_ep: 1|act_loss: 0.004566192626953125|cri_loss: 0.002834320068359375|unsuper_loss: 0.0
-average reward score: 4.8984375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.07%) |Training time=0.46s (21.24%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4715|ppo_ep: 1|act_loss: 0.0113983154296875|cri_loss: 0.005950927734375|unsuper_loss: 0.0
-average reward score: 5.3203125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.91%) |Training time=0.46s (21.44%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46
-[2023-04-14 11:41:23,609] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
-epoch: 0|step: 4716|ppo_ep: 1|act_loss: 0.078857421875|cri_loss: 0.044708251953125|unsuper_loss: 0.0
-average reward score: 5.390625
--------------------------------------------------------------------------------------
-|E2E latency=2.92s |Gather latency=0.00s (0.00%) |Generate time=1.59s (54.54%) |Training time=1.23s (42.03%) |Others=0.10 (3.43%)|CurSamplesPerSec=10.98 |AvgSamplesPerSec=14.46
-[2023-04-14 11:41:26,520] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
-epoch: 0|step: 4717|ppo_ep: 1|act_loss: 0.02716064453125|cri_loss: 0.01409912109375|unsuper_loss: 0.0
-average reward score: 5.77734375
--------------------------------------------------------------------------------------
-|E2E latency=2.12s |Gather latency=0.00s (0.00%) |Generate time=1.59s (75.21%) |Training time=0.42s (20.05%) |Others=0.10 (4.74%)|CurSamplesPerSec=15.11 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4718|ppo_ep: 1|act_loss: 0.00347137451171875|cri_loss: 0.0022602081298828125|unsuper_loss: 0.0
-average reward score: 5.9296875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.45%) |Training time=0.47s (21.93%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
-[2023-04-14 11:41:30,826] [INFO] [logging.py:96:log_dist] [Rank 0] step=4720, skipped=63, lr=[3.943887140084842e-06, 3.943887140084842e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:41:30,845] [INFO] [timer.py:199:stop] epoch=0/micro_step=4720/global_step=4720, RunningAvgSamplesPerSec=104.96272361672763, CurrSamplesPerSec=110.75861112853234, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:41:30,938] [INFO] [logging.py:96:log_dist] [Rank 0] step=4720, skipped=76, lr=[2.0557725755993283e-06, 2.0557725755993283e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4719|ppo_ep: 1|act_loss: -0.0146484375|cri_loss: -0.005100250244140625|unsuper_loss: 0.0
-average reward score: 5.58203125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.46%) |Training time=0.45s (20.90%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4720|ppo_ep: 1|act_loss: 0.005779266357421875|cri_loss: 0.003086090087890625|unsuper_loss: 0.0
-average reward score: 5.26171875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.33%) |Training time=0.43s (19.98%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4721|ppo_ep: 1|act_loss: -0.016845703125|cri_loss: -0.00811004638671875|unsuper_loss: 0.0
-average reward score: 4.7265625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.84%) |Training time=0.44s (20.49%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4722|ppo_ep: 1|act_loss: -0.03753662109375|cri_loss: -0.01436614990234375|unsuper_loss: 0.0
-average reward score: 5.58203125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.03%) |Training time=0.43s (20.29%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4723|ppo_ep: 1|act_loss: -0.008636474609375|cri_loss: -0.0040435791015625|unsuper_loss: 0.0
-average reward score: 5.83984375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.61%) |Training time=0.45s (20.72%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4724|ppo_ep: 1|act_loss: 0.004852294921875|cri_loss: 0.002864837646484375|unsuper_loss: 0.0
-average reward score: 4.96875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.82%) |Training time=0.44s (20.51%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4725|ppo_ep: 1|act_loss: 0.0152740478515625|cri_loss: 0.008697509765625|unsuper_loss: 0.0
-average reward score: 4.87890625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.87%) |Training time=0.44s (20.43%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4726|ppo_ep: 1|act_loss: -0.005199432373046875|cri_loss: -0.000682830810546875|unsuper_loss: 0.0
-average reward score: 5.92578125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.68%) |Training time=0.44s (20.65%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4727|ppo_ep: 1|act_loss: 0.023590087890625|cri_loss: 0.0131072998046875|unsuper_loss: 0.0
-average reward score: 4.58984375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.56%) |Training time=0.45s (20.78%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4728|ppo_ep: 1|act_loss: 0.003345489501953125|cri_loss: 0.0030059814453125|unsuper_loss: 0.0
-average reward score: 5.6484375
--------------------------------------------------------------------------------------
-|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.79s (75.61%) |Training time=0.48s (20.16%) |Others=0.10 (4.23%)|CurSamplesPerSec=13.52 |AvgSamplesPerSec=14.46
-[2023-04-14 11:41:52,512] [INFO] [logging.py:96:log_dist] [Rank 0] step=4730, skipped=63, lr=[3.925629873718018e-06, 3.925629873718018e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:41:52,531] [INFO] [timer.py:199:stop] epoch=0/micro_step=4730/global_step=4730, RunningAvgSamplesPerSec=104.9805842763996, CurrSamplesPerSec=115.75283565613357, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:41:52,623] [INFO] [logging.py:96:log_dist] [Rank 0] step=4730, skipped=76, lr=[2.046304082412223e-06, 2.046304082412223e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4729|ppo_ep: 1|act_loss: 0.015625|cri_loss: 0.0095672607421875|unsuper_loss: 0.0
-average reward score: 7.2734375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.89%) |Training time=0.44s (20.46%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4730|ppo_ep: 1|act_loss: 0.0770263671875|cri_loss: 0.041961669921875|unsuper_loss: 0.0
-average reward score: 6.16796875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.61%) |Training time=0.45s (20.75%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4731|ppo_ep: 1|act_loss: -0.009735107421875|cri_loss: -0.003570556640625|unsuper_loss: 0.0
-average reward score: 4.68359375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.58%) |Training time=0.45s (20.79%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4732|ppo_ep: 1|act_loss: -0.0060272216796875|cri_loss: -0.002765655517578125|unsuper_loss: 0.0
-average reward score: 5.0
--------------------------------------------------------------------------------------
-|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.64s (71.88%) |Training time=0.54s (23.74%) |Others=0.10 (4.38%)|CurSamplesPerSec=13.99 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4733|ppo_ep: 1|act_loss: -0.005260467529296875|cri_loss: -0.00251007080078125|unsuper_loss: 0.0
-average reward score: 6.58203125
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.28%) |Training time=0.49s (22.16%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.56 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4734|ppo_ep: 1|act_loss: 0.019622802734375|cri_loss: 0.0103607177734375|unsuper_loss: 0.0
-average reward score: 5.94140625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.71%) |Training time=0.44s (20.64%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4735|ppo_ep: 1|act_loss: -0.00572967529296875|cri_loss: -0.002567291259765625|unsuper_loss: 0.0
-average reward score: 5.63671875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.85%) |Training time=0.47s (21.48%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4736|ppo_ep: 1|act_loss: -0.02288818359375|cri_loss: -0.010772705078125|unsuper_loss: 0.0
-average reward score: 6.9375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.48%) |Training time=0.45s (20.87%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4737|ppo_ep: 1|act_loss: 0.0115203857421875|cri_loss: 0.006496429443359375|unsuper_loss: 0.0
-average reward score: 6.45703125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.45%) |Training time=0.45s (20.90%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4738|ppo_ep: 1|act_loss: -0.00241851806640625|cri_loss: -0.0006160736083984375|unsuper_loss: 0.0
-average reward score: 6.13671875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.18%) |Training time=0.46s (21.20%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
-[2023-04-14 11:42:14,294] [INFO] [logging.py:96:log_dist] [Rank 0] step=4740, skipped=63, lr=[3.907385938186079e-06, 3.907385938186079e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:42:14,312] [INFO] [timer.py:199:stop] epoch=0/micro_step=4740/global_step=4740, RunningAvgSamplesPerSec=104.98486849818003, CurrSamplesPerSec=111.83245012364935, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:42:14,405] [INFO] [logging.py:96:log_dist] [Rank 0] step=4740, skipped=76, lr=[2.0368423140930975e-06, 2.0368423140930975e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4739|ppo_ep: 1|act_loss: -0.01392364501953125|cri_loss: -0.006786346435546875|unsuper_loss: 0.0
-average reward score: 4.703125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.57%) |Training time=0.45s (20.80%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
-[2023-04-14 11:42:16,448] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, reducing to 16384
-epoch: 0|step: 4740|ppo_ep: 1|act_loss: -0.0205078125|cri_loss: -0.00921630859375|unsuper_loss: 0.0
-average reward score: 5.4296875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.29%) |Training time=0.43s (20.02%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4741|ppo_ep: 1|act_loss: -0.02386474609375|cri_loss: -0.01117706298828125|unsuper_loss: 0.0
-average reward score: 5.06640625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.73%) |Training time=0.44s (20.62%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.47
-[2023-04-14 11:42:20,842] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
-epoch: 0|step: 4742|ppo_ep: 1|act_loss: -0.0082855224609375|cri_loss: -0.004001617431640625|unsuper_loss: 0.0
-average reward score: 4.22265625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.95%) |Training time=0.45s (20.86%) |Others=0.09 (4.19%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.47
-[2023-04-14 11:42:23,156] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
-epoch: 0|step: 4743|ppo_ep: 1|act_loss: -0.0103607177734375|cri_loss: -0.0032501220703125|unsuper_loss: 0.0
-average reward score: 4.859375
--------------------------------------------------------------------------------------
-|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.78s (76.93%) |Training time=0.44s (19.19%) |Others=0.09 (3.88%)|CurSamplesPerSec=13.83 |AvgSamplesPerSec=14.47
-[2023-04-14 11:42:25,190] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 16384, reducing to 8192
-epoch: 0|step: 4744|ppo_ep: 1|act_loss: -0.0399169921875|cri_loss: -0.0182647705078125|unsuper_loss: 0.0
-average reward score: 6.47265625
--------------------------------------------------------------------------------------
-|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.48%) |Training time=0.42s (19.83%) |Others=0.10 (4.69%)|CurSamplesPerSec=15.03 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4745|ppo_ep: 1|act_loss: -0.00701141357421875|cri_loss: -0.003337860107421875|unsuper_loss: 0.0
-average reward score: 5.31640625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.59%) |Training time=0.45s (20.78%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4746|ppo_ep: 1|act_loss: 0.006175994873046875|cri_loss: 0.00450897216796875|unsuper_loss: 0.0
-average reward score: 6.01953125
--------------------------------------------------------------------------------------
-|E2E latency=2.86s |Gather latency=0.00s (0.00%) |Generate time=1.61s (56.27%) |Training time=0.44s (15.55%) |Others=0.81 (28.18%)|CurSamplesPerSec=11.20 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4747|ppo_ep: 1|act_loss: -0.00533294677734375|cri_loss: -0.002384185791015625|unsuper_loss: 0.0
-average reward score: 4.984375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.99%) |Training time=0.44s (20.32%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4748|ppo_ep: 1|act_loss: 0.005939483642578125|cri_loss: 0.003276824951171875|unsuper_loss: 0.0
-average reward score: 5.9140625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.11%) |Training time=0.46s (21.27%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
-[2023-04-14 11:42:36,660] [INFO] [logging.py:96:log_dist] [Rank 0] step=4750, skipped=65, lr=[3.892800569690045e-06, 3.892800569690045e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:42:36,679] [INFO] [timer.py:199:stop] epoch=0/micro_step=4750/global_step=4750, RunningAvgSamplesPerSec=105.00306105783682, CurrSamplesPerSec=112.30368528329925, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:42:36,771] [INFO] [logging.py:96:log_dist] [Rank 0] step=4750, skipped=78, lr=[2.0292778355916135e-06, 2.0292778355916135e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4749|ppo_ep: 1|act_loss: -0.00264739990234375|cri_loss: -0.0008673667907714844|unsuper_loss: 0.0
-average reward score: 6.3046875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.62%) |Training time=0.45s (20.73%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4750|ppo_ep: 1|act_loss: 0.000370025634765625|cri_loss: 0.0034427642822265625|unsuper_loss: 0.0
-average reward score: 5.9921875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.83%) |Training time=0.44s (20.48%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4751|ppo_ep: 1|act_loss: 0.01898193359375|cri_loss: 0.00974273681640625|unsuper_loss: 0.0
-average reward score: 6.25
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.66%) |Training time=0.44s (20.67%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4752|ppo_ep: 1|act_loss: -0.0025463104248046875|cri_loss: -0.0009636878967285156|unsuper_loss: 0.0
-average reward score: 5.4453125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.69%) |Training time=0.44s (20.63%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46
-epoch: 0|step: 4753|ppo_ep: 1|act_loss: 0.02203369140625|cri_loss: 0.011383056640625|unsuper_loss: 0.0
-average reward score: 5.05859375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.61%) |Training time=0.45s (20.72%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4754|ppo_ep: 1|act_loss: 0.0150146484375|cri_loss: 0.00759124755859375|unsuper_loss: 0.0
-average reward score: 6.05078125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.87%) |Training time=0.44s (20.46%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4755|ppo_ep: 1|act_loss: 0.00328826904296875|cri_loss: 0.002582550048828125|unsuper_loss: 0.0
-average reward score: 5.7890625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.83%) |Training time=0.44s (20.54%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4756|ppo_ep: 1|act_loss: -0.01629638671875|cri_loss: -0.00687408447265625|unsuper_loss: 0.0
-average reward score: 6.4765625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.61%) |Training time=0.45s (20.73%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4757|ppo_ep: 1|act_loss: -0.0019683837890625|cri_loss: -0.0006279945373535156|unsuper_loss: 0.0
-average reward score: 4.71484375
--------------------------------------------------------------------------------------
-|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.77s (76.31%) |Training time=0.45s (19.34%) |Others=0.10 (4.35%)|CurSamplesPerSec=13.78 |AvgSamplesPerSec=14.47
-[2023-04-14 11:42:56,174] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 8192, reducing to 4096
-epoch: 0|step: 4758|ppo_ep: 1|act_loss: -0.021881103515625|cri_loss: -0.009552001953125|unsuper_loss: 0.0
-average reward score: 4.94921875
--------------------------------------------------------------------------------------
-|E2E latency=2.12s |Gather latency=0.00s (0.00%) |Generate time=1.60s (75.61%) |Training time=0.42s (19.67%) |Others=0.10 (4.72%)|CurSamplesPerSec=15.09 |AvgSamplesPerSec=14.47
-[2023-04-14 11:42:58,305] [INFO] [logging.py:96:log_dist] [Rank 0] step=4760, skipped=66, lr=[3.876402607369461e-06, 3.876402607369461e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:42:58,323] [INFO] [timer.py:199:stop] epoch=0/micro_step=4760/global_step=4760, RunningAvgSamplesPerSec=105.02179852561878, CurrSamplesPerSec=113.14903772794369, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:42:58,416] [INFO] [logging.py:96:log_dist] [Rank 0] step=4760, skipped=78, lr=[2.0198285253870464e-06, 2.0198285253870464e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4759|ppo_ep: 1|act_loss: -0.00461578369140625|cri_loss: -0.002063751220703125|unsuper_loss: 0.0
-average reward score: 4.8203125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.63%) |Training time=0.45s (20.73%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4760|ppo_ep: 1|act_loss: 0.00736236572265625|cri_loss: 0.003910064697265625|unsuper_loss: 0.0
-average reward score: 5.21875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.52%) |Training time=0.45s (20.83%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4761|ppo_ep: 1|act_loss: -0.047882080078125|cri_loss: -0.023040771484375|unsuper_loss: 0.0
-average reward score: 6.04296875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.20%) |Training time=0.44s (20.21%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4762|ppo_ep: 1|act_loss: -0.001644134521484375|cri_loss: -0.0004086494445800781|unsuper_loss: 0.0
-average reward score: 4.51171875
--------------------------------------------------------------------------------------
-|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.71s (76.02%) |Training time=0.44s (19.52%) |Others=0.10 (4.45%)|CurSamplesPerSec=14.22 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4763|ppo_ep: 1|act_loss: 0.0257568359375|cri_loss: 0.01320648193359375|unsuper_loss: 0.0
-average reward score: 5.1015625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.28%) |Training time=0.45s (21.05%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4764|ppo_ep: 1|act_loss: -0.0240020751953125|cri_loss: -0.0116119384765625|unsuper_loss: 0.0
-average reward score: 4.6171875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.62%) |Training time=0.45s (20.70%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4765|ppo_ep: 1|act_loss: 0.0045318603515625|cri_loss: 0.002452850341796875|unsuper_loss: 0.0
-average reward score: 5.484375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.65%) |Training time=0.44s (20.68%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4766|ppo_ep: 1|act_loss: -0.0030364990234375|cri_loss: -0.0010528564453125|unsuper_loss: 0.0
-average reward score: 6.16796875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.95%) |Training time=0.44s (20.35%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4767|ppo_ep: 1|act_loss: 0.06121826171875|cri_loss: 0.03350830078125|unsuper_loss: 0.0
-average reward score: 5.7421875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.81%) |Training time=0.44s (20.49%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4768|ppo_ep: 1|act_loss: -0.02301025390625|cri_loss: -0.0109710693359375|unsuper_loss: 0.0
-average reward score: 4.9375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.02%) |Training time=0.43s (20.25%) |Others=0.10 (4.73%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.47
-[2023-04-14 11:43:19,934] [INFO] [logging.py:96:log_dist] [Rank 0] step=4770, skipped=66, lr=[3.8581960152626685e-06, 3.8581960152626685e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:43:19,953] [INFO] [timer.py:199:stop] epoch=0/micro_step=4770/global_step=4770, RunningAvgSamplesPerSec=105.03973265983572, CurrSamplesPerSec=115.99823346444069, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:43:20,046] [INFO] [logging.py:96:log_dist] [Rank 0] step=4770, skipped=78, lr=[2.010386332482083e-06, 2.010386332482083e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4769|ppo_ep: 1|act_loss: -0.02618408203125|cri_loss: -0.0127410888671875|unsuper_loss: 0.0
-average reward score: 5.09375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.86%) |Training time=0.44s (20.46%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4770|ppo_ep: 1|act_loss: -0.01104736328125|cri_loss: -0.00519561767578125|unsuper_loss: 0.0
-average reward score: 4.78515625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.92%) |Training time=0.44s (20.38%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4771|ppo_ep: 1|act_loss: 0.024688720703125|cri_loss: 0.01568603515625|unsuper_loss: 0.0
-average reward score: 5.25390625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.57%) |Training time=0.45s (20.77%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4772|ppo_ep: 1|act_loss: -0.039794921875|cri_loss: -0.0181884765625|unsuper_loss: 0.0
-average reward score: 5.48046875
--------------------------------------------------------------------------------------
-|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.60s (71.94%) |Training time=0.46s (20.59%) |Others=0.17 (7.47%)|CurSamplesPerSec=14.35 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4773|ppo_ep: 1|act_loss: -0.0227508544921875|cri_loss: -0.01081085205078125|unsuper_loss: 0.0
-average reward score: 5.2890625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.54%) |Training time=0.43s (19.84%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4774|ppo_ep: 1|act_loss: -0.018585205078125|cri_loss: -0.00902557373046875|unsuper_loss: 0.0
-average reward score: 6.33203125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.70%) |Training time=0.44s (20.64%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4775|ppo_ep: 1|act_loss: -0.01849365234375|cri_loss: -0.008697509765625|unsuper_loss: 0.0
-average reward score: 5.1171875
--------------------------------------------------------------------------------------
-|E2E latency=2.96s |Gather latency=0.00s (0.00%) |Generate time=1.61s (54.33%) |Training time=0.44s (14.90%) |Others=0.91 (30.77%)|CurSamplesPerSec=10.83 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4776|ppo_ep: 1|act_loss: -0.00054931640625|cri_loss: -2.384185791015625e-05|unsuper_loss: 0.0
-average reward score: 4.90234375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.81%) |Training time=0.44s (20.50%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4777|ppo_ep: 1|act_loss: 0.01239013671875|cri_loss: 0.006465911865234375|unsuper_loss: 0.0
-average reward score: 6.9921875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.88%) |Training time=0.44s (20.43%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4778|ppo_ep: 1|act_loss: 0.029632568359375|cri_loss: 0.0154571533203125|unsuper_loss: 0.0
-average reward score: 4.79296875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.75%) |Training time=0.44s (20.56%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.47
-[2023-04-14 11:43:42,352] [INFO] [logging.py:96:log_dist] [Rank 0] step=4780, skipped=66, lr=[3.840003753523217e-06, 3.840003753523217e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:43:42,371] [INFO] [timer.py:199:stop] epoch=0/micro_step=4780/global_step=4780, RunningAvgSamplesPerSec=105.0575480387665, CurrSamplesPerSec=113.63751879597393, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:43:42,463] [INFO] [logging.py:96:log_dist] [Rank 0] step=4780, skipped=78, lr=[2.000951396832801e-06, 2.000951396832801e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4779|ppo_ep: 1|act_loss: -0.0135650634765625|cri_loss: -0.005779266357421875|unsuper_loss: 0.0
-average reward score: 5.31640625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.73%) |Training time=0.45s (20.62%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4780|ppo_ep: 1|act_loss: 0.04937744140625|cri_loss: 0.0261383056640625|unsuper_loss: 0.0
-average reward score: 5.17578125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.55%) |Training time=0.45s (20.79%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4781|ppo_ep: 1|act_loss: 0.011749267578125|cri_loss: 0.007293701171875|unsuper_loss: 0.0
-average reward score: 4.75
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.86%) |Training time=0.44s (20.47%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4782|ppo_ep: 1|act_loss: 0.03631591796875|cri_loss: 0.019195556640625|unsuper_loss: 0.0
-average reward score: 4.640625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.88%) |Training time=0.44s (20.44%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4783|ppo_ep: 1|act_loss: 0.034027099609375|cri_loss: 0.01751708984375|unsuper_loss: 0.0
-average reward score: 5.6171875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.16%) |Training time=0.44s (20.18%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4784|ppo_ep: 1|act_loss: 0.0131072998046875|cri_loss: 0.007442474365234375|unsuper_loss: 0.0
-average reward score: 5.18359375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.83%) |Training time=0.44s (20.48%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4785|ppo_ep: 1|act_loss: 0.00457000732421875|cri_loss: 0.002666473388671875|unsuper_loss: 0.0
-average reward score: 4.93359375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.70%) |Training time=0.44s (20.63%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4786|ppo_ep: 1|act_loss: -0.018707275390625|cri_loss: -0.0084686279296875|unsuper_loss: 0.0
-average reward score: 5.0390625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.93%) |Training time=0.44s (20.39%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4787|ppo_ep: 1|act_loss: -0.018280029296875|cri_loss: -0.00768280029296875|unsuper_loss: 0.0
-average reward score: 5.20703125
--------------------------------------------------------------------------------------
-|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.77s (76.48%) |Training time=0.44s (19.18%) |Others=0.10 (4.33%)|CurSamplesPerSec=13.86 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4788|ppo_ep: 1|act_loss: 0.0078887939453125|cri_loss: 0.0048980712890625|unsuper_loss: 0.0
-average reward score: 5.4921875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.85%) |Training time=0.44s (20.47%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.47
-[2023-04-14 11:44:03,971] [INFO] [logging.py:96:log_dist] [Rank 0] step=4790, skipped=66, lr=[3.821826091804312e-06, 3.821826091804312e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:44:03,990] [INFO] [timer.py:199:stop] epoch=0/micro_step=4790/global_step=4790, RunningAvgSamplesPerSec=105.07718399184161, CurrSamplesPerSec=115.61762668881086, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:44:04,082] [INFO] [logging.py:96:log_dist] [Rank 0] step=4790, skipped=78, lr=[1.9915238582877077e-06, 1.9915238582877077e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4789|ppo_ep: 1|act_loss: -0.032135009765625|cri_loss: -0.006683349609375|unsuper_loss: 0.0
-average reward score: 5.58984375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.82%) |Training time=0.44s (20.50%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.47
-[2023-04-14 11:44:06,141] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 4096, reducing to 2048
-epoch: 0|step: 4790|ppo_ep: 1|act_loss: -0.0193939208984375|cri_loss: -0.0087890625|unsuper_loss: 0.0
-average reward score: 5.7890625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.49%) |Training time=0.43s (19.85%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4791|ppo_ep: 1|act_loss: -0.03594970703125|cri_loss: -0.017120361328125|unsuper_loss: 0.0
-average reward score: 4.75390625
--------------------------------------------------------------------------------------
-|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.70s (75.98%) |Training time=0.44s (19.55%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.28 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4792|ppo_ep: 1|act_loss: -0.00423431396484375|cri_loss: -0.0019321441650390625|unsuper_loss: 0.0
-average reward score: 5.50390625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.77%) |Training time=0.44s (20.55%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4793|ppo_ep: 1|act_loss: -0.0269622802734375|cri_loss: -0.0128021240234375|unsuper_loss: 0.0
-average reward score: 5.71875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.58%) |Training time=0.45s (20.73%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4794|ppo_ep: 1|act_loss: 0.04144287109375|cri_loss: 0.023712158203125|unsuper_loss: 0.0
-average reward score: 3.994140625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.43%) |Training time=0.45s (20.90%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4795|ppo_ep: 1|act_loss: 0.0204315185546875|cri_loss: 0.0131378173828125|unsuper_loss: 0.0
-average reward score: 5.4921875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.75%) |Training time=0.44s (20.57%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4796|ppo_ep: 1|act_loss: 0.0016412734985351562|cri_loss: 0.000988006591796875|unsuper_loss: 0.0
-average reward score: 4.734375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.02%) |Training time=0.44s (20.29%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4797|ppo_ep: 1|act_loss: 0.01119232177734375|cri_loss: 0.0069122314453125|unsuper_loss: 0.0
-average reward score: 5.45703125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.59%) |Training time=0.45s (20.76%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4798|ppo_ep: 1|act_loss: -0.0167083740234375|cri_loss: -0.00778961181640625|unsuper_loss: 0.0
-average reward score: 5.33203125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.75%) |Training time=0.44s (20.58%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.47
-[2023-04-14 11:44:25,543] [INFO] [logging.py:96:log_dist] [Rank 0] step=4800, skipped=67, lr=[3.8054789019689153e-06, 3.8054789019689153e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:44:25,562] [INFO] [timer.py:199:stop] epoch=0/micro_step=4800/global_step=4800, RunningAvgSamplesPerSec=105.09690332358787, CurrSamplesPerSec=114.88991285099068, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:44:25,655] [INFO] [logging.py:96:log_dist] [Rank 0] step=4800, skipped=78, lr=[1.9821038565856653e-06, 1.9821038565856653e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4799|ppo_ep: 1|act_loss: -0.030059814453125|cri_loss: -0.01418304443359375|unsuper_loss: 0.0
-average reward score: 7.5078125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.81%) |Training time=0.44s (20.54%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4800|ppo_ep: 1|act_loss: 0.0014495849609375|cri_loss: 0.0008115768432617188|unsuper_loss: 0.0
-average reward score: 6.1640625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.13%) |Training time=0.43s (20.16%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4801|ppo_ep: 1|act_loss: 0.007587432861328125|cri_loss: 0.004497528076171875|unsuper_loss: 0.0
-average reward score: 5.3203125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.86%) |Training time=0.44s (20.48%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4802|ppo_ep: 1|act_loss: 0.025054931640625|cri_loss: 0.017242431640625|unsuper_loss: 0.0
-average reward score: 4.765625
--------------------------------------------------------------------------------------
-|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.77s (76.44%) |Training time=0.45s (19.22%) |Others=0.10 (4.34%)|CurSamplesPerSec=13.81 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4803|ppo_ep: 1|act_loss: 0.03131103515625|cri_loss: 0.016448974609375|unsuper_loss: 0.0
-average reward score: 4.046875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.67%) |Training time=0.44s (20.67%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4804|ppo_ep: 1|act_loss: 0.006938934326171875|cri_loss: 0.00620269775390625|unsuper_loss: 0.0
-average reward score: 5.4609375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.56%) |Training time=0.45s (20.79%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4805|ppo_ep: 1|act_loss: -0.022186279296875|cri_loss: -0.00853729248046875|unsuper_loss: 0.0
-average reward score: 5.29296875
--------------------------------------------------------------------------------------
-|E2E latency=2.54s |Gather latency=0.00s (0.00%) |Generate time=1.60s (63.07%) |Training time=0.44s (17.42%) |Others=0.50 (19.51%)|CurSamplesPerSec=12.59 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4806|ppo_ep: 1|act_loss: -0.0245361328125|cri_loss: -0.01068115234375|unsuper_loss: 0.0
-average reward score: 5.84375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.79%) |Training time=0.44s (20.51%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4807|ppo_ep: 1|act_loss: -0.003475189208984375|cri_loss: 0.0003528594970703125|unsuper_loss: 0.0
-average reward score: 4.68359375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.62%) |Training time=0.45s (20.72%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4808|ppo_ep: 1|act_loss: -0.023895263671875|cri_loss: -0.011199951171875|unsuper_loss: 0.0
-average reward score: 5.62109375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.68%) |Training time=0.44s (20.63%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.47
-[2023-04-14 11:44:47,592] [INFO] [logging.py:96:log_dist] [Rank 0] step=4810, skipped=67, lr=[3.7873297224053474e-06, 3.7873297224053474e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:44:47,610] [INFO] [timer.py:199:stop] epoch=0/micro_step=4810/global_step=4810, RunningAvgSamplesPerSec=105.11458737215244, CurrSamplesPerSec=111.97529193575703, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:44:47,702] [INFO] [logging.py:96:log_dist] [Rank 0] step=4810, skipped=78, lr=[1.972691531353826e-06, 1.972691531353826e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4809|ppo_ep: 1|act_loss: -0.017120361328125|cri_loss: -0.008392333984375|unsuper_loss: 0.0
-average reward score: 5.8984375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.55%) |Training time=0.45s (20.80%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4810|ppo_ep: 1|act_loss: -0.052734375|cri_loss: -0.025604248046875|unsuper_loss: 0.0
-average reward score: 4.78515625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.85%) |Training time=0.44s (20.53%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4811|ppo_ep: 1|act_loss: -0.004974365234375|cri_loss: -0.0013408660888671875|unsuper_loss: 0.0
-average reward score: 4.6953125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.79%) |Training time=0.44s (20.53%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4812|ppo_ep: 1|act_loss: 0.007965087890625|cri_loss: 0.0068359375|unsuper_loss: 0.0
-average reward score: 5.62890625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.59%) |Training time=0.45s (20.74%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4813|ppo_ep: 1|act_loss: 0.013336181640625|cri_loss: 0.007358551025390625|unsuper_loss: 0.0
-average reward score: 6.703125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.60%) |Training time=0.45s (20.73%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4814|ppo_ep: 1|act_loss: 0.03265380859375|cri_loss: 0.0167083740234375|unsuper_loss: 0.0
-average reward score: 5.16015625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.54%) |Training time=0.45s (20.73%) |Others=0.10 (4.73%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4815|ppo_ep: 1|act_loss: 0.0244293212890625|cri_loss: 0.01416015625|unsuper_loss: 0.0
-average reward score: 4.9375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.67%) |Training time=0.44s (20.65%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4816|ppo_ep: 1|act_loss: 0.01470947265625|cri_loss: 0.0077667236328125|unsuper_loss: 0.0
-average reward score: 7.00390625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.76%) |Training time=0.44s (20.58%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4817|ppo_ep: 1|act_loss: 0.013702392578125|cri_loss: 0.007625579833984375|unsuper_loss: 0.0
-average reward score: 5.1015625
--------------------------------------------------------------------------------------
-|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.60s (71.99%) |Training time=0.46s (20.71%) |Others=0.16 (7.30%)|CurSamplesPerSec=14.40 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4818|ppo_ep: 1|act_loss: 0.027618408203125|cri_loss: 0.01415252685546875|unsuper_loss: 0.0
-average reward score: 5.9140625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.79%) |Training time=0.44s (20.42%) |Others=0.10 (4.79%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.47
-[2023-04-14 11:45:09,187] [INFO] [logging.py:96:log_dist] [Rank 0] step=4820, skipped=67, lr=[3.769195923618539e-06, 3.769195923618539e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:45:09,206] [INFO] [timer.py:199:stop] epoch=0/micro_step=4820/global_step=4820, RunningAvgSamplesPerSec=105.12987027841997, CurrSamplesPerSec=109.2328892433651, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:45:09,298] [INFO] [logging.py:96:log_dist] [Rank 0] step=4820, skipped=78, lr=[1.9632870221055535e-06, 1.9632870221055535e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4819|ppo_ep: 1|act_loss: 0.0164794921875|cri_loss: 0.00868988037109375|unsuper_loss: 0.0
-average reward score: 4.78125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.33%) |Training time=0.46s (21.06%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4820|ppo_ep: 1|act_loss: 0.02825927734375|cri_loss: 0.01436614990234375|unsuper_loss: 0.0
-average reward score: 4.9609375
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.82%) |Training time=0.44s (20.27%) |Others=0.11 (4.91%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4821|ppo_ep: 1|act_loss: -0.001445770263671875|cri_loss: -0.0004239082336425781|unsuper_loss: 0.0
-average reward score: 5.0859375
--------------------------------------------------------------------------------------
-|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.69s (75.59%) |Training time=0.45s (19.93%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.30 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4822|ppo_ep: 1|act_loss: -0.024169921875|cri_loss: -0.01146697998046875|unsuper_loss: 0.0
-average reward score: 4.38671875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.60%) |Training time=0.45s (20.74%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4823|ppo_ep: 1|act_loss: -0.0151824951171875|cri_loss: -0.00701141357421875|unsuper_loss: 0.0
-average reward score: 4.98046875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.56%) |Training time=0.45s (20.77%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4824|ppo_ep: 1|act_loss: -0.048370361328125|cri_loss: -0.0208587646484375|unsuper_loss: 0.0
-average reward score: 5.0
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.33%) |Training time=0.45s (21.02%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4825|ppo_ep: 1|act_loss: -0.0100555419921875|cri_loss: -0.004581451416015625|unsuper_loss: 0.0
-average reward score: 5.5390625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.90%) |Training time=0.44s (20.44%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4826|ppo_ep: 1|act_loss: 0.054901123046875|cri_loss: 0.029449462890625|unsuper_loss: 0.0
-average reward score: 4.85546875
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.62%) |Training time=0.44s (20.06%) |Others=0.16 (7.32%)|CurSamplesPerSec=14.49 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4827|ppo_ep: 1|act_loss: -0.01763916015625|cri_loss: -0.008514404296875|unsuper_loss: 0.0
-average reward score: 4.390625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.68%) |Training time=0.44s (20.63%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4828|ppo_ep: 1|act_loss: 0.0816650390625|cri_loss: 0.044952392578125|unsuper_loss: 0.0
-average reward score: 4.6953125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.89%) |Training time=0.44s (20.43%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.47
-[2023-04-14 11:45:30,886] [INFO] [logging.py:96:log_dist] [Rank 0] step=4830, skipped=67, lr=[3.7510777743951295e-06, 3.7510777743951295e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:45:30,905] [INFO] [timer.py:199:stop] epoch=0/micro_step=4830/global_step=4830, RunningAvgSamplesPerSec=105.14553518088816, CurrSamplesPerSec=112.7394056175783, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:45:30,998] [INFO] [logging.py:96:log_dist] [Rank 0] step=4830, skipped=78, lr=[1.9538904682383637e-06, 1.9538904682383637e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4829|ppo_ep: 1|act_loss: 0.00909423828125|cri_loss: 0.0047149658203125|unsuper_loss: 0.0
-average reward score: 5.10546875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.60%) |Training time=0.45s (20.77%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4830|ppo_ep: 1|act_loss: -0.000301361083984375|cri_loss: 0.00032329559326171875|unsuper_loss: 0.0
-average reward score: 6.6796875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.69%) |Training time=0.44s (20.64%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4831|ppo_ep: 1|act_loss: -0.0018453598022460938|cri_loss: -0.0007162094116210938|unsuper_loss: 0.0
-average reward score: 4.7421875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.69%) |Training time=0.44s (20.65%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4832|ppo_ep: 1|act_loss: 0.0035800933837890625|cri_loss: 0.00217437744140625|unsuper_loss: 0.0
-average reward score: 5.5703125
--------------------------------------------------------------------------------------
-|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.78s (78.49%) |Training time=0.39s (17.07%) |Others=0.10 (4.44%)|CurSamplesPerSec=14.13 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4833|ppo_ep: 1|act_loss: 0.0008764266967773438|cri_loss: 0.0006198883056640625|unsuper_loss: 0.0
-average reward score: 3.99609375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.77%) |Training time=0.44s (20.58%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4834|ppo_ep: 1|act_loss: -0.00731658935546875|cri_loss: -0.0033473968505859375|unsuper_loss: 0.0
-average reward score: 4.3125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.56%) |Training time=0.45s (20.78%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4835|ppo_ep: 1|act_loss: -0.0066375732421875|cri_loss: -0.003192901611328125|unsuper_loss: 0.0
-average reward score: 4.4140625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.96%) |Training time=0.44s (20.35%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4836|ppo_ep: 1|act_loss: 0.0096588134765625|cri_loss: 0.005191802978515625|unsuper_loss: 0.0
-average reward score: 5.4296875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.68%) |Training time=0.44s (20.67%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4837|ppo_ep: 1|act_loss: -0.018707275390625|cri_loss: -0.00823974609375|unsuper_loss: 0.0
-average reward score: 5.78125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.53%) |Training time=0.45s (20.80%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4838|ppo_ep: 1|act_loss: 0.005157470703125|cri_loss: 0.005462646484375|unsuper_loss: 0.0
-average reward score: 5.0390625
--------------------------------------------------------------------------------------
-|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.62s (69.41%) |Training time=0.44s (18.96%) |Others=0.27 (11.63%)|CurSamplesPerSec=13.69 |AvgSamplesPerSec=14.47
-[2023-04-14 11:45:52,873] [INFO] [logging.py:96:log_dist] [Rank 0] step=4840, skipped=67, lr=[3.7329755432898005e-06, 3.7329755432898005e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:45:52,891] [INFO] [timer.py:199:stop] epoch=0/micro_step=4840/global_step=4840, RunningAvgSamplesPerSec=105.16561029558453, CurrSamplesPerSec=110.12817981835353, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:45:52,984] [INFO] [logging.py:96:log_dist] [Rank 0] step=4840, skipped=78, lr=[1.944502009031851e-06, 1.944502009031851e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4839|ppo_ep: 1|act_loss: 0.04376220703125|cri_loss: 0.02435302734375|unsuper_loss: 0.0
-average reward score: 5.25
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.36%) |Training time=0.45s (21.01%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4840|ppo_ep: 1|act_loss: 0.02081298828125|cri_loss: 0.01064300537109375|unsuper_loss: 0.0
-average reward score: 5.03515625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.56%) |Training time=0.45s (20.79%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4841|ppo_ep: 1|act_loss: -0.0098876953125|cri_loss: -0.0048065185546875|unsuper_loss: 0.0
-average reward score: 5.546875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.52%) |Training time=0.45s (20.81%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4842|ppo_ep: 1|act_loss: 0.0190887451171875|cri_loss: 0.0098876953125|unsuper_loss: 0.0
-average reward score: 6.19140625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.65%) |Training time=0.44s (20.69%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.47
-[2023-04-14 11:46:01,459] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 2048, reducing to 1024
-epoch: 0|step: 4843|ppo_ep: 1|act_loss: -0.08251953125|cri_loss: 0.013916015625|unsuper_loss: 0.0
-average reward score: 5.1640625
--------------------------------------------------------------------------------------
-|E2E latency=2.12s |Gather latency=0.00s (0.00%) |Generate time=1.60s (75.57%) |Training time=0.42s (19.72%) |Others=0.10 (4.71%)|CurSamplesPerSec=15.13 |AvgSamplesPerSec=14.47
-[2023-04-14 11:46:03,690] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
-epoch: 0|step: 4844|ppo_ep: 1|act_loss: -0.0209197998046875|cri_loss: -0.01010894775390625|unsuper_loss: 0.0
-average reward score: 5.5703125
--------------------------------------------------------------------------------------
-|E2E latency=2.30s |Gather latency=0.00s (0.00%) |Generate time=1.60s (69.56%) |Training time=0.45s (19.41%) |Others=0.25 (11.03%)|CurSamplesPerSec=13.90 |AvgSamplesPerSec=14.47
-[2023-04-14 11:46:05,989] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
-epoch: 0|step: 4845|ppo_ep: 1|act_loss: -0.024261474609375|cri_loss: -0.0117340087890625|unsuper_loss: 0.0
-average reward score: 5.56640625
--------------------------------------------------------------------------------------
-|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.92%) |Training time=0.45s (20.90%) |Others=0.09 (4.17%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4846|ppo_ep: 1|act_loss: 0.01186370849609375|cri_loss: 0.00653839111328125|unsuper_loss: 0.0
-average reward score: 4.4140625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.66%) |Training time=0.44s (20.62%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4847|ppo_ep: 1|act_loss: -0.0236663818359375|cri_loss: -0.0113372802734375|unsuper_loss: 0.0
-average reward score: 4.375
--------------------------------------------------------------------------------------
-|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.77s (76.49%) |Training time=0.44s (19.16%) |Others=0.10 (4.35%)|CurSamplesPerSec=13.86 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4848|ppo_ep: 1|act_loss: -0.0061187744140625|cri_loss: -0.00244903564453125|unsuper_loss: 0.0
-average reward score: 4.9765625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.57%) |Training time=0.44s (20.54%) |Others=0.11 (4.89%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.47
-[2023-04-14 11:46:14,645] [INFO] [logging.py:96:log_dist] [Rank 0] step=4850, skipped=68, lr=[3.7166973670563355e-06, 3.7166973670563355e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:46:14,664] [INFO] [timer.py:199:stop] epoch=0/micro_step=4850/global_step=4850, RunningAvgSamplesPerSec=105.18298878559892, CurrSamplesPerSec=112.45244484735046, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:46:14,757] [INFO] [logging.py:96:log_dist] [Rank 0] step=4850, skipped=80, lr=[1.9369971633416747e-06, 1.9369971633416747e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4849|ppo_ep: 1|act_loss: 0.0143890380859375|cri_loss: 0.008087158203125|unsuper_loss: 0.0
-average reward score: 5.74609375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.77%) |Training time=0.45s (20.64%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4850|ppo_ep: 1|act_loss: -0.0021076202392578125|cri_loss: -0.0008134841918945312|unsuper_loss: 0.0
-average reward score: 5.41015625
--------------------------------------------------------------------------------------
-|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.70s (75.78%) |Training time=0.44s (19.76%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.25 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4851|ppo_ep: 1|act_loss: -0.00928497314453125|cri_loss: -0.004505157470703125|unsuper_loss: 0.0
-average reward score: 5.62109375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.40%) |Training time=0.45s (20.76%) |Others=0.10 (4.83%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4852|ppo_ep: 1|act_loss: 0.008697509765625|cri_loss: 0.00479888916015625|unsuper_loss: 0.0
-average reward score: 5.2890625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.75%) |Training time=0.44s (20.56%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4853|ppo_ep: 1|act_loss: 0.0016317367553710938|cri_loss: 0.00292205810546875|unsuper_loss: 0.0
-average reward score: 6.00390625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.69%) |Training time=0.44s (20.63%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4854|ppo_ep: 1|act_loss: -0.00138092041015625|cri_loss: -0.00043582916259765625|unsuper_loss: 0.0
-average reward score: 5.734375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.57%) |Training time=0.45s (20.77%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4855|ppo_ep: 1|act_loss: 0.1221923828125|cri_loss: 0.06793212890625|unsuper_loss: 0.0
-average reward score: 4.80078125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.97%) |Training time=0.44s (20.36%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4856|ppo_ep: 1|act_loss: -0.00634002685546875|cri_loss: -0.002719879150390625|unsuper_loss: 0.0
-average reward score: 5.53515625
--------------------------------------------------------------------------------------
-|E2E latency=2.87s |Gather latency=0.00s (0.00%) |Generate time=1.61s (56.00%) |Training time=0.44s (15.37%) |Others=0.82 (28.63%)|CurSamplesPerSec=11.16 |AvgSamplesPerSec=14.47
-[2023-04-14 11:46:32,648] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 1024, reducing to 512
-[2023-04-14 11:46:32,734] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, reducing to 16384
-epoch: 0|step: 4857|ppo_ep: 1|act_loss: -0.033935546875|cri_loss: 0.1702880859375|unsuper_loss: 0.0
-average reward score: 4.53125
--------------------------------------------------------------------------------------
-|E2E latency=2.11s |Gather latency=0.00s (0.00%) |Generate time=1.60s (75.84%) |Training time=0.42s (19.84%) |Others=0.09 (4.32%)|CurSamplesPerSec=15.14 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4858|ppo_ep: 1|act_loss: 0.02703857421875|cri_loss: 0.016845703125|unsuper_loss: 0.0
-average reward score: 4.40625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.84%) |Training time=0.44s (20.47%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.47
-[2023-04-14 11:46:36,918] [INFO] [logging.py:96:log_dist] [Rank 0] step=4860, skipped=69, lr=[3.700432497278104e-06, 3.700432497278104e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:46:36,937] [INFO] [timer.py:199:stop] epoch=0/micro_step=4860/global_step=4860, RunningAvgSamplesPerSec=105.2016877522836, CurrSamplesPerSec=113.58462629500683, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:46:37,029] [INFO] [logging.py:96:log_dist] [Rank 0] step=4860, skipped=81, lr=[1.9285605994579322e-06, 1.9285605994579322e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4859|ppo_ep: 1|act_loss: 0.02972412109375|cri_loss: 0.0167388916015625|unsuper_loss: 0.0
-average reward score: 5.84375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.66%) |Training time=0.44s (20.69%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4860|ppo_ep: 1|act_loss: 0.006023406982421875|cri_loss: 0.003147125244140625|unsuper_loss: 0.0
-average reward score: 5.91015625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.73%) |Training time=0.44s (20.60%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4861|ppo_ep: 1|act_loss: -0.0489501953125|cri_loss: -0.0228729248046875|unsuper_loss: 0.0
-average reward score: 5.640625
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.27%) |Training time=0.46s (21.01%) |Others=0.13 (5.72%)|CurSamplesPerSec=14.57 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4862|ppo_ep: 1|act_loss: -0.029449462890625|cri_loss: -0.01416778564453125|unsuper_loss: 0.0
-average reward score: 4.23046875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.80%) |Training time=0.44s (20.23%) |Others=0.11 (4.96%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4863|ppo_ep: 1|act_loss: 0.02264404296875|cri_loss: 0.01192474365234375|unsuper_loss: 0.0
-average reward score: 5.73828125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.60%) |Training time=0.45s (20.72%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4864|ppo_ep: 1|act_loss: 0.0120697021484375|cri_loss: 0.006404876708984375|unsuper_loss: 0.0
-average reward score: 6.19921875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.64%) |Training time=0.44s (20.68%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4865|ppo_ep: 1|act_loss: 0.007213592529296875|cri_loss: 0.0038242340087890625|unsuper_loss: 0.0
-average reward score: 4.9921875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.85%) |Training time=0.44s (20.49%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4866|ppo_ep: 1|act_loss: 0.0120849609375|cri_loss: 0.006259918212890625|unsuper_loss: 0.0
-average reward score: 6.2265625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.67%) |Training time=0.44s (20.66%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4867|ppo_ep: 1|act_loss: 0.024078369140625|cri_loss: 0.012603759765625|unsuper_loss: 0.0
-average reward score: 4.25
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.70%) |Training time=0.44s (20.63%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.47
-[2023-04-14 11:46:56,311] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 512, reducing to 256
-[2023-04-14 11:46:56,397] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 16384, reducing to 8192
-epoch: 0|step: 4868|ppo_ep: 1|act_loss: 0.000885009765625|cri_loss: 0.0006785392761230469|unsuper_loss: 0.0
-average reward score: 7.36328125
--------------------------------------------------------------------------------------
-|E2E latency=2.11s |Gather latency=0.00s (0.00%) |Generate time=1.60s (76.11%) |Training time=0.41s (19.55%) |Others=0.09 (4.34%)|CurSamplesPerSec=15.19 |AvgSamplesPerSec=14.47
-[2023-04-14 11:46:58,434] [INFO] [logging.py:96:log_dist] [Rank 0] step=4870, skipped=70, lr=[3.6841811292336835e-06, 3.6841811292336835e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:46:58,453] [INFO] [timer.py:199:stop] epoch=0/micro_step=4870/global_step=4870, RunningAvgSamplesPerSec=105.2193610408015, CurrSamplesPerSec=112.97113388004588, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:46:58,545] [INFO] [logging.py:96:log_dist] [Rank 0] step=4870, skipped=82, lr=[1.9201308963652553e-06, 1.9201308963652553e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4869|ppo_ep: 1|act_loss: 0.039154052734375|cri_loss: 0.0216522216796875|unsuper_loss: 0.0
-average reward score: 5.81640625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.59%) |Training time=0.45s (20.76%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4870|ppo_ep: 1|act_loss: 0.03594970703125|cri_loss: 0.01934814453125|unsuper_loss: 0.0
-average reward score: 5.26953125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.36%) |Training time=0.45s (20.99%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4871|ppo_ep: 1|act_loss: -0.0149078369140625|cri_loss: -0.00624847412109375|unsuper_loss: 0.0
-average reward score: 5.1484375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.65%) |Training time=0.44s (20.67%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4872|ppo_ep: 1|act_loss: -0.0032520294189453125|cri_loss: -0.0011539459228515625|unsuper_loss: 0.0
-average reward score: 5.171875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.66%) |Training time=0.44s (20.68%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4873|ppo_ep: 1|act_loss: -0.00739288330078125|cri_loss: -0.00341796875|unsuper_loss: 0.0
-average reward score: 4.87890625
--------------------------------------------------------------------------------------
-|E2E latency=2.51s |Gather latency=0.00s (0.00%) |Generate time=1.60s (63.72%) |Training time=0.44s (17.57%) |Others=0.47 (18.71%)|CurSamplesPerSec=12.74 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4874|ppo_ep: 1|act_loss: 0.0113525390625|cri_loss: 0.005992889404296875|unsuper_loss: 0.0
-average reward score: 6.44140625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.33%) |Training time=0.45s (21.00%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4875|ppo_ep: 1|act_loss: -0.00506591796875|cri_loss: -0.001888275146484375|unsuper_loss: 0.0
-average reward score: 6.421875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.54%) |Training time=0.45s (20.81%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4876|ppo_ep: 1|act_loss: -0.01038360595703125|cri_loss: -0.0050811767578125|unsuper_loss: 0.0
-average reward score: 4.63671875
--------------------------------------------------------------------------------------
-|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.60s (71.42%) |Training time=0.47s (20.88%) |Others=0.17 (7.70%)|CurSamplesPerSec=14.29 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4877|ppo_ep: 1|act_loss: -0.023468017578125|cri_loss: -0.01141357421875|unsuper_loss: 0.0
-average reward score: 5.8828125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.72%) |Training time=0.44s (20.41%) |Others=0.10 (4.87%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4878|ppo_ep: 1|act_loss: 0.00634002685546875|cri_loss: 0.003414154052734375|unsuper_loss: 0.0
-average reward score: 5.0390625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.27%) |Training time=0.45s (20.81%) |Others=0.11 (4.93%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.47
-[2023-04-14 11:47:20,508] [INFO] [logging.py:96:log_dist] [Rank 0] step=4880, skipped=70, lr=[3.6661401263059195e-06, 3.6661401263059195e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:47:20,682] [INFO] [timer.py:199:stop] epoch=0/micro_step=4880/global_step=4880, RunningAvgSamplesPerSec=105.21670595000673, CurrSamplesPerSec=63.53029762995129, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:47:20,777] [INFO] [logging.py:96:log_dist] [Rank 0] step=4880, skipped=82, lr=[1.9107727292970774e-06, 1.9107727292970774e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4879|ppo_ep: 1|act_loss: 0.030609130859375|cri_loss: 0.0157623291015625|unsuper_loss: 0.0
-average reward score: 5.63671875
--------------------------------------------------------------------------------------
-|E2E latency=2.41s |Gather latency=0.00s (0.00%) |Generate time=1.63s (67.36%) |Training time=0.69s (28.47%) |Others=0.10 (4.16%)|CurSamplesPerSec=13.26 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4880|ppo_ep: 1|act_loss: -0.0046234130859375|cri_loss: -0.0019855499267578125|unsuper_loss: 0.0
-average reward score: 6.265625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.67s (76.48%) |Training time=0.41s (18.75%) |Others=0.10 (4.76%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4881|ppo_ep: 1|act_loss: 0.01279449462890625|cri_loss: 0.0069427490234375|unsuper_loss: 0.0
-average reward score: 5.1015625
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.03%) |Training time=0.47s (21.30%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4882|ppo_ep: 1|act_loss: -0.03192138671875|cri_loss: -0.0152435302734375|unsuper_loss: 0.0
-average reward score: 4.90625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.37%) |Training time=0.46s (20.94%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4883|ppo_ep: 1|act_loss: -0.0025081634521484375|cri_loss: -0.00106048583984375|unsuper_loss: 0.0
-average reward score: 4.70703125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.17%) |Training time=0.46s (21.15%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4884|ppo_ep: 1|act_loss: 0.020965576171875|cri_loss: 0.01074981689453125|unsuper_loss: 0.0
-average reward score: 6.15625
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.62%) |Training time=0.48s (21.72%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.56 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4885|ppo_ep: 1|act_loss: -0.0005536079406738281|cri_loss: 0.00018978118896484375|unsuper_loss: 0.0
-average reward score: 5.95703125
--------------------------------------------------------------------------------------
-|E2E latency=2.65s |Gather latency=0.00s (0.00%) |Generate time=1.63s (61.59%) |Training time=0.46s (17.21%) |Others=0.56 (21.21%)|CurSamplesPerSec=12.07 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4886|ppo_ep: 1|act_loss: 0.026092529296875|cri_loss: 0.01351165771484375|unsuper_loss: 0.0
-average reward score: 5.44140625
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.18%) |Training time=0.46s (21.20%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4887|ppo_ep: 1|act_loss: 0.021514892578125|cri_loss: 0.01136016845703125|unsuper_loss: 0.0
-average reward score: 4.515625
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.42%) |Training time=0.48s (21.97%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4888|ppo_ep: 1|act_loss: 0.00982666015625|cri_loss: 0.005466461181640625|unsuper_loss: 0.0
-average reward score: 5.39453125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.69%) |Training time=0.47s (21.70%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.47
-[2023-04-14 11:47:42,965] [INFO] [logging.py:96:log_dist] [Rank 0] step=4890, skipped=70, lr=[3.6481163004770804e-06, 3.6481163004770804e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:47:42,983] [INFO] [timer.py:199:stop] epoch=0/micro_step=4890/global_step=4890, RunningAvgSamplesPerSec=105.22021353634193, CurrSamplesPerSec=99.59072874905486, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:47:43,076] [INFO] [logging.py:96:log_dist] [Rank 0] step=4890, skipped=82, lr=[1.9014232959984846e-06, 1.9014232959984846e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4889|ppo_ep: 1|act_loss: 0.009552001953125|cri_loss: 0.00537109375|unsuper_loss: 0.0
-average reward score: 6.1640625
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.29%) |Training time=0.48s (22.12%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4890|ppo_ep: 1|act_loss: 0.017578125|cri_loss: 0.009979248046875|unsuper_loss: 0.0
-average reward score: 6.390625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.79%) |Training time=0.47s (21.56%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4891|ppo_ep: 1|act_loss: 0.0318603515625|cri_loss: 0.016632080078125|unsuper_loss: 0.0
-average reward score: 5.734375
--------------------------------------------------------------------------------------
-|E2E latency=2.59s |Gather latency=0.00s (0.00%) |Generate time=1.77s (68.22%) |Training time=0.48s (18.37%) |Others=0.35 (13.41%)|CurSamplesPerSec=12.34 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4892|ppo_ep: 1|act_loss: 0.033935546875|cri_loss: 0.0182037353515625|unsuper_loss: 0.0
-average reward score: 5.05859375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.21%) |Training time=0.48s (22.17%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4893|ppo_ep: 1|act_loss: 0.0110015869140625|cri_loss: 0.006687164306640625|unsuper_loss: 0.0
-average reward score: 4.71484375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.53%) |Training time=0.48s (21.84%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4894|ppo_ep: 1|act_loss: -0.01580810546875|cri_loss: -0.00707244873046875|unsuper_loss: 0.0
-average reward score: 4.953125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.97%) |Training time=0.47s (21.41%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4895|ppo_ep: 1|act_loss: -0.0055999755859375|cri_loss: -0.0019741058349609375|unsuper_loss: 0.0
-average reward score: 5.48046875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.00%) |Training time=0.46s (21.38%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4896|ppo_ep: 1|act_loss: -0.039031982421875|cri_loss: -0.0185546875|unsuper_loss: 0.0
-average reward score: 5.296875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.00%) |Training time=0.46s (21.33%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4897|ppo_ep: 1|act_loss: 0.0193634033203125|cri_loss: 0.0103302001953125|unsuper_loss: 0.0
-average reward score: 5.5859375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.47%) |Training time=0.48s (21.91%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4898|ppo_ep: 1|act_loss: 0.00010347366333007812|cri_loss: 0.0005092620849609375|unsuper_loss: 0.0
-average reward score: 4.90625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.55%) |Training time=0.47s (21.82%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.47
-[2023-04-14 11:48:05,138] [INFO] [logging.py:96:log_dist] [Rank 0] step=4900, skipped=70, lr=[3.6301099189037464e-06, 3.6301099189037464e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:48:05,156] [INFO] [timer.py:199:stop] epoch=0/micro_step=4900/global_step=4900, RunningAvgSamplesPerSec=105.21540054820468, CurrSamplesPerSec=98.67209316003058, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:48:05,251] [INFO] [logging.py:96:log_dist] [Rank 0] step=4900, skipped=82, lr=[1.892082735050632e-06, 1.892082735050632e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4899|ppo_ep: 1|act_loss: -0.01531982421875|cri_loss: -0.00739288330078125|unsuper_loss: 0.0
-average reward score: 5.91796875
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.09%) |Training time=0.49s (22.24%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4900|ppo_ep: 1|act_loss: -0.0083160400390625|cri_loss: -0.0040283203125|unsuper_loss: 0.0
-average reward score: 5.30078125
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.43%) |Training time=0.51s (23.02%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.49 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4901|ppo_ep: 1|act_loss: -0.04620361328125|cri_loss: -0.0215606689453125|unsuper_loss: 0.0
-average reward score: 5.1484375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.58%) |Training time=0.47s (21.74%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4902|ppo_ep: 1|act_loss: 0.06402587890625|cri_loss: 0.034637451171875|unsuper_loss: 0.0
-average reward score: 4.89453125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.45%) |Training time=0.45s (20.77%) |Others=0.10 (4.78%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4903|ppo_ep: 1|act_loss: 0.04351806640625|cri_loss: 0.0232696533203125|unsuper_loss: 0.0
-average reward score: 4.90234375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.85%) |Training time=0.42s (19.47%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4904|ppo_ep: 1|act_loss: -0.001407623291015625|cri_loss: -0.00046324729919433594|unsuper_loss: 0.0
-average reward score: 4.47265625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.50%) |Training time=0.48s (21.84%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4905|ppo_ep: 1|act_loss: 0.0149078369140625|cri_loss: 0.007965087890625|unsuper_loss: 0.0
-average reward score: 5.1484375
--------------------------------------------------------------------------------------
-|E2E latency=2.41s |Gather latency=0.00s (0.00%) |Generate time=1.61s (66.96%) |Training time=0.47s (19.60%) |Others=0.32 (13.44%)|CurSamplesPerSec=13.28 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4906|ppo_ep: 1|act_loss: 0.036285400390625|cri_loss: 0.02142333984375|unsuper_loss: 0.0
-average reward score: 5.6484375
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.25%) |Training time=0.47s (21.13%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.53 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4907|ppo_ep: 1|act_loss: -0.0006389617919921875|cri_loss: 0.000934600830078125|unsuper_loss: 0.0
-average reward score: 5.359375
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.27%) |Training time=0.46s (21.10%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4908|ppo_ep: 1|act_loss: -0.0016384124755859375|cri_loss: -0.0007228851318359375|unsuper_loss: 0.0
-average reward score: 4.84375
--------------------------------------------------------------------------------------
-|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.74s (75.90%) |Training time=0.45s (19.69%) |Others=0.10 (4.42%)|CurSamplesPerSec=13.96 |AvgSamplesPerSec=14.47
-[2023-04-14 11:48:27,335] [INFO] [logging.py:96:log_dist] [Rank 0] step=4910, skipped=70, lr=[3.6121212484839295e-06, 3.6121212484839295e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:48:27,353] [INFO] [timer.py:199:stop] epoch=0/micro_step=4910/global_step=4910, RunningAvgSamplesPerSec=105.21634977782396, CurrSamplesPerSec=102.24609105099212, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:48:27,446] [INFO] [logging.py:96:log_dist] [Rank 0] step=4910, skipped=82, lr=[1.882751184903167e-06, 1.882751184903167e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4909|ppo_ep: 1|act_loss: -0.01042938232421875|cri_loss: -0.00495147705078125|unsuper_loss: 0.0
-average reward score: 4.76953125
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.62%) |Training time=0.48s (21.78%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4910|ppo_ep: 1|act_loss: -0.005237579345703125|cri_loss: -0.0025177001953125|unsuper_loss: 0.0
-average reward score: 5.2890625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.34%) |Training time=0.48s (22.05%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4911|ppo_ep: 1|act_loss: -0.02166748046875|cri_loss: -0.010040283203125|unsuper_loss: 0.0
-average reward score: 4.90625
--------------------------------------------------------------------------------------
-|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.60s (71.55%) |Training time=0.47s (21.12%) |Others=0.16 (7.32%)|CurSamplesPerSec=14.28 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4912|ppo_ep: 1|act_loss: -0.04510498046875|cri_loss: -0.02032470703125|unsuper_loss: 0.0
-average reward score: 4.21484375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.35%) |Training time=0.48s (22.03%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4913|ppo_ep: 1|act_loss: -0.019134521484375|cri_loss: -0.00905609130859375|unsuper_loss: 0.0
-average reward score: 5.00390625
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.64s (73.88%) |Training time=0.48s (21.47%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.46 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4914|ppo_ep: 1|act_loss: 0.01371002197265625|cri_loss: 0.006977081298828125|unsuper_loss: 0.0
-average reward score: 5.13671875
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.21%) |Training time=0.46s (21.14%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4915|ppo_ep: 1|act_loss: 0.01544189453125|cri_loss: 0.00787353515625|unsuper_loss: 0.0
-average reward score: 5.33984375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.06%) |Training time=0.46s (21.30%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4916|ppo_ep: 1|act_loss: 0.017669677734375|cri_loss: 0.00927734375|unsuper_loss: 0.0
-average reward score: 5.33984375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.60%) |Training time=0.47s (21.75%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4917|ppo_ep: 1|act_loss: 0.04022216796875|cri_loss: 0.021209716796875|unsuper_loss: 0.0
-average reward score: 4.89453125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.60%) |Training time=0.47s (21.76%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4918|ppo_ep: 1|act_loss: 0.020233154296875|cri_loss: 0.0102386474609375|unsuper_loss: 0.0
-average reward score: 5.22265625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.69%) |Training time=0.47s (21.66%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.47
-[2023-04-14 11:48:49,170] [INFO] [logging.py:96:log_dist] [Rank 0] step=4920, skipped=70, lr=[3.594150555853121e-06, 3.594150555853121e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:48:49,187] [INFO] [timer.py:199:stop] epoch=0/micro_step=4920/global_step=4920, RunningAvgSamplesPerSec=105.21233766377085, CurrSamplesPerSec=101.24344625791944, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:48:49,280] [INFO] [logging.py:96:log_dist] [Rank 0] step=4920, skipped=82, lr=[1.8734287838721742e-06, 1.8734287838721742e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4919|ppo_ep: 1|act_loss: 0.045318603515625|cri_loss: 0.024871826171875|unsuper_loss: 0.0
-average reward score: 5.671875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.29%) |Training time=0.48s (22.08%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4920|ppo_ep: 1|act_loss: 0.04046630859375|cri_loss: 0.02252197265625|unsuper_loss: 0.0
-average reward score: 5.15625
--------------------------------------------------------------------------------------
-|E2E latency=2.38s |Gather latency=0.00s (0.00%) |Generate time=1.69s (70.85%) |Training time=0.59s (24.92%) |Others=0.10 (4.23%)|CurSamplesPerSec=13.44 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4921|ppo_ep: 1|act_loss: -0.029449462890625|cri_loss: -0.01418304443359375|unsuper_loss: 0.0
-average reward score: 4.5234375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.30%) |Training time=0.48s (22.05%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4922|ppo_ep: 1|act_loss: -0.01837158203125|cri_loss: -0.0085601806640625|unsuper_loss: 0.0
-average reward score: 5.328125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.44%) |Training time=0.47s (21.93%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4923|ppo_ep: 1|act_loss: 0.047027587890625|cri_loss: 0.0258941650390625|unsuper_loss: 0.0
-average reward score: 4.68359375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.74%) |Training time=0.47s (21.63%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4924|ppo_ep: 1|act_loss: 0.0205841064453125|cri_loss: 0.010498046875|unsuper_loss: 0.0
-average reward score: 4.75390625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.44%) |Training time=0.47s (21.94%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4925|ppo_ep: 1|act_loss: 0.00734710693359375|cri_loss: 0.003772735595703125|unsuper_loss: 0.0
-average reward score: 4.86328125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.37%) |Training time=0.48s (22.00%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4926|ppo_ep: 1|act_loss: -0.010284423828125|cri_loss: -0.004993438720703125|unsuper_loss: 0.0
-average reward score: 4.35546875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.57%) |Training time=0.47s (21.78%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4927|ppo_ep: 1|act_loss: -0.03948974609375|cri_loss: -0.0186767578125|unsuper_loss: 0.0
-average reward score: 5.28515625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.31%) |Training time=0.48s (22.07%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4928|ppo_ep: 1|act_loss: -0.00012969970703125|cri_loss: 0.0004787445068359375|unsuper_loss: 0.0
-average reward score: 4.94140625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.53%) |Training time=0.47s (21.84%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.47
-[2023-04-14 11:49:11,006] [INFO] [logging.py:96:log_dist] [Rank 0] step=4930, skipped=70, lr=[3.576198107380337e-06, 3.576198107380337e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:49:11,024] [INFO] [timer.py:199:stop] epoch=0/micro_step=4930/global_step=4930, RunningAvgSamplesPerSec=105.20099548291466, CurrSamplesPerSec=102.2384583684684, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:49:11,117] [INFO] [logging.py:96:log_dist] [Rank 0] step=4930, skipped=82, lr=[1.8641156701381243e-06, 1.8641156701381243e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4929|ppo_ep: 1|act_loss: 0.003734588623046875|cri_loss: 0.003139495849609375|unsuper_loss: 0.0
-average reward score: 4.70703125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.38%) |Training time=0.48s (21.97%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4930|ppo_ep: 1|act_loss: 0.0060272216796875|cri_loss: 0.0032634735107421875|unsuper_loss: 0.0
-average reward score: 4.55859375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.78%) |Training time=0.49s (22.61%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4931|ppo_ep: 1|act_loss: 0.07098388671875|cri_loss: 0.03961181640625|unsuper_loss: 0.0
-average reward score: 4.91015625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.06%) |Training time=0.49s (22.34%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4932|ppo_ep: 1|act_loss: 0.013092041015625|cri_loss: 0.0085296630859375|unsuper_loss: 0.0
-average reward score: 5.203125
--------------------------------------------------------------------------------------
-|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.59s (69.88%) |Training time=0.49s (21.32%) |Others=0.20 (8.80%)|CurSamplesPerSec=14.06 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4933|ppo_ep: 1|act_loss: -0.0150146484375|cri_loss: -0.007354736328125|unsuper_loss: 0.0
-average reward score: 5.41796875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.49%) |Training time=0.47s (21.85%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4934|ppo_ep: 1|act_loss: -0.023773193359375|cri_loss: -0.0106201171875|unsuper_loss: 0.0
-average reward score: 4.5078125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.26%) |Training time=0.48s (22.13%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4935|ppo_ep: 1|act_loss: -0.062286376953125|cri_loss: -0.0286865234375|unsuper_loss: 0.0
-average reward score: 4.9921875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.57%) |Training time=0.47s (21.81%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4936|ppo_ep: 1|act_loss: -0.036224365234375|cri_loss: -0.01629638671875|unsuper_loss: 0.0
-average reward score: 6.27734375
--------------------------------------------------------------------------------------
-|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.50%) |Training time=0.47s (20.21%) |Others=0.10 (4.29%)|CurSamplesPerSec=13.69 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4937|ppo_ep: 1|act_loss: -0.03900146484375|cri_loss: -0.0185546875|unsuper_loss: 0.0
-average reward score: 5.05859375
--------------------------------------------------------------------------------------
-|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.62s (71.80%) |Training time=0.49s (21.56%) |Others=0.15 (6.64%)|CurSamplesPerSec=14.18 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4938|ppo_ep: 1|act_loss: 0.005672454833984375|cri_loss: 0.00292205810546875|unsuper_loss: 0.0
-average reward score: 5.17578125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.64%) |Training time=0.47s (21.75%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.47
-[2023-04-14 11:49:33,102] [INFO] [logging.py:96:log_dist] [Rank 0] step=4940, skipped=70, lr=[3.5582641691641734e-06, 3.5582641691641734e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:49:33,121] [INFO] [timer.py:199:stop] epoch=0/micro_step=4940/global_step=4940, RunningAvgSamplesPerSec=105.19055610924273, CurrSamplesPerSec=97.9630665250696, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:49:33,213] [INFO] [logging.py:96:log_dist] [Rank 0] step=4940, skipped=82, lr=[1.8548119817438313e-06, 1.8548119817438313e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4939|ppo_ep: 1|act_loss: -0.02252197265625|cri_loss: -0.01056671142578125|unsuper_loss: 0.0
-average reward score: 5.61328125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.97%) |Training time=0.49s (22.45%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4940|ppo_ep: 1|act_loss: 0.033447265625|cri_loss: 0.0177001953125|unsuper_loss: 0.0
-average reward score: 5.4609375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.72%) |Training time=0.47s (21.62%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4941|ppo_ep: 1|act_loss: 0.01522064208984375|cri_loss: 0.00787353515625|unsuper_loss: 0.0
-average reward score: 4.125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.50%) |Training time=0.47s (21.80%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4942|ppo_ep: 1|act_loss: 0.037078857421875|cri_loss: 0.0198516845703125|unsuper_loss: 0.0
-average reward score: 4.9296875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.68%) |Training time=0.47s (21.68%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4943|ppo_ep: 1|act_loss: 0.009552001953125|cri_loss: 0.00495147705078125|unsuper_loss: 0.0
-average reward score: 5.87890625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.70%) |Training time=0.47s (21.68%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4944|ppo_ep: 1|act_loss: -0.0019121170043945312|cri_loss: -0.0006513595581054688|unsuper_loss: 0.0
-average reward score: 4.28515625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.36%) |Training time=0.48s (22.00%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4945|ppo_ep: 1|act_loss: 0.001495361328125|cri_loss: 0.0010557174682617188|unsuper_loss: 0.0
-average reward score: 5.1015625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.73%) |Training time=0.47s (21.65%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4946|ppo_ep: 1|act_loss: 0.0030002593994140625|cri_loss: 0.001708984375|unsuper_loss: 0.0
-average reward score: 4.83203125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.38%) |Training time=0.48s (21.98%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4947|ppo_ep: 1|act_loss: 0.00046539306640625|cri_loss: 0.0007371902465820312|unsuper_loss: 0.0
-average reward score: 5.08203125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.91%) |Training time=0.49s (22.48%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4948|ppo_ep: 1|act_loss: -0.00258636474609375|cri_loss: -0.0011606216430664062|unsuper_loss: 0.0
-average reward score: 5.5625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.06%) |Training time=0.48s (22.31%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.47
-[2023-04-14 11:49:54,750] [INFO] [logging.py:96:log_dist] [Rank 0] step=4950, skipped=70, lr=[3.5403490070288552e-06, 3.5403490070288552e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:49:54,769] [INFO] [timer.py:199:stop] epoch=0/micro_step=4950/global_step=4950, RunningAvgSamplesPerSec=105.18537741541601, CurrSamplesPerSec=103.79347362934539, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:49:54,861] [INFO] [logging.py:96:log_dist] [Rank 0] step=4950, skipped=82, lr=[1.8455178565923993e-06, 1.8455178565923993e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4949|ppo_ep: 1|act_loss: -0.017669677734375|cri_loss: -0.00864410400390625|unsuper_loss: 0.0
-average reward score: 4.9296875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.55%) |Training time=0.47s (21.83%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4950|ppo_ep: 1|act_loss: -0.01171875|cri_loss: -0.0056610107421875|unsuper_loss: 0.0
-average reward score: 5.71875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.08%) |Training time=0.48s (22.32%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4951|ppo_ep: 1|act_loss: -0.0146484375|cri_loss: -0.007099151611328125|unsuper_loss: 0.0
-average reward score: 4.75
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.97%) |Training time=0.49s (22.41%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4952|ppo_ep: 1|act_loss: -0.01824951171875|cri_loss: -0.0089569091796875|unsuper_loss: 0.0
-average reward score: 5.54296875
--------------------------------------------------------------------------------------
-|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.72s (73.83%) |Training time=0.50s (21.57%) |Others=0.11 (4.60%)|CurSamplesPerSec=13.72 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4953|ppo_ep: 1|act_loss: 0.00946044921875|cri_loss: 0.005218505859375|unsuper_loss: 0.0
-average reward score: 5.58203125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.10%) |Training time=0.48s (22.30%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4954|ppo_ep: 1|act_loss: -0.01007080078125|cri_loss: -0.0048370361328125|unsuper_loss: 0.0
-average reward score: 5.49609375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.89%) |Training time=0.49s (22.52%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4955|ppo_ep: 1|act_loss: -0.00392913818359375|cri_loss: -0.0012559890747070312|unsuper_loss: 0.0
-average reward score: 4.26171875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.31%) |Training time=0.48s (22.06%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4956|ppo_ep: 1|act_loss: -0.0238494873046875|cri_loss: -0.0116119384765625|unsuper_loss: 0.0
-average reward score: 5.6640625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.09%) |Training time=0.48s (22.31%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4957|ppo_ep: 1|act_loss: -0.0015230178833007812|cri_loss: -3.814697265625e-05|unsuper_loss: 0.0
-average reward score: 4.828125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.20%) |Training time=0.48s (22.18%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4958|ppo_ep: 1|act_loss: -0.002532958984375|cri_loss: -0.0006256103515625|unsuper_loss: 0.0
-average reward score: 4.70703125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.38%) |Training time=0.48s (21.98%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.47
-[2023-04-14 11:50:16,589] [INFO] [logging.py:96:log_dist] [Rank 0] step=4960, skipped=70, lr=[3.5224528865203054e-06, 3.5224528865203054e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:50:16,607] [INFO] [timer.py:199:stop] epoch=0/micro_step=4960/global_step=4960, RunningAvgSamplesPerSec=105.17390656645385, CurrSamplesPerSec=102.20575977508565, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:50:16,700] [INFO] [logging.py:96:log_dist] [Rank 0] step=4960, skipped=82, lr=[1.8362334324451853e-06, 1.8362334324451853e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4959|ppo_ep: 1|act_loss: -0.0043487548828125|cri_loss: -0.002040863037109375|unsuper_loss: 0.0
-average reward score: 4.890625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.44%) |Training time=0.48s (21.94%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4960|ppo_ep: 1|act_loss: -0.0313720703125|cri_loss: -0.0146636962890625|unsuper_loss: 0.0
-average reward score: 5.5
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.30%) |Training time=0.48s (21.95%) |Others=0.10 (4.74%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4961|ppo_ep: 1|act_loss: -0.01314544677734375|cri_loss: -0.00518798828125|unsuper_loss: 0.0
-average reward score: 5.328125
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.72%) |Training time=0.50s (22.71%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.50 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4962|ppo_ep: 1|act_loss: -0.002475738525390625|cri_loss: -0.001026153564453125|unsuper_loss: 0.0
-average reward score: 5.55859375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.03%) |Training time=0.49s (22.35%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4963|ppo_ep: 1|act_loss: 0.06939697265625|cri_loss: 0.03741455078125|unsuper_loss: 0.0
-average reward score: 6.1875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.13%) |Training time=0.48s (22.27%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4964|ppo_ep: 1|act_loss: 0.00904083251953125|cri_loss: 0.00525665283203125|unsuper_loss: 0.0
-average reward score: 5.7578125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.79%) |Training time=0.49s (22.60%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4965|ppo_ep: 1|act_loss: 0.002620697021484375|cri_loss: 0.0014257431030273438|unsuper_loss: 0.0
-average reward score: 4.8515625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.12%) |Training time=0.48s (21.96%) |Others=0.11 (4.92%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4966|ppo_ep: 1|act_loss: 0.003570556640625|cri_loss: 0.0019779205322265625|unsuper_loss: 0.0
-average reward score: 4.97265625
--------------------------------------------------------------------------------------
-|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.60s (70.65%) |Training time=0.51s (22.61%) |Others=0.15 (6.74%)|CurSamplesPerSec=14.11 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4967|ppo_ep: 1|act_loss: 0.01971435546875|cri_loss: 0.01021575927734375|unsuper_loss: 0.0
-average reward score: 5.37109375
--------------------------------------------------------------------------------------
-|E2E latency=2.43s |Gather latency=0.00s (0.00%) |Generate time=1.80s (73.83%) |Training time=0.54s (22.05%) |Others=0.10 (4.12%)|CurSamplesPerSec=13.14 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4968|ppo_ep: 1|act_loss: 0.004993438720703125|cri_loss: 0.0026988983154296875|unsuper_loss: 0.0
-average reward score: 7.1328125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.05%) |Training time=0.48s (22.33%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.47
-[2023-04-14 11:50:38,698] [INFO] [logging.py:96:log_dist] [Rank 0] step=4970, skipped=70, lr=[3.5045760729022005e-06, 3.5045760729022005e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:50:38,716] [INFO] [timer.py:199:stop] epoch=0/micro_step=4970/global_step=4970, RunningAvgSamplesPerSec=105.15504260283433, CurrSamplesPerSec=98.29984817535782, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:50:38,809] [INFO] [logging.py:96:log_dist] [Rank 0] step=4970, skipped=82, lr=[1.8269588469197518e-06, 1.8269588469197518e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4969|ppo_ep: 1|act_loss: 0.00392913818359375|cri_loss: 0.00235748291015625|unsuper_loss: 0.0
-average reward score: 5.50390625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.90%) |Training time=0.49s (22.51%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4970|ppo_ep: 1|act_loss: 0.01861572265625|cri_loss: 0.00971221923828125|unsuper_loss: 0.0
-average reward score: 5.15234375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.92%) |Training time=0.49s (22.45%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4971|ppo_ep: 1|act_loss: 0.010284423828125|cri_loss: 0.005462646484375|unsuper_loss: 0.0
-average reward score: 5.0
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.94%) |Training time=0.49s (22.44%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4972|ppo_ep: 1|act_loss: 0.0051422119140625|cri_loss: 0.002857208251953125|unsuper_loss: 0.0
-average reward score: 5.84765625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.81%) |Training time=0.49s (22.56%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4973|ppo_ep: 1|act_loss: -0.009521484375|cri_loss: -0.0037899017333984375|unsuper_loss: 0.0
-average reward score: 5.8125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.59%) |Training time=0.50s (22.80%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4974|ppo_ep: 1|act_loss: -0.014678955078125|cri_loss: -0.00605010986328125|unsuper_loss: 0.0
-average reward score: 4.63671875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.68%) |Training time=0.47s (21.69%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4975|ppo_ep: 1|act_loss: 0.0279693603515625|cri_loss: 0.01470947265625|unsuper_loss: 0.0
-average reward score: 5.94140625
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.75%) |Training time=0.47s (21.66%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4976|ppo_ep: 1|act_loss: 0.014923095703125|cri_loss: 0.00783538818359375|unsuper_loss: 0.0
-average reward score: 4.3984375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.15%) |Training time=0.48s (22.23%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4977|ppo_ep: 1|act_loss: 4.2438507080078125e-05|cri_loss: 0.00018358230590820312|unsuper_loss: 0.0
-average reward score: 5.58203125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.47%) |Training time=0.47s (21.89%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4978|ppo_ep: 1|act_loss: 0.0005669593811035156|cri_loss: 0.0005650520324707031|unsuper_loss: 0.0
-average reward score: 5.31640625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.08%) |Training time=0.48s (22.32%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.47
-[2023-04-14 11:51:00,406] [INFO] [logging.py:96:log_dist] [Rank 0] step=4980, skipped=70, lr=[3.4867188311520448e-06, 3.4867188311520448e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:51:00,424] [INFO] [timer.py:199:stop] epoch=0/micro_step=4980/global_step=4980, RunningAvgSamplesPerSec=105.14420331571479, CurrSamplesPerSec=101.33762187033065, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:51:00,517] [INFO] [logging.py:96:log_dist] [Rank 0] step=4980, skipped=82, lr=[1.817694237487831e-06, 1.817694237487831e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4979|ppo_ep: 1|act_loss: -0.0105438232421875|cri_loss: -0.004886627197265625|unsuper_loss: 0.0
-average reward score: 4.90234375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.25%) |Training time=0.48s (22.14%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4980|ppo_ep: 1|act_loss: -0.0079193115234375|cri_loss: -0.003604888916015625|unsuper_loss: 0.0
-average reward score: 4.36328125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.18%) |Training time=0.48s (22.20%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4981|ppo_ep: 1|act_loss: 0.03680419921875|cri_loss: 0.0190582275390625|unsuper_loss: 0.0
-average reward score: 6.2421875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.06%) |Training time=0.48s (22.27%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4982|ppo_ep: 1|act_loss: -0.0011386871337890625|cri_loss: -0.00029850006103515625|unsuper_loss: 0.0
-average reward score: 5.54296875
--------------------------------------------------------------------------------------
-|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.75s (74.37%) |Training time=0.50s (21.38%) |Others=0.10 (4.25%)|CurSamplesPerSec=13.60 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4983|ppo_ep: 1|act_loss: -0.024505615234375|cri_loss: -0.0117034912109375|unsuper_loss: 0.0
-average reward score: 4.6015625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.59%) |Training time=0.47s (21.78%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4984|ppo_ep: 1|act_loss: -0.02398681640625|cri_loss: -0.011444091796875|unsuper_loss: 0.0
-average reward score: 4.4765625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.25%) |Training time=0.48s (22.12%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4985|ppo_ep: 1|act_loss: 0.014617919921875|cri_loss: 0.007610321044921875|unsuper_loss: 0.0
-average reward score: 4.84375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.07%) |Training time=0.48s (22.32%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4986|ppo_ep: 1|act_loss: 0.00223541259765625|cri_loss: 0.0012683868408203125|unsuper_loss: 0.0
-average reward score: 5.22265625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.25%) |Training time=0.48s (22.15%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4987|ppo_ep: 1|act_loss: -0.008880615234375|cri_loss: -0.00402069091796875|unsuper_loss: 0.0
-average reward score: 5.1640625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.35%) |Training time=0.47s (21.90%) |Others=0.10 (4.75%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4988|ppo_ep: 1|act_loss: 0.018798828125|cri_loss: 0.01041412353515625|unsuper_loss: 0.0
-average reward score: 4.33203125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.43%) |Training time=0.47s (21.94%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.47
-[2023-04-14 11:51:22,236] [INFO] [logging.py:96:log_dist] [Rank 0] step=4990, skipped=70, lr=[3.4688814259572385e-06, 3.4688814259572385e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:51:22,255] [INFO] [timer.py:199:stop] epoch=0/micro_step=4990/global_step=4990, RunningAvgSamplesPerSec=105.13524135430049, CurrSamplesPerSec=104.26232727521308, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:51:22,348] [INFO] [logging.py:96:log_dist] [Rank 0] step=4990, skipped=82, lr=[1.8084397414732826e-06, 1.8084397414732826e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4989|ppo_ep: 1|act_loss: 0.007671356201171875|cri_loss: 0.004161834716796875|unsuper_loss: 0.0
-average reward score: 6.22265625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.59%) |Training time=0.47s (21.77%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4990|ppo_ep: 1|act_loss: 0.00853729248046875|cri_loss: 0.00528717041015625|unsuper_loss: 0.0
-average reward score: 4.984375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.89%) |Training time=0.46s (21.44%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4991|ppo_ep: 1|act_loss: 0.004535675048828125|cri_loss: 0.0027008056640625|unsuper_loss: 0.0
-average reward score: 5.0625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.41%) |Training time=0.47s (21.95%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4992|ppo_ep: 1|act_loss: 0.0101776123046875|cri_loss: 0.005615234375|unsuper_loss: 0.0
-average reward score: 5.0703125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.35%) |Training time=0.45s (21.01%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4993|ppo_ep: 1|act_loss: 0.0082550048828125|cri_loss: 0.00426483154296875|unsuper_loss: 0.0
-average reward score: 5.05859375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.24%) |Training time=0.46s (21.11%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4994|ppo_ep: 1|act_loss: -0.0047760009765625|cri_loss: -0.002117156982421875|unsuper_loss: 0.0
-average reward score: 5.609375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.22%) |Training time=0.46s (21.12%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4995|ppo_ep: 1|act_loss: 0.05169677734375|cri_loss: 0.0268707275390625|unsuper_loss: 0.0
-average reward score: 4.87890625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.31%) |Training time=0.46s (21.08%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4996|ppo_ep: 1|act_loss: -0.016204833984375|cri_loss: -0.00792694091796875|unsuper_loss: 0.0
-average reward score: 4.71875
--------------------------------------------------------------------------------------
-|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.64s (73.51%) |Training time=0.45s (20.20%) |Others=0.14 (6.29%)|CurSamplesPerSec=14.37 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4997|ppo_ep: 1|act_loss: 0.01568603515625|cri_loss: 0.0084228515625|unsuper_loss: 0.0
-average reward score: 5.578125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.50%) |Training time=0.45s (20.89%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.47
-epoch: 0|step: 4998|ppo_ep: 1|act_loss: 0.03155517578125|cri_loss: 0.0169525146484375|unsuper_loss: 0.0
-average reward score: 5.49609375
--------------------------------------------------------------------------------------
-|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.77s (76.23%) |Training time=0.45s (19.46%) |Others=0.10 (4.30%)|CurSamplesPerSec=13.78 |AvgSamplesPerSec=14.47
-[2023-04-14 11:51:44,060] [INFO] [logging.py:96:log_dist] [Rank 0] step=5000, skipped=70, lr=[3.4510641217111588e-06, 3.4510641217111588e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:51:44,078] [INFO] [timer.py:199:stop] epoch=0/micro_step=5000/global_step=5000, RunningAvgSamplesPerSec=105.14249500141725, CurrSamplesPerSec=110.73403004604508, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:51:44,171] [INFO] [logging.py:96:log_dist] [Rank 0] step=5000, skipped=82, lr=[1.7991954960500646e-06, 1.7991954960500646e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 4999|ppo_ep: 1|act_loss: 0.030792236328125|cri_loss: 0.01605224609375|unsuper_loss: 0.0
-average reward score: 5.6640625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.36%) |Training time=0.45s (21.01%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.47
-epoch: 0|step: 5000|ppo_ep: 1|act_loss: 0.006744384765625|cri_loss: 0.003589630126953125|unsuper_loss: 0.0
-average reward score: 6.08203125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.17%) |Training time=0.46s (21.17%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.47
-epoch: 0|step: 5001|ppo_ep: 1|act_loss: 0.0019435882568359375|cri_loss: 0.001239776611328125|unsuper_loss: 0.0
-average reward score: 5.2109375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.11%) |Training time=0.46s (21.24%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.47
-epoch: 0|step: 5002|ppo_ep: 1|act_loss: -0.00066375732421875|cri_loss: -0.0001690387725830078|unsuper_loss: 0.0
-average reward score: 4.44140625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.70%) |Training time=0.46s (21.08%) |Others=0.11 (5.23%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.47
-epoch: 0|step: 5003|ppo_ep: 1|act_loss: -0.00406646728515625|cri_loss: -0.0018291473388671875|unsuper_loss: 0.0
-average reward score: 4.70703125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.08%) |Training time=0.46s (21.26%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.47
-epoch: 0|step: 5004|ppo_ep: 1|act_loss: -0.012481689453125|cri_loss: -0.00572967529296875|unsuper_loss: 0.0
-average reward score: 5.55859375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.27%) |Training time=0.45s (21.06%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.47
-epoch: 0|step: 5005|ppo_ep: 1|act_loss: -0.0100860595703125|cri_loss: -0.00489044189453125|unsuper_loss: 0.0
-average reward score: 4.91796875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.06%) |Training time=0.46s (21.27%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.47
-epoch: 0|step: 5006|ppo_ep: 1|act_loss: 0.0216827392578125|cri_loss: 0.01190185546875|unsuper_loss: 0.0
-average reward score: 5.67578125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.19%) |Training time=0.46s (21.16%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.47
-epoch: 0|step: 5007|ppo_ep: 1|act_loss: -0.004222869873046875|cri_loss: -0.0015840530395507812|unsuper_loss: 0.0
-average reward score: 5.71484375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.29%) |Training time=0.45s (21.07%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.47
-epoch: 0|step: 5008|ppo_ep: 1|act_loss: 0.00676727294921875|cri_loss: 0.003551483154296875|unsuper_loss: 0.0
-average reward score: 6.16015625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.18%) |Training time=0.46s (21.18%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.47
-[2023-04-14 11:52:05,635] [INFO] [logging.py:96:log_dist] [Rank 0] step=5010, skipped=70, lr=[3.4332671825092336e-06, 3.4332671825092336e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:52:05,654] [INFO] [timer.py:199:stop] epoch=0/micro_step=5010/global_step=5010, RunningAvgSamplesPerSec=105.14970221940123, CurrSamplesPerSec=110.66372811649983, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:52:05,747] [INFO] [logging.py:96:log_dist] [Rank 0] step=5010, skipped=82, lr=[1.7899616382401935e-06, 1.7899616382401935e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5009|ppo_ep: 1|act_loss: 0.01934814453125|cri_loss: 0.0098876953125|unsuper_loss: 0.0
-average reward score: 5.3046875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.36%) |Training time=0.45s (20.99%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.47
-epoch: 0|step: 5010|ppo_ep: 1|act_loss: 0.0182952880859375|cri_loss: 0.0097503662109375|unsuper_loss: 0.0
-average reward score: 4.62109375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.20%) |Training time=0.46s (21.16%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.47
-epoch: 0|step: 5011|ppo_ep: 1|act_loss: 0.02813720703125|cri_loss: 0.01523590087890625|unsuper_loss: 0.0
-average reward score: 6.0546875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.16%) |Training time=0.46s (21.19%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.47
-epoch: 0|step: 5012|ppo_ep: 1|act_loss: 0.058380126953125|cri_loss: 0.0302886962890625|unsuper_loss: 0.0
-average reward score: 6.0703125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.58%) |Training time=0.45s (20.86%) |Others=0.12 (5.56%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.47
-epoch: 0|step: 5013|ppo_ep: 1|act_loss: -0.017242431640625|cri_loss: -0.00830841064453125|unsuper_loss: 0.0
-average reward score: 6.109375
--------------------------------------------------------------------------------------
-|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.73s (75.64%) |Training time=0.46s (19.97%) |Others=0.10 (4.39%)|CurSamplesPerSec=13.98 |AvgSamplesPerSec=14.47
-epoch: 0|step: 5014|ppo_ep: 1|act_loss: -0.01389312744140625|cri_loss: -0.0064849853515625|unsuper_loss: 0.0
-average reward score: 4.3046875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.09%) |Training time=0.46s (21.27%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.47
-epoch: 0|step: 5015|ppo_ep: 1|act_loss: 0.0277099609375|cri_loss: 0.0140838623046875|unsuper_loss: 0.0
-average reward score: 5.85546875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.17%) |Training time=0.46s (21.20%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.47
-epoch: 0|step: 5016|ppo_ep: 1|act_loss: 0.0049591064453125|cri_loss: 0.002826690673828125|unsuper_loss: 0.0
-average reward score: 4.80078125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.54%) |Training time=0.45s (20.81%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.47
-epoch: 0|step: 5017|ppo_ep: 1|act_loss: 0.002994537353515625|cri_loss: 0.0016851425170898438|unsuper_loss: 0.0
-average reward score: 5.60546875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.38%) |Training time=0.45s (20.97%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.47
-epoch: 0|step: 5018|ppo_ep: 1|act_loss: 0.0037860870361328125|cri_loss: 0.0021533966064453125|unsuper_loss: 0.0
-average reward score: 5.61328125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.18%) |Training time=0.46s (21.17%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.47
-[2023-04-14 11:52:27,314] [INFO] [logging.py:96:log_dist] [Rank 0] step=5020, skipped=70, lr=[3.4154908721450375e-06, 3.4154908721450375e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:52:27,333] [INFO] [timer.py:199:stop] epoch=0/micro_step=5020/global_step=5020, RunningAvgSamplesPerSec=105.15842373077857, CurrSamplesPerSec=111.53061617927632, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:52:27,425] [INFO] [logging.py:96:log_dist] [Rank 0] step=5020, skipped=82, lr=[1.7807383049117185e-06, 1.7807383049117185e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5019|ppo_ep: 1|act_loss: -0.00528717041015625|cri_loss: -0.0023040771484375|unsuper_loss: 0.0
-average reward score: 5.6640625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.44%) |Training time=0.45s (20.91%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.47
-epoch: 0|step: 5020|ppo_ep: 1|act_loss: -0.02325439453125|cri_loss: -0.01102447509765625|unsuper_loss: 0.0
-average reward score: 5.89453125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.74%) |Training time=0.45s (20.60%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.47
-epoch: 0|step: 5021|ppo_ep: 1|act_loss: -0.0023365020751953125|cri_loss: -0.0009179115295410156|unsuper_loss: 0.0
-average reward score: 4.0390625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.46%) |Training time=0.45s (20.89%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.47
-epoch: 0|step: 5022|ppo_ep: 1|act_loss: 0.02752685546875|cri_loss: 0.014404296875|unsuper_loss: 0.0
-average reward score: 4.6875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.24%) |Training time=0.46s (21.12%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.47
-epoch: 0|step: 5023|ppo_ep: 1|act_loss: -0.00722503662109375|cri_loss: -0.0035037994384765625|unsuper_loss: 0.0
-average reward score: 5.44140625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.14%) |Training time=0.46s (21.23%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.47
-epoch: 0|step: 5024|ppo_ep: 1|act_loss: -0.030426025390625|cri_loss: -0.01476287841796875|unsuper_loss: 0.0
-average reward score: 6.5
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.39%) |Training time=0.45s (20.97%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.47
-epoch: 0|step: 5025|ppo_ep: 1|act_loss: -0.03466796875|cri_loss: -0.0169677734375|unsuper_loss: 0.0
-average reward score: 5.98828125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.48%) |Training time=0.45s (20.80%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.47
-epoch: 0|step: 5026|ppo_ep: 1|act_loss: 0.00052642822265625|cri_loss: 0.0003490447998046875|unsuper_loss: 0.0
-average reward score: 4.234375
--------------------------------------------------------------------------------------
-|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.70s (75.56%) |Training time=0.45s (19.97%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.20 |AvgSamplesPerSec=14.47
-epoch: 0|step: 5027|ppo_ep: 1|act_loss: 0.012939453125|cri_loss: 0.0068511962890625|unsuper_loss: 0.0
-average reward score: 6.3203125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.03%) |Training time=0.46s (21.34%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.47
-epoch: 0|step: 5028|ppo_ep: 1|act_loss: 0.01593017578125|cri_loss: 0.00855255126953125|unsuper_loss: 0.0
-average reward score: 5.2578125
--------------------------------------------------------------------------------------
-|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.77s (76.34%) |Training time=0.45s (19.32%) |Others=0.10 (4.34%)|CurSamplesPerSec=13.80 |AvgSamplesPerSec=14.47
-[2023-04-14 11:52:49,163] [INFO] [logging.py:96:log_dist] [Rank 0] step=5030, skipped=70, lr=[3.397735454106371e-06, 3.397735454106371e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:52:49,181] [INFO] [timer.py:199:stop] epoch=0/micro_step=5030/global_step=5030, RunningAvgSamplesPerSec=105.16850634145916, CurrSamplesPerSec=111.15689603482346, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:52:49,274] [INFO] [logging.py:96:log_dist] [Rank 0] step=5030, skipped=82, lr=[1.7715256327766887e-06, 1.7715256327766887e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5029|ppo_ep: 1|act_loss: 0.00400543212890625|cri_loss: 0.002166748046875|unsuper_loss: 0.0
-average reward score: 5.8359375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.40%) |Training time=0.45s (20.95%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.47
-epoch: 0|step: 5030|ppo_ep: 1|act_loss: 0.051605224609375|cri_loss: 0.027069091796875|unsuper_loss: 0.0
-average reward score: 5.23828125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.30%) |Training time=0.46s (21.10%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.47
-epoch: 0|step: 5031|ppo_ep: 1|act_loss: 0.003597259521484375|cri_loss: 0.0019397735595703125|unsuper_loss: 0.0
-average reward score: 5.23046875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.34%) |Training time=0.45s (21.01%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.47
-epoch: 0|step: 5032|ppo_ep: 1|act_loss: 0.017059326171875|cri_loss: 0.0095672607421875|unsuper_loss: 0.0
-average reward score: 6.2265625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.14%) |Training time=0.46s (21.22%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.47
-epoch: 0|step: 5033|ppo_ep: 1|act_loss: 0.00244903564453125|cri_loss: 0.00151824951171875|unsuper_loss: 0.0
-average reward score: 5.5078125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.16%) |Training time=0.45s (20.97%) |Others=0.10 (4.87%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.47
-epoch: 0|step: 5034|ppo_ep: 1|act_loss: -0.0243072509765625|cri_loss: -0.01183319091796875|unsuper_loss: 0.0
-average reward score: 4.640625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.25%) |Training time=0.45s (21.07%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.47
-epoch: 0|step: 5035|ppo_ep: 1|act_loss: -0.00789642333984375|cri_loss: -0.0035724639892578125|unsuper_loss: 0.0
-average reward score: 5.9140625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.89%) |Training time=0.47s (21.48%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.47
-epoch: 0|step: 5036|ppo_ep: 1|act_loss: -0.0139312744140625|cri_loss: -0.006259918212890625|unsuper_loss: 0.0
-average reward score: 5.37890625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.07%) |Training time=0.46s (21.23%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.47
-epoch: 0|step: 5037|ppo_ep: 1|act_loss: 0.03485107421875|cri_loss: 0.0181884765625|unsuper_loss: 0.0
-average reward score: 5.23046875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.86%) |Training time=0.44s (20.45%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.47
-epoch: 0|step: 5038|ppo_ep: 1|act_loss: -0.0124359130859375|cri_loss: -0.005916595458984375|unsuper_loss: 0.0
-average reward score: 4.5078125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.56%) |Training time=0.44s (20.76%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.47
-[2023-04-14 11:53:10,712] [INFO] [logging.py:96:log_dist] [Rank 0] step=5040, skipped=70, lr=[3.380001191571363e-06, 3.380001191571363e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:53:10,731] [INFO] [timer.py:199:stop] epoch=0/micro_step=5040/global_step=5040, RunningAvgSamplesPerSec=105.17816044808174, CurrSamplesPerSec=113.32568471926469, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:53:10,823] [INFO] [logging.py:96:log_dist] [Rank 0] step=5040, skipped=82, lr=[1.7623237583891302e-06, 1.7623237583891302e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5039|ppo_ep: 1|act_loss: 0.00580596923828125|cri_loss: 0.0030307769775390625|unsuper_loss: 0.0
-average reward score: 6.33203125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.57%) |Training time=0.44s (20.76%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.47
-epoch: 0|step: 5040|ppo_ep: 1|act_loss: 0.0025310516357421875|cri_loss: 0.0014247894287109375|unsuper_loss: 0.0
-average reward score: 5.55078125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.55%) |Training time=0.45s (20.79%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.47
-epoch: 0|step: 5041|ppo_ep: 1|act_loss: 0.0207977294921875|cri_loss: 0.0117645263671875|unsuper_loss: 0.0
-average reward score: 5.35546875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.51%) |Training time=0.45s (20.80%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.47
-epoch: 0|step: 5042|ppo_ep: 1|act_loss: -0.002902984619140625|cri_loss: -0.0012054443359375|unsuper_loss: 0.0
-average reward score: 6.1796875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.61%) |Training time=0.44s (20.71%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.47
-epoch: 0|step: 5043|ppo_ep: 1|act_loss: -0.0191192626953125|cri_loss: -0.0092010498046875|unsuper_loss: 0.0
-average reward score: 6.94140625
--------------------------------------------------------------------------------------
-|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.54%) |Training time=0.47s (20.16%) |Others=0.10 (4.30%)|CurSamplesPerSec=13.72 |AvgSamplesPerSec=14.47
-epoch: 0|step: 5044|ppo_ep: 1|act_loss: 0.036712646484375|cri_loss: 0.019195556640625|unsuper_loss: 0.0
-average reward score: 5.23046875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.28%) |Training time=0.45s (21.07%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.47
-epoch: 0|step: 5045|ppo_ep: 1|act_loss: -0.0135040283203125|cri_loss: -0.00640869140625|unsuper_loss: 0.0
-average reward score: 5.015625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.92%) |Training time=0.44s (20.39%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.47
-epoch: 0|step: 5046|ppo_ep: 1|act_loss: -0.01305389404296875|cri_loss: -0.005558013916015625|unsuper_loss: 0.0
-average reward score: 5.72265625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.61%) |Training time=0.44s (20.73%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5047|ppo_ep: 1|act_loss: 0.01013946533203125|cri_loss: 0.005229949951171875|unsuper_loss: 0.0
-average reward score: 4.7109375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.42%) |Training time=0.45s (20.92%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5048|ppo_ep: 1|act_loss: -0.0214691162109375|cri_loss: -0.010406494140625|unsuper_loss: 0.0
-average reward score: 5.8828125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.71%) |Training time=0.44s (20.62%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.48
-[2023-04-14 11:53:32,354] [INFO] [logging.py:96:log_dist] [Rank 0] step=5050, skipped=70, lr=[3.3622883474045655e-06, 3.3622883474045655e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:53:32,372] [INFO] [timer.py:199:stop] epoch=0/micro_step=5050/global_step=5050, RunningAvgSamplesPerSec=105.19147333167017, CurrSamplesPerSec=115.78938432684527, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:53:32,465] [INFO] [logging.py:96:log_dist] [Rank 0] step=5050, skipped=82, lr=[1.7531328181430188e-06, 1.7531328181430188e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5049|ppo_ep: 1|act_loss: -0.008087158203125|cri_loss: -0.0038394927978515625|unsuper_loss: 0.0
-average reward score: 5.66015625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.85%) |Training time=0.44s (20.47%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5050|ppo_ep: 1|act_loss: -0.0283050537109375|cri_loss: -0.01366424560546875|unsuper_loss: 0.0
-average reward score: 4.7890625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.51%) |Training time=0.45s (20.83%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5051|ppo_ep: 1|act_loss: -0.0078582763671875|cri_loss: -0.0035495758056640625|unsuper_loss: 0.0
-average reward score: 3.90234375
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.23%) |Training time=0.49s (22.18%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5052|ppo_ep: 1|act_loss: 0.010894775390625|cri_loss: 0.005992889404296875|unsuper_loss: 0.0
-average reward score: 6.34375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.18%) |Training time=0.46s (21.05%) |Others=0.10 (4.78%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5053|ppo_ep: 1|act_loss: -0.018035888671875|cri_loss: -0.00865936279296875|unsuper_loss: 0.0
-average reward score: 5.54296875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.06%) |Training time=0.44s (20.20%) |Others=0.10 (4.74%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5054|ppo_ep: 1|act_loss: 0.00310516357421875|cri_loss: 0.00226593017578125|unsuper_loss: 0.0
-average reward score: 6.3359375
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.80%) |Training time=0.45s (20.50%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5055|ppo_ep: 1|act_loss: 0.0013713836669921875|cri_loss: 0.0008897781372070312|unsuper_loss: 0.0
-average reward score: 5.65625
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.66s (75.48%) |Training time=0.44s (19.84%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.51 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5056|ppo_ep: 1|act_loss: 0.0013103485107421875|cri_loss: 0.0010128021240234375|unsuper_loss: 0.0
-average reward score: 5.65625
--------------------------------------------------------------------------------------
-|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.73s (76.33%) |Training time=0.43s (19.13%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.13 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5057|ppo_ep: 1|act_loss: -0.0010585784912109375|cri_loss: -0.0004253387451171875|unsuper_loss: 0.0
-average reward score: 5.15234375
--------------------------------------------------------------------------------------
-|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.64s (73.10%) |Training time=0.46s (20.41%) |Others=0.15 (6.49%)|CurSamplesPerSec=14.27 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5058|ppo_ep: 1|act_loss: 0.0149688720703125|cri_loss: 0.00787353515625|unsuper_loss: 0.0
-average reward score: 5.3125
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.67s (76.50%) |Training time=0.41s (18.78%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.48
-[2023-04-14 11:53:54,288] [INFO] [logging.py:96:log_dist] [Rank 0] step=5060, skipped=70, lr=[3.34459718415306e-06, 3.34459718415306e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:53:54,306] [INFO] [timer.py:199:stop] epoch=0/micro_step=5060/global_step=5060, RunningAvgSamplesPerSec=105.20452398487495, CurrSamplesPerSec=101.51252440100772, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:53:54,399] [INFO] [logging.py:96:log_dist] [Rank 0] step=5060, skipped=82, lr=[1.7439529482702621e-06, 1.7439529482702621e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5059|ppo_ep: 1|act_loss: 0.025421142578125|cri_loss: 0.01319122314453125|unsuper_loss: 0.0
-average reward score: 4.3359375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.25%) |Training time=0.48s (22.06%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5060|ppo_ep: 1|act_loss: -0.00881195068359375|cri_loss: -0.00376129150390625|unsuper_loss: 0.0
-average reward score: 5.5078125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.52%) |Training time=0.47s (21.76%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5061|ppo_ep: 1|act_loss: -0.020477294921875|cri_loss: -0.0099945068359375|unsuper_loss: 0.0
-average reward score: 5.015625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.77%) |Training time=0.46s (21.51%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5062|ppo_ep: 1|act_loss: -0.0039825439453125|cri_loss: -0.0017499923706054688|unsuper_loss: 0.0
-average reward score: 6.15234375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.36%) |Training time=0.47s (21.88%) |Others=0.10 (4.76%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5063|ppo_ep: 1|act_loss: -0.0006098747253417969|cri_loss: -0.00013113021850585938|unsuper_loss: 0.0
-average reward score: 4.94140625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.62%) |Training time=0.47s (21.66%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5064|ppo_ep: 1|act_loss: -0.018096923828125|cri_loss: -0.00875091552734375|unsuper_loss: 0.0
-average reward score: 5.40234375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.49%) |Training time=0.47s (21.85%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5065|ppo_ep: 1|act_loss: -0.0052947998046875|cri_loss: -0.00177001953125|unsuper_loss: 0.0
-average reward score: 5.28125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.79%) |Training time=0.47s (21.51%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5066|ppo_ep: 1|act_loss: -0.033935546875|cri_loss: -0.0165252685546875|unsuper_loss: 0.0
-average reward score: 6.1953125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.06%) |Training time=0.49s (22.25%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5067|ppo_ep: 1|act_loss: -0.0284423828125|cri_loss: -0.012725830078125|unsuper_loss: 0.0
-average reward score: 4.7734375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.45%) |Training time=0.47s (21.73%) |Others=0.10 (4.82%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5068|ppo_ep: 1|act_loss: 0.0450439453125|cri_loss: 0.0234527587890625|unsuper_loss: 0.0
-average reward score: 6.26171875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.47%) |Training time=0.47s (21.87%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.48
-[2023-04-14 11:54:15,945] [INFO] [logging.py:96:log_dist] [Rank 0] step=5070, skipped=70, lr=[3.326927964042562e-06, 3.326927964042562e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:54:15,963] [INFO] [timer.py:199:stop] epoch=0/micro_step=5070/global_step=5070, RunningAvgSamplesPerSec=105.20114928702438, CurrSamplesPerSec=102.08806252258077, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:54:16,056] [INFO] [logging.py:96:log_dist] [Rank 0] step=5070, skipped=82, lr=[1.734784284838676e-06, 1.734784284838676e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5069|ppo_ep: 1|act_loss: 0.03436279296875|cri_loss: 0.0176849365234375|unsuper_loss: 0.0
-average reward score: 4.53515625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.40%) |Training time=0.48s (21.96%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5070|ppo_ep: 1|act_loss: 0.022369384765625|cri_loss: 0.0124053955078125|unsuper_loss: 0.0
-average reward score: 4.06640625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.92%) |Training time=0.49s (22.42%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5071|ppo_ep: 1|act_loss: 0.0203399658203125|cri_loss: 0.0111236572265625|unsuper_loss: 0.0
-average reward score: 6.125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.79%) |Training time=0.49s (22.55%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5072|ppo_ep: 1|act_loss: 0.00518798828125|cri_loss: 0.0029449462890625|unsuper_loss: 0.0
-average reward score: 4.8125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.17%) |Training time=0.48s (22.16%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5073|ppo_ep: 1|act_loss: -0.034881591796875|cri_loss: -0.01507568359375|unsuper_loss: 0.0
-average reward score: 5.96875
--------------------------------------------------------------------------------------
-|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.74s (74.68%) |Training time=0.49s (20.99%) |Others=0.10 (4.33%)|CurSamplesPerSec=13.76 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5074|ppo_ep: 1|act_loss: 0.001644134521484375|cri_loss: 0.0011005401611328125|unsuper_loss: 0.0
-average reward score: 5.12109375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.73%) |Training time=0.49s (22.64%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5075|ppo_ep: 1|act_loss: 0.001613616943359375|cri_loss: 0.004695892333984375|unsuper_loss: 0.0
-average reward score: 4.4765625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.97%) |Training time=0.49s (22.41%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5076|ppo_ep: 1|act_loss: -0.02197265625|cri_loss: -0.01043701171875|unsuper_loss: 0.0
-average reward score: 4.91015625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.78%) |Training time=0.49s (22.57%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5077|ppo_ep: 1|act_loss: -0.00782012939453125|cri_loss: -0.0032196044921875|unsuper_loss: 0.0
-average reward score: 5.3984375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.09%) |Training time=0.48s (22.26%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5078|ppo_ep: 1|act_loss: 0.00946044921875|cri_loss: 0.00511932373046875|unsuper_loss: 0.0
-average reward score: 5.64453125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.66%) |Training time=0.49s (22.74%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.48
-[2023-04-14 11:54:37,791] [INFO] [logging.py:96:log_dist] [Rank 0] step=5080, skipped=70, lr=[3.309280948973539e-06, 3.309280948973539e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:54:37,809] [INFO] [timer.py:199:stop] epoch=0/micro_step=5080/global_step=5080, RunningAvgSamplesPerSec=105.1869830044589, CurrSamplesPerSec=97.66836872434452, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:54:37,902] [INFO] [logging.py:96:log_dist] [Rank 0] step=5080, skipped=82, lr=[1.725626963749971e-06, 1.725626963749971e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5079|ppo_ep: 1|act_loss: 0.003009796142578125|cri_loss: 0.00188446044921875|unsuper_loss: 0.0
-average reward score: 5.93359375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.81%) |Training time=0.49s (22.58%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5080|ppo_ep: 1|act_loss: 0.009307861328125|cri_loss: 0.00501251220703125|unsuper_loss: 0.0
-average reward score: 4.3515625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.95%) |Training time=0.49s (22.45%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5081|ppo_ep: 1|act_loss: 0.03912353515625|cri_loss: 0.0212554931640625|unsuper_loss: 0.0
-average reward score: 4.7578125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.16%) |Training time=0.46s (21.19%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5082|ppo_ep: 1|act_loss: -0.02783203125|cri_loss: -0.0126953125|unsuper_loss: 0.0
-average reward score: 5.19140625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.29%) |Training time=0.48s (22.09%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5083|ppo_ep: 1|act_loss: -0.0003559589385986328|cri_loss: 4.00543212890625e-05|unsuper_loss: 0.0
-average reward score: 5.48828125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.38%) |Training time=0.48s (21.91%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5084|ppo_ep: 1|act_loss: -0.01495361328125|cri_loss: -0.007282257080078125|unsuper_loss: 0.0
-average reward score: 6.19140625
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.66s (75.45%) |Training time=0.44s (19.90%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.53 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5085|ppo_ep: 1|act_loss: -0.029693603515625|cri_loss: -0.01422119140625|unsuper_loss: 0.0
-average reward score: 6.265625
--------------------------------------------------------------------------------------
-|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.73s (73.78%) |Training time=0.50s (21.41%) |Others=0.11 (4.80%)|CurSamplesPerSec=13.67 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5086|ppo_ep: 1|act_loss: -0.016754150390625|cri_loss: -0.0082244873046875|unsuper_loss: 0.0
-average reward score: 4.765625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.00%) |Training time=0.46s (21.31%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5087|ppo_ep: 1|act_loss: -0.0077972412109375|cri_loss: -0.003520965576171875|unsuper_loss: 0.0
-average reward score: 5.06640625
--------------------------------------------------------------------------------------
-|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.78s (75.05%) |Training time=0.49s (20.63%) |Others=0.10 (4.32%)|CurSamplesPerSec=13.50 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5088|ppo_ep: 1|act_loss: -0.0093994140625|cri_loss: -0.004535675048828125|unsuper_loss: 0.0
-average reward score: 6.8359375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.20%) |Training time=0.46s (21.04%) |Others=0.10 (4.77%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.48
-[2023-04-14 11:54:59,905] [INFO] [logging.py:96:log_dist] [Rank 0] step=5090, skipped=70, lr=[3.291656400517325e-06, 3.291656400517325e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:54:59,923] [INFO] [timer.py:199:stop] epoch=0/micro_step=5090/global_step=5090, RunningAvgSamplesPerSec=105.1857576987737, CurrSamplesPerSec=112.81056883645637, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:55:00,016] [INFO] [logging.py:96:log_dist] [Rank 0] step=5090, skipped=82, lr=[1.7164811207377364e-06, 1.7164811207377364e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5089|ppo_ep: 1|act_loss: 0.03240966796875|cri_loss: 0.016815185546875|unsuper_loss: 0.0
-average reward score: 5.37890625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.76%) |Training time=0.45s (20.55%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5090|ppo_ep: 1|act_loss: 0.01641845703125|cri_loss: 0.00855255126953125|unsuper_loss: 0.0
-average reward score: 4.61328125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.59%) |Training time=0.45s (20.72%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5091|ppo_ep: 1|act_loss: 0.004547119140625|cri_loss: 0.002780914306640625|unsuper_loss: 0.0
-average reward score: 4.9609375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.77%) |Training time=0.47s (21.56%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5092|ppo_ep: 1|act_loss: 0.001316070556640625|cri_loss: 0.0007786750793457031|unsuper_loss: 0.0
-average reward score: 5.578125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.94%) |Training time=0.46s (21.36%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5093|ppo_ep: 1|act_loss: 0.020843505859375|cri_loss: 0.01065826416015625|unsuper_loss: 0.0
-average reward score: 3.98046875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.91%) |Training time=0.46s (21.38%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5094|ppo_ep: 1|act_loss: 0.0036144256591796875|cri_loss: 0.002353668212890625|unsuper_loss: 0.0
-average reward score: 4.765625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.63%) |Training time=0.47s (21.71%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5095|ppo_ep: 1|act_loss: -0.01236724853515625|cri_loss: -0.005863189697265625|unsuper_loss: 0.0
-average reward score: 5.55859375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.80%) |Training time=0.47s (21.51%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5096|ppo_ep: 1|act_loss: -0.01849365234375|cri_loss: -0.00894927978515625|unsuper_loss: 0.0
-average reward score: 6.2265625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.37%) |Training time=0.48s (21.97%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5097|ppo_ep: 1|act_loss: 0.0105133056640625|cri_loss: 0.0054931640625|unsuper_loss: 0.0
-average reward score: 5.24609375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.27%) |Training time=0.46s (21.04%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5098|ppo_ep: 1|act_loss: -0.031341552734375|cri_loss: -0.01502227783203125|unsuper_loss: 0.0
-average reward score: 4.33203125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.57%) |Training time=0.45s (20.76%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.48
-[2023-04-14 11:55:21,634] [INFO] [logging.py:96:log_dist] [Rank 0] step=5100, skipped=70, lr=[3.2740545799122467e-06, 3.2740545799122467e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:55:21,653] [INFO] [timer.py:199:stop] epoch=0/micro_step=5100/global_step=5100, RunningAvgSamplesPerSec=105.18997521642397, CurrSamplesPerSec=117.28935534330132, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:55:21,745] [INFO] [logging.py:96:log_dist] [Rank 0] step=5100, skipped=82, lr=[1.707346891365429e-06, 1.707346891365429e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5099|ppo_ep: 1|act_loss: -0.01549530029296875|cri_loss: -0.007568359375|unsuper_loss: 0.0
-average reward score: 4.921875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.24%) |Training time=0.44s (20.11%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5100|ppo_ep: 1|act_loss: 0.0120697021484375|cri_loss: 0.006183624267578125|unsuper_loss: 0.0
-average reward score: 5.3984375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.84%) |Training time=0.44s (20.44%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5101|ppo_ep: 1|act_loss: 0.01096343994140625|cri_loss: 0.005687713623046875|unsuper_loss: 0.0
-average reward score: 4.8515625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.55%) |Training time=0.44s (20.76%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5102|ppo_ep: 1|act_loss: -0.017669677734375|cri_loss: -0.00824737548828125|unsuper_loss: 0.0
-average reward score: 5.90625
--------------------------------------------------------------------------------------
-|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.65s (73.06%) |Training time=0.51s (22.46%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.14 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5103|ppo_ep: 1|act_loss: -0.026336669921875|cri_loss: -0.01267242431640625|unsuper_loss: 0.0
-average reward score: 4.98046875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.46%) |Training time=0.45s (20.89%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5104|ppo_ep: 1|act_loss: 0.019775390625|cri_loss: 0.01068115234375|unsuper_loss: 0.0
-average reward score: 5.125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.58%) |Training time=0.44s (20.71%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5105|ppo_ep: 1|act_loss: 0.024261474609375|cri_loss: 0.0125885009765625|unsuper_loss: 0.0
-average reward score: 5.84375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.62%) |Training time=0.44s (20.69%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5106|ppo_ep: 1|act_loss: -0.05169677734375|cri_loss: -0.0247955322265625|unsuper_loss: 0.0
-average reward score: 4.359375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.44%) |Training time=0.45s (20.89%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5107|ppo_ep: 1|act_loss: -0.0169830322265625|cri_loss: -0.00818634033203125|unsuper_loss: 0.0
-average reward score: 5.28515625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.50%) |Training time=0.45s (20.79%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5108|ppo_ep: 1|act_loss: 0.0013885498046875|cri_loss: 0.0008983612060546875|unsuper_loss: 0.0
-average reward score: 4.78125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.33%) |Training time=0.45s (20.98%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.48
-[2023-04-14 11:55:43,186] [INFO] [logging.py:96:log_dist] [Rank 0] step=5110, skipped=70, lr=[3.256475748059745e-06, 3.256475748059745e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:55:43,204] [INFO] [timer.py:199:stop] epoch=0/micro_step=5110/global_step=5110, RunningAvgSamplesPerSec=105.20440380442844, CurrSamplesPerSec=112.62106255936578, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:55:43,297] [INFO] [logging.py:96:log_dist] [Rank 0] step=5110, skipped=82, lr=[1.6982244110243626e-06, 1.6982244110243626e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5109|ppo_ep: 1|act_loss: 0.00341033935546875|cri_loss: 0.001926422119140625|unsuper_loss: 0.0
-average reward score: 5.0625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.49%) |Training time=0.45s (20.83%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5110|ppo_ep: 1|act_loss: 0.006072998046875|cri_loss: 0.0032405853271484375|unsuper_loss: 0.0
-average reward score: 6.2265625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.32%) |Training time=0.45s (21.02%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5111|ppo_ep: 1|act_loss: -0.02947998046875|cri_loss: -0.01439666748046875|unsuper_loss: 0.0
-average reward score: 7.17578125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.19%) |Training time=0.45s (21.15%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5112|ppo_ep: 1|act_loss: -0.005096435546875|cri_loss: -0.00234222412109375|unsuper_loss: 0.0
-average reward score: 4.328125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.14%) |Training time=0.46s (21.19%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5113|ppo_ep: 1|act_loss: -0.017669677734375|cri_loss: -0.00860595703125|unsuper_loss: 0.0
-average reward score: 5.546875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.03%) |Training time=0.46s (21.34%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5114|ppo_ep: 1|act_loss: -0.01715087890625|cri_loss: -0.0082550048828125|unsuper_loss: 0.0
-average reward score: 5.71875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.95%) |Training time=0.44s (20.41%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5115|ppo_ep: 1|act_loss: -0.0175323486328125|cri_loss: -0.00847625732421875|unsuper_loss: 0.0
-average reward score: 5.640625
--------------------------------------------------------------------------------------
-|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.69s (75.06%) |Training time=0.46s (20.48%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.19 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5116|ppo_ep: 1|act_loss: 0.01445770263671875|cri_loss: 0.007415771484375|unsuper_loss: 0.0
-average reward score: 4.45703125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.56%) |Training time=0.47s (21.78%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5117|ppo_ep: 1|act_loss: 0.035400390625|cri_loss: 0.018798828125|unsuper_loss: 0.0
-average reward score: 5.4296875
--------------------------------------------------------------------------------------
-|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.64s (71.04%) |Training time=0.55s (23.99%) |Others=0.11 (4.96%)|CurSamplesPerSec=13.84 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5118|ppo_ep: 1|act_loss: 0.00244140625|cri_loss: 0.001384735107421875|unsuper_loss: 0.0
-average reward score: 6.84375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.85%) |Training time=0.46s (21.50%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.48
-[2023-04-14 11:56:05,008] [INFO] [logging.py:96:log_dist] [Rank 0] step=5120, skipped=70, lr=[3.2389201655205164e-06, 3.2389201655205164e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:56:05,026] [INFO] [timer.py:199:stop] epoch=0/micro_step=5120/global_step=5120, RunningAvgSamplesPerSec=105.20632463741921, CurrSamplesPerSec=105.29688544396153, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:56:05,120] [INFO] [logging.py:96:log_dist] [Rank 0] step=5120, skipped=82, lr=[1.6891138149317038e-06, 1.6891138149317038e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5119|ppo_ep: 1|act_loss: 0.02301025390625|cri_loss: 0.011810302734375|unsuper_loss: 0.0
-average reward score: 5.53125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.71%) |Training time=0.47s (21.65%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5120|ppo_ep: 1|act_loss: 0.00733184814453125|cri_loss: 0.00391387939453125|unsuper_loss: 0.0
-average reward score: 5.390625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.50%) |Training time=0.47s (21.85%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5121|ppo_ep: 1|act_loss: 0.0180816650390625|cri_loss: 0.00943756103515625|unsuper_loss: 0.0
-average reward score: 5.26171875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.75%) |Training time=0.47s (21.61%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5122|ppo_ep: 1|act_loss: 0.0029296875|cri_loss: 0.00183868408203125|unsuper_loss: 0.0
-average reward score: 4.53125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.77%) |Training time=0.47s (21.58%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5123|ppo_ep: 1|act_loss: 0.0094451904296875|cri_loss: 0.004840850830078125|unsuper_loss: 0.0
-average reward score: 5.57421875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.75%) |Training time=0.47s (21.60%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5124|ppo_ep: 1|act_loss: -0.0184326171875|cri_loss: -0.00870513916015625|unsuper_loss: 0.0
-average reward score: 5.76171875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.64%) |Training time=0.47s (21.73%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5125|ppo_ep: 1|act_loss: -0.023223876953125|cri_loss: -0.01103973388671875|unsuper_loss: 0.0
-average reward score: 5.48046875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.48%) |Training time=0.47s (21.90%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5126|ppo_ep: 1|act_loss: -0.01800537109375|cri_loss: -0.00847625732421875|unsuper_loss: 0.0
-average reward score: 5.42578125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.54%) |Training time=0.47s (21.75%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5127|ppo_ep: 1|act_loss: -0.0146942138671875|cri_loss: -0.00698089599609375|unsuper_loss: 0.0
-average reward score: 5.24609375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.28%) |Training time=0.48s (22.12%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5128|ppo_ep: 1|act_loss: 0.027069091796875|cri_loss: 0.01407623291015625|unsuper_loss: 0.0
-average reward score: 5.0859375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.13%) |Training time=0.48s (22.26%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.48
-[2023-04-14 11:56:26,608] [INFO] [logging.py:96:log_dist] [Rank 0] step=5130, skipped=70, lr=[3.221388092510643e-06, 3.221388092510643e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:56:26,627] [INFO] [timer.py:199:stop] epoch=0/micro_step=5130/global_step=5130, RunningAvgSamplesPerSec=105.2030162387455, CurrSamplesPerSec=103.4807402444506, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:56:26,719] [INFO] [logging.py:96:log_dist] [Rank 0] step=5130, skipped=82, lr=[1.6800152381284634e-06, 1.6800152381284634e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5129|ppo_ep: 1|act_loss: 0.0220489501953125|cri_loss: 0.0125732421875|unsuper_loss: 0.0
-average reward score: 5.390625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.47%) |Training time=0.47s (21.90%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5130|ppo_ep: 1|act_loss: -0.0015277862548828125|cri_loss: -0.000614166259765625|unsuper_loss: 0.0
-average reward score: 5.55078125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.43%) |Training time=0.47s (21.81%) |Others=0.10 (4.76%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5131|ppo_ep: 1|act_loss: 5.0961971282958984e-05|cri_loss: 0.00015354156494140625|unsuper_loss: 0.0
-average reward score: 5.3515625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.73%) |Training time=0.47s (21.63%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5132|ppo_ep: 1|act_loss: 0.022308349609375|cri_loss: 0.0114288330078125|unsuper_loss: 0.0
-average reward score: 5.109375
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.24%) |Training time=0.51s (23.17%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.56 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5133|ppo_ep: 1|act_loss: -0.03240966796875|cri_loss: -0.01531219482421875|unsuper_loss: 0.0
-average reward score: 5.75390625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.34%) |Training time=0.45s (21.02%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5134|ppo_ep: 1|act_loss: -0.001499176025390625|cri_loss: -0.0005617141723632812|unsuper_loss: 0.0
-average reward score: 4.89453125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.60%) |Training time=0.47s (21.77%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5135|ppo_ep: 1|act_loss: -0.013702392578125|cri_loss: -0.00635528564453125|unsuper_loss: 0.0
-average reward score: 5.03515625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.73%) |Training time=0.50s (22.69%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5136|ppo_ep: 1|act_loss: -0.02093505859375|cri_loss: -0.01007080078125|unsuper_loss: 0.0
-average reward score: 6.5078125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.02%) |Training time=0.49s (22.37%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5137|ppo_ep: 1|act_loss: -0.005767822265625|cri_loss: -0.0024394989013671875|unsuper_loss: 0.0
-average reward score: 4.74609375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.88%) |Training time=0.49s (22.55%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5138|ppo_ep: 1|act_loss: -0.01102447509765625|cri_loss: -0.00304412841796875|unsuper_loss: 0.0
-average reward score: 5.5234375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.22%) |Training time=0.48s (22.15%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.48
-[2023-04-14 11:56:48,341] [INFO] [logging.py:96:log_dist] [Rank 0] step=5140, skipped=70, lr=[3.203879788897743e-06, 3.203879788897743e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:56:48,359] [INFO] [timer.py:199:stop] epoch=0/micro_step=5140/global_step=5140, RunningAvgSamplesPerSec=105.19522715135726, CurrSamplesPerSec=101.88409671789803, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:56:48,452] [INFO] [logging.py:96:log_dist] [Rank 0] step=5140, skipped=82, lr=[1.6709288154775e-06, 1.6709288154775e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5139|ppo_ep: 1|act_loss: 0.0029087066650390625|cri_loss: 0.0019779205322265625|unsuper_loss: 0.0
-average reward score: 5.828125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.43%) |Training time=0.48s (21.96%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5140|ppo_ep: 1|act_loss: -0.0158843994140625|cri_loss: -0.007659912109375|unsuper_loss: 0.0
-average reward score: 5.62890625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.00%) |Training time=0.49s (22.41%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5141|ppo_ep: 1|act_loss: 0.006500244140625|cri_loss: 0.0035247802734375|unsuper_loss: 0.0
-average reward score: 4.609375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.65%) |Training time=0.47s (21.71%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5142|ppo_ep: 1|act_loss: -0.00617218017578125|cri_loss: -0.0020294189453125|unsuper_loss: 0.0
-average reward score: 5.5703125
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.31%) |Training time=0.46s (21.09%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5143|ppo_ep: 1|act_loss: 0.007320404052734375|cri_loss: 0.003997802734375|unsuper_loss: 0.0
-average reward score: 4.8203125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.68%) |Training time=0.47s (21.43%) |Others=0.11 (4.89%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5144|ppo_ep: 1|act_loss: 0.0160675048828125|cri_loss: 0.00841522216796875|unsuper_loss: 0.0
-average reward score: 4.828125
--------------------------------------------------------------------------------------
-|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.62s (71.75%) |Training time=0.54s (23.79%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.21 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5145|ppo_ep: 1|act_loss: 0.04425048828125|cri_loss: 0.0236968994140625|unsuper_loss: 0.0
-average reward score: 5.625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.98%) |Training time=0.46s (21.37%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5146|ppo_ep: 1|act_loss: 0.02178955078125|cri_loss: 0.01132965087890625|unsuper_loss: 0.0
-average reward score: 4.28515625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.82%) |Training time=0.46s (21.56%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5147|ppo_ep: 1|act_loss: 0.03814697265625|cri_loss: 0.019622802734375|unsuper_loss: 0.0
-average reward score: 5.2265625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.73%) |Training time=0.47s (21.64%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5148|ppo_ep: 1|act_loss: 0.012054443359375|cri_loss: 0.0062408447265625|unsuper_loss: 0.0
-average reward score: 4.5078125
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.19%) |Training time=0.51s (23.24%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.48 |AvgSamplesPerSec=14.48
-[2023-04-14 11:57:10,149] [INFO] [logging.py:96:log_dist] [Rank 0] step=5150, skipped=70, lr=[3.186395514197109e-06, 3.186395514197109e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:57:10,168] [INFO] [timer.py:199:stop] epoch=0/micro_step=5150/global_step=5150, RunningAvgSamplesPerSec=105.18848918908238, CurrSamplesPerSec=107.31580260338376, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:57:10,261] [INFO] [logging.py:96:log_dist] [Rank 0] step=5150, skipped=82, lr=[1.6618546816615162e-06, 1.6618546816615162e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5149|ppo_ep: 1|act_loss: 0.004528045654296875|cri_loss: 0.002605438232421875|unsuper_loss: 0.0
-average reward score: 4.08984375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.79%) |Training time=0.47s (21.59%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5150|ppo_ep: 1|act_loss: -0.020782470703125|cri_loss: -0.01001739501953125|unsuper_loss: 0.0
-average reward score: 4.7421875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.82%) |Training time=0.47s (21.55%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5151|ppo_ep: 1|act_loss: 0.0063323974609375|cri_loss: 0.0055999755859375|unsuper_loss: 0.0
-average reward score: 4.18359375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.10%) |Training time=0.46s (21.23%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5152|ppo_ep: 1|act_loss: -0.0015277862548828125|cri_loss: -0.0006008148193359375|unsuper_loss: 0.0
-average reward score: 4.44140625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.64%) |Training time=0.47s (21.73%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5153|ppo_ep: 1|act_loss: 0.0142669677734375|cri_loss: 0.0084686279296875|unsuper_loss: 0.0
-average reward score: 5.046875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.93%) |Training time=0.46s (21.42%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5154|ppo_ep: 1|act_loss: -0.004833221435546875|cri_loss: -0.002147674560546875|unsuper_loss: 0.0
-average reward score: 5.59375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.78%) |Training time=0.47s (21.58%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5155|ppo_ep: 1|act_loss: -0.0033206939697265625|cri_loss: -0.0005054473876953125|unsuper_loss: 0.0
-average reward score: 5.3125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.69%) |Training time=0.47s (21.65%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5156|ppo_ep: 1|act_loss: -0.02227783203125|cri_loss: -0.01050567626953125|unsuper_loss: 0.0
-average reward score: 5.1875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.32%) |Training time=0.45s (21.00%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5157|ppo_ep: 1|act_loss: -0.030426025390625|cri_loss: -0.01453399658203125|unsuper_loss: 0.0
-average reward score: 5.2890625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.50%) |Training time=0.45s (20.74%) |Others=0.10 (4.75%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5158|ppo_ep: 1|act_loss: 0.0006160736083984375|cri_loss: 0.0019016265869140625|unsuper_loss: 0.0
-average reward score: 5.59375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.50%) |Training time=0.45s (20.83%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.48
-[2023-04-14 11:57:31,697] [INFO] [logging.py:96:log_dist] [Rank 0] step=5160, skipped=70, lr=[3.1689355275678734e-06, 3.1689355275678734e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:57:31,715] [INFO] [timer.py:199:stop] epoch=0/micro_step=5160/global_step=5160, RunningAvgSamplesPerSec=105.19364901967187, CurrSamplesPerSec=107.49044406429995, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:57:31,809] [INFO] [logging.py:96:log_dist] [Rank 0] step=5160, skipped=82, lr=[1.652792971181065e-06, 1.652792971181065e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5159|ppo_ep: 1|act_loss: -0.00487518310546875|cri_loss: -0.0021419525146484375|unsuper_loss: 0.0
-average reward score: 6.12890625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.01%) |Training time=0.46s (21.36%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5160|ppo_ep: 1|act_loss: 0.019805908203125|cri_loss: 0.011138916015625|unsuper_loss: 0.0
-average reward score: 5.14453125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.27%) |Training time=0.45s (21.07%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5161|ppo_ep: 1|act_loss: 0.0233612060546875|cri_loss: 0.012054443359375|unsuper_loss: 0.0
-average reward score: 5.2734375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.43%) |Training time=0.45s (20.91%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5162|ppo_ep: 1|act_loss: 0.00848388671875|cri_loss: 0.004398345947265625|unsuper_loss: 0.0
-average reward score: 5.92578125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.96%) |Training time=0.46s (21.42%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5163|ppo_ep: 1|act_loss: 0.00827789306640625|cri_loss: 0.004608154296875|unsuper_loss: 0.0
-average reward score: 5.4765625
--------------------------------------------------------------------------------------
-|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.19%) |Training time=0.48s (20.50%) |Others=0.10 (4.31%)|CurSamplesPerSec=13.71 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5164|ppo_ep: 1|act_loss: 0.013702392578125|cri_loss: 0.00714874267578125|unsuper_loss: 0.0
-average reward score: 4.94140625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.23%) |Training time=0.45s (21.11%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5165|ppo_ep: 1|act_loss: 0.0002040863037109375|cri_loss: 0.00017511844635009766|unsuper_loss: 0.0
-average reward score: 4.8125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.33%) |Training time=0.45s (21.03%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5166|ppo_ep: 1|act_loss: -0.012786865234375|cri_loss: -0.006092071533203125|unsuper_loss: 0.0
-average reward score: 5.140625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.39%) |Training time=0.45s (20.95%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5167|ppo_ep: 1|act_loss: -0.0180206298828125|cri_loss: -0.0085296630859375|unsuper_loss: 0.0
-average reward score: 5.51171875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.22%) |Training time=0.46s (21.14%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5168|ppo_ep: 1|act_loss: 0.002552032470703125|cri_loss: 0.0014467239379882812|unsuper_loss: 0.0
-average reward score: 5.18359375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.27%) |Training time=0.45s (21.09%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.48
-[2023-04-14 11:57:53,391] [INFO] [logging.py:96:log_dist] [Rank 0] step=5170, skipped=70, lr=[3.151500087809158e-06, 3.151500087809158e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:57:53,410] [INFO] [timer.py:199:stop] epoch=0/micro_step=5170/global_step=5170, RunningAvgSamplesPerSec=105.20113500419411, CurrSamplesPerSec=112.40855699709468, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:57:53,494] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
-[2023-04-14 11:57:53,494] [INFO] [logging.py:96:log_dist] [Rank 0] step=5170, skipped=83, lr=[1.6446481647168905e-06, 1.6446481647168905e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5169|ppo_ep: 1|act_loss: -0.0045623779296875|cri_loss: -0.00177001953125|unsuper_loss: 0.0
-average reward score: 4.703125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.93%) |Training time=0.45s (20.91%) |Others=0.09 (4.16%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.48
-[2023-04-14 11:57:55,633] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
-epoch: 0|step: 5170|ppo_ep: 1|act_loss: 0.01143646240234375|cri_loss: 0.00624847412109375|unsuper_loss: 0.0
-average reward score: 5.3203125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.79%) |Training time=0.45s (21.02%) |Others=0.09 (4.18%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5171|ppo_ep: 1|act_loss: 0.0195159912109375|cri_loss: 0.01044464111328125|unsuper_loss: 0.0
-average reward score: 5.30859375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.68%) |Training time=0.44s (20.66%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5172|ppo_ep: 1|act_loss: -0.0211944580078125|cri_loss: -0.0100250244140625|unsuper_loss: 0.0
-average reward score: 5.875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.90%) |Training time=0.44s (20.23%) |Others=0.11 (4.87%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5173|ppo_ep: 1|act_loss: -0.00702667236328125|cri_loss: -0.0029582977294921875|unsuper_loss: 0.0
-average reward score: 5.3984375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.37%) |Training time=0.43s (20.02%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5174|ppo_ep: 1|act_loss: -0.0139312744140625|cri_loss: -0.006595611572265625|unsuper_loss: 0.0
-average reward score: 6.22265625
--------------------------------------------------------------------------------------
-|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.71s (76.28%) |Training time=0.43s (19.25%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.26 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5175|ppo_ep: 1|act_loss: 0.001678466796875|cri_loss: 0.00147247314453125|unsuper_loss: 0.0
-average reward score: 4.75390625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.03%) |Training time=0.45s (21.15%) |Others=0.10 (4.82%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5176|ppo_ep: 1|act_loss: -0.0050201416015625|cri_loss: -0.0023059844970703125|unsuper_loss: 0.0
-average reward score: 4.6171875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.21%) |Training time=0.45s (21.12%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5177|ppo_ep: 1|act_loss: 0.0244293212890625|cri_loss: 0.01271820068359375|unsuper_loss: 0.0
-average reward score: 5.6875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.86%) |Training time=0.46s (21.47%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5178|ppo_ep: 1|act_loss: 0.016876220703125|cri_loss: 0.008758544921875|unsuper_loss: 0.0
-average reward score: 6.0859375
--------------------------------------------------------------------------------------
-|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.76s (76.19%) |Training time=0.45s (19.46%) |Others=0.10 (4.35%)|CurSamplesPerSec=13.82 |AvgSamplesPerSec=14.48
-[2023-04-14 11:58:15,143] [INFO] [logging.py:96:log_dist] [Rank 0] step=5180, skipped=70, lr=[3.13408945335624e-06, 3.13408945335624e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:58:15,161] [INFO] [timer.py:199:stop] epoch=0/micro_step=5180/global_step=5180, RunningAvgSamplesPerSec=105.21499798949904, CurrSamplesPerSec=114.18577494421204, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:58:15,254] [INFO] [logging.py:96:log_dist] [Rank 0] step=5180, skipped=84, lr=[1.6365136277409998e-06, 1.6365136277409998e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5179|ppo_ep: 1|act_loss: -0.024688720703125|cri_loss: -0.0120391845703125|unsuper_loss: 0.0
-average reward score: 6.1796875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.67%) |Training time=0.44s (20.67%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5180|ppo_ep: 1|act_loss: 0.00354766845703125|cri_loss: 0.0020904541015625|unsuper_loss: 0.0
-average reward score: 4.97265625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.01%) |Training time=0.43s (20.27%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5181|ppo_ep: 1|act_loss: 0.008453369140625|cri_loss: 0.0045013427734375|unsuper_loss: 0.0
-average reward score: 5.0234375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.71%) |Training time=0.44s (20.59%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5182|ppo_ep: 1|act_loss: -0.015106201171875|cri_loss: -0.006755828857421875|unsuper_loss: 0.0
-average reward score: 5.6953125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.63%) |Training time=0.44s (20.71%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5183|ppo_ep: 1|act_loss: 0.004367828369140625|cri_loss: 0.002498626708984375|unsuper_loss: 0.0
-average reward score: 5.33203125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.82%) |Training time=0.44s (20.51%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5184|ppo_ep: 1|act_loss: -0.016021728515625|cri_loss: -0.00772857666015625|unsuper_loss: 0.0
-average reward score: 5.8359375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.69%) |Training time=0.44s (20.58%) |Others=0.10 (4.73%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5185|ppo_ep: 1|act_loss: 0.039581298828125|cri_loss: 0.0210113525390625|unsuper_loss: 0.0
-average reward score: 5.1953125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.73%) |Training time=0.44s (20.60%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5186|ppo_ep: 1|act_loss: 0.04901123046875|cri_loss: 0.0275726318359375|unsuper_loss: 0.0
-average reward score: 4.7890625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.72%) |Training time=0.44s (20.59%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5187|ppo_ep: 1|act_loss: 0.002056121826171875|cri_loss: 0.0012464523315429688|unsuper_loss: 0.0
-average reward score: 5.76171875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.83%) |Training time=0.44s (20.49%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5188|ppo_ep: 1|act_loss: 0.0018367767333984375|cri_loss: 0.001033782958984375|unsuper_loss: 0.0
-average reward score: 5.02734375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.95%) |Training time=0.44s (20.40%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.48
-[2023-04-14 11:58:36,588] [INFO] [logging.py:96:log_dist] [Rank 0] step=5190, skipped=70, lr=[3.116703882276723e-06, 3.116703882276723e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:58:36,607] [INFO] [timer.py:199:stop] epoch=0/micro_step=5190/global_step=5190, RunningAvgSamplesPerSec=105.23254817548194, CurrSamplesPerSec=114.20268043044129, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:58:36,699] [INFO] [logging.py:96:log_dist] [Rank 0] step=5190, skipped=84, lr=[1.6274874165679296e-06, 1.6274874165679296e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5189|ppo_ep: 1|act_loss: -0.01806640625|cri_loss: -0.008819580078125|unsuper_loss: 0.0
-average reward score: 4.8125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.66%) |Training time=0.44s (20.70%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5190|ppo_ep: 1|act_loss: -0.032012939453125|cri_loss: -0.015045166015625|unsuper_loss: 0.0
-average reward score: 4.8671875
--------------------------------------------------------------------------------------
-|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.81%) |Training time=0.44s (20.50%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5191|ppo_ep: 1|act_loss: -0.02008056640625|cri_loss: -0.0090179443359375|unsuper_loss: 0.0
-average reward score: 5.01953125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.64%) |Training time=0.44s (20.69%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5192|ppo_ep: 1|act_loss: -0.008148193359375|cri_loss: -0.003566741943359375|unsuper_loss: 0.0
-average reward score: 6.046875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.74%) |Training time=0.44s (20.58%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5193|ppo_ep: 1|act_loss: -0.00064849853515625|cri_loss: -0.000164031982421875|unsuper_loss: 0.0
-average reward score: 4.40234375
--------------------------------------------------------------------------------------
-|E2E latency=2.30s |Gather latency=0.00s (0.00%) |Generate time=1.77s (76.68%) |Training time=0.44s (18.98%) |Others=0.10 (4.34%)|CurSamplesPerSec=13.89 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5194|ppo_ep: 1|act_loss: -0.017242431640625|cri_loss: -0.0084228515625|unsuper_loss: 0.0
-average reward score: 5.2734375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.61%) |Training time=0.44s (20.71%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5195|ppo_ep: 1|act_loss: 0.015167236328125|cri_loss: 0.00780487060546875|unsuper_loss: 0.0
-average reward score: 4.328125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.60%) |Training time=0.44s (20.73%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5196|ppo_ep: 1|act_loss: 0.0142059326171875|cri_loss: 0.00827789306640625|unsuper_loss: 0.0
-average reward score: 4.98046875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.64%) |Training time=0.44s (20.69%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5197|ppo_ep: 1|act_loss: 0.00450897216796875|cri_loss: 0.0025196075439453125|unsuper_loss: 0.0
-average reward score: 4.5625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.35%) |Training time=0.45s (21.01%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5198|ppo_ep: 1|act_loss: -0.00450897216796875|cri_loss: -0.00199127197265625|unsuper_loss: 0.0
-average reward score: 5.0625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.76%) |Training time=0.44s (20.58%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.48
-[2023-04-14 11:58:58,181] [INFO] [logging.py:96:log_dist] [Rank 0] step=5200, skipped=70, lr=[3.0993436322667104e-06, 3.0993436322667104e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:58:58,199] [INFO] [timer.py:199:stop] epoch=0/micro_step=5200/global_step=5200, RunningAvgSamplesPerSec=105.24831529325668, CurrSamplesPerSec=111.86488639962661, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:58:58,292] [INFO] [logging.py:96:log_dist] [Rank 0] step=5200, skipped=84, lr=[1.6184741381361684e-06, 1.6184741381361684e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5199|ppo_ep: 1|act_loss: -0.01104736328125|cri_loss: -0.004955291748046875|unsuper_loss: 0.0
-average reward score: 4.76953125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.46%) |Training time=0.45s (20.90%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5200|ppo_ep: 1|act_loss: 0.0209808349609375|cri_loss: 0.0107574462890625|unsuper_loss: 0.0
-average reward score: 4.74609375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.99%) |Training time=0.44s (20.34%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5201|ppo_ep: 1|act_loss: 0.00376129150390625|cri_loss: 0.002201080322265625|unsuper_loss: 0.0
-average reward score: 6.21484375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.92%) |Training time=0.44s (20.44%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5202|ppo_ep: 1|act_loss: -0.01300048828125|cri_loss: -0.0059051513671875|unsuper_loss: 0.0
-average reward score: 5.15625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.65%) |Training time=0.44s (20.67%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5203|ppo_ep: 1|act_loss: -0.0003268718719482422|cri_loss: -6.699562072753906e-05|unsuper_loss: 0.0
-average reward score: 5.2265625
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.65s (74.53%) |Training time=0.46s (20.87%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.47 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5204|ppo_ep: 1|act_loss: 0.033721923828125|cri_loss: 0.01727294921875|unsuper_loss: 0.0
-average reward score: 5.1171875
--------------------------------------------------------------------------------------
-|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.71s (76.03%) |Training time=0.44s (19.50%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.24 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5205|ppo_ep: 1|act_loss: 0.0024261474609375|cri_loss: 0.0014257431030273438|unsuper_loss: 0.0
-average reward score: 5.38671875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.59%) |Training time=0.44s (20.70%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5206|ppo_ep: 1|act_loss: -0.006072998046875|cri_loss: -0.0028514862060546875|unsuper_loss: 0.0
-average reward score: 4.96484375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.85%) |Training time=0.46s (21.48%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5207|ppo_ep: 1|act_loss: 0.021270751953125|cri_loss: 0.01160430908203125|unsuper_loss: 0.0
-average reward score: 6.6015625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.08%) |Training time=0.46s (21.23%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5208|ppo_ep: 1|act_loss: -0.006290435791015625|cri_loss: -0.0029544830322265625|unsuper_loss: 0.0
-average reward score: 4.88671875
--------------------------------------------------------------------------------------
-|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.67%) |Training time=0.46s (20.01%) |Others=0.10 (4.32%)|CurSamplesPerSec=13.83 |AvgSamplesPerSec=14.48
-[2023-04-14 11:59:19,968] [INFO] [logging.py:96:log_dist] [Rank 0] step=5210, skipped=70, lr=[3.0820089606469873e-06, 3.0820089606469873e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:59:19,986] [INFO] [timer.py:199:stop] epoch=0/micro_step=5210/global_step=5210, RunningAvgSamplesPerSec=105.25921683045586, CurrSamplesPerSec=106.84806254005278, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:59:20,078] [INFO] [logging.py:96:log_dist] [Rank 0] step=5210, skipped=84, lr=[1.6094739260442448e-06, 1.6094739260442448e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5209|ppo_ep: 1|act_loss: -0.01486968994140625|cri_loss: -0.007076263427734375|unsuper_loss: 0.0
-average reward score: 4.4296875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.82%) |Training time=0.46s (21.55%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5210|ppo_ep: 1|act_loss: -0.00803375244140625|cri_loss: -0.0034542083740234375|unsuper_loss: 0.0
-average reward score: 4.625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.94%) |Training time=0.46s (21.38%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5211|ppo_ep: 1|act_loss: 0.0084381103515625|cri_loss: 0.004375457763671875|unsuper_loss: 0.0
-average reward score: 5.1015625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.67%) |Training time=0.47s (21.64%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5212|ppo_ep: 1|act_loss: 0.0023822784423828125|cri_loss: 0.0013141632080078125|unsuper_loss: 0.0
-average reward score: 5.84375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.41%) |Training time=0.45s (20.92%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5213|ppo_ep: 1|act_loss: 0.011505126953125|cri_loss: 0.00585174560546875|unsuper_loss: 0.0
-average reward score: 5.5703125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.61%) |Training time=0.45s (20.73%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5214|ppo_ep: 1|act_loss: 0.05206298828125|cri_loss: 0.0285491943359375|unsuper_loss: 0.0
-average reward score: 4.56640625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.65%) |Training time=0.44s (20.68%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5215|ppo_ep: 1|act_loss: -0.00803375244140625|cri_loss: -0.00379180908203125|unsuper_loss: 0.0
-average reward score: 5.59765625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.54%) |Training time=0.45s (20.80%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5216|ppo_ep: 1|act_loss: -0.02471923828125|cri_loss: -0.0115203857421875|unsuper_loss: 0.0
-average reward score: 4.6796875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.38%) |Training time=0.45s (20.98%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5217|ppo_ep: 1|act_loss: -0.0132598876953125|cri_loss: -0.006458282470703125|unsuper_loss: 0.0
-average reward score: 5.859375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.56%) |Training time=0.45s (20.76%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5218|ppo_ep: 1|act_loss: 0.0123748779296875|cri_loss: 0.00687408447265625|unsuper_loss: 0.0
-average reward score: 5.87109375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.45%) |Training time=0.42s (19.84%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.48
-[2023-04-14 11:59:41,416] [INFO] [logging.py:96:log_dist] [Rank 0] step=5220, skipped=70, lr=[3.0647001243592035e-06, 3.0647001243592035e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 11:59:41,434] [INFO] [timer.py:199:stop] epoch=0/micro_step=5220/global_step=5220, RunningAvgSamplesPerSec=105.27236962780096, CurrSamplesPerSec=115.6615648442179, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 11:59:41,527] [INFO] [logging.py:96:log_dist] [Rank 0] step=5220, skipped=84, lr=[1.6004869136970152e-06, 1.6004869136970152e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5219|ppo_ep: 1|act_loss: -0.003662109375|cri_loss: -0.0016155242919921875|unsuper_loss: 0.0
-average reward score: 5.4140625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.81%) |Training time=0.44s (20.53%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5220|ppo_ep: 1|act_loss: -0.03924560546875|cri_loss: -0.01666259765625|unsuper_loss: 0.0
-average reward score: 4.4921875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.43%) |Training time=0.45s (20.89%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5221|ppo_ep: 1|act_loss: -0.0004165172576904297|cri_loss: -7.677078247070312e-05|unsuper_loss: 0.0
-average reward score: 4.9765625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.15%) |Training time=0.45s (21.02%) |Others=0.10 (4.83%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5222|ppo_ep: 1|act_loss: -0.00646209716796875|cri_loss: -0.002994537353515625|unsuper_loss: 0.0
-average reward score: 5.828125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.34%) |Training time=0.45s (20.99%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5223|ppo_ep: 1|act_loss: 0.0120849609375|cri_loss: 0.006378173828125|unsuper_loss: 0.0
-average reward score: 4.95703125
--------------------------------------------------------------------------------------
-|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.76s (76.22%) |Training time=0.45s (19.42%) |Others=0.10 (4.36%)|CurSamplesPerSec=13.86 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5224|ppo_ep: 1|act_loss: 0.036102294921875|cri_loss: 0.0192718505859375|unsuper_loss: 0.0
-average reward score: 5.59375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.34%) |Training time=0.45s (21.00%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5225|ppo_ep: 1|act_loss: -0.00815582275390625|cri_loss: -0.0037250518798828125|unsuper_loss: 0.0
-average reward score: 5.4453125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.51%) |Training time=0.45s (20.87%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5226|ppo_ep: 1|act_loss: -0.00539398193359375|cri_loss: -0.002559661865234375|unsuper_loss: 0.0
-average reward score: 4.5859375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.65%) |Training time=0.44s (20.66%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5227|ppo_ep: 1|act_loss: 0.01152801513671875|cri_loss: 0.006114959716796875|unsuper_loss: 0.0
-average reward score: 5.23046875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.39%) |Training time=0.45s (20.95%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5228|ppo_ep: 1|act_loss: -0.00246429443359375|cri_loss: -0.0006666183471679688|unsuper_loss: 0.0
-average reward score: 5.640625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.34%) |Training time=0.45s (21.01%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.48
-[2023-04-14 12:00:03,045] [INFO] [logging.py:96:log_dist] [Rank 0] step=5230, skipped=70, lr=[3.04741737996207e-06, 3.04741737996207e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:00:03,063] [INFO] [timer.py:199:stop] epoch=0/micro_step=5230/global_step=5230, RunningAvgSamplesPerSec=105.28411092329453, CurrSamplesPerSec=113.17489367012611, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:00:03,156] [INFO] [logging.py:96:log_dist] [Rank 0] step=5230, skipped=84, lr=[1.591513234303681e-06, 1.591513234303681e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5229|ppo_ep: 1|act_loss: 0.0034198760986328125|cri_loss: 0.0023326873779296875|unsuper_loss: 0.0
-average reward score: 5.30078125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.55%) |Training time=0.45s (20.78%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5230|ppo_ep: 1|act_loss: 0.002960205078125|cri_loss: 0.001796722412109375|unsuper_loss: 0.0
-average reward score: 5.58203125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.62%) |Training time=0.47s (21.72%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5231|ppo_ep: 1|act_loss: 0.0132293701171875|cri_loss: 0.00711822509765625|unsuper_loss: 0.0
-average reward score: 4.48828125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.59%) |Training time=0.47s (21.74%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5232|ppo_ep: 1|act_loss: -0.0017986297607421875|cri_loss: -0.0006337165832519531|unsuper_loss: 0.0
-average reward score: 5.44921875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.91%) |Training time=0.46s (21.44%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5233|ppo_ep: 1|act_loss: 0.0102081298828125|cri_loss: 0.00537109375|unsuper_loss: 0.0
-average reward score: 5.6015625
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.53%) |Training time=0.45s (20.69%) |Others=0.10 (4.78%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5234|ppo_ep: 1|act_loss: -0.009002685546875|cri_loss: -0.004108428955078125|unsuper_loss: 0.0
-average reward score: 5.44921875
--------------------------------------------------------------------------------------
-|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.69s (75.68%) |Training time=0.44s (19.84%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.31 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5235|ppo_ep: 1|act_loss: -0.029693603515625|cri_loss: -0.014312744140625|unsuper_loss: 0.0
-average reward score: 5.37109375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.74%) |Training time=0.46s (21.58%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5236|ppo_ep: 1|act_loss: -0.00720977783203125|cri_loss: -0.0032634735107421875|unsuper_loss: 0.0
-average reward score: 5.20703125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.52%) |Training time=0.44s (20.78%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5237|ppo_ep: 1|act_loss: -0.004589080810546875|cri_loss: -0.0019083023071289062|unsuper_loss: 0.0
-average reward score: 5.37109375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.62%) |Training time=0.44s (20.72%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5238|ppo_ep: 1|act_loss: 0.00301361083984375|cri_loss: 0.00196075439453125|unsuper_loss: 0.0
-average reward score: 4.9375
--------------------------------------------------------------------------------------
-|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.78s (76.70%) |Training time=0.44s (19.01%) |Others=0.10 (4.29%)|CurSamplesPerSec=13.76 |AvgSamplesPerSec=14.48
-[2023-04-14 12:00:24,816] [INFO] [logging.py:96:log_dist] [Rank 0] step=5240, skipped=70, lr=[3.0301609836275488e-06, 3.0301609836275488e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:00:24,834] [INFO] [timer.py:199:stop] epoch=0/micro_step=5240/global_step=5240, RunningAvgSamplesPerSec=105.29263773127231, CurrSamplesPerSec=111.4146109844281, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:00:24,926] [INFO] [logging.py:96:log_dist] [Rank 0] step=5240, skipped=84, lr=[1.58255302087582e-06, 1.58255302087582e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5239|ppo_ep: 1|act_loss: 0.002498626708984375|cri_loss: 0.0015401840209960938|unsuper_loss: 0.0
-average reward score: 4.515625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.36%) |Training time=0.45s (20.98%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5240|ppo_ep: 1|act_loss: 0.0056915283203125|cri_loss: 0.0032215118408203125|unsuper_loss: 0.0
-average reward score: 5.1484375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.39%) |Training time=0.45s (20.94%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5241|ppo_ep: 1|act_loss: 0.02130126953125|cri_loss: 0.01110076904296875|unsuper_loss: 0.0
-average reward score: 4.5234375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.42%) |Training time=0.45s (20.89%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5242|ppo_ep: 1|act_loss: 0.1400146484375|cri_loss: 0.08123779296875|unsuper_loss: 0.0
-average reward score: 5.4765625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.40%) |Training time=0.45s (20.93%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.48
-epoch: 0|step: 5243|ppo_ep: 1|act_loss: 0.0140228271484375|cri_loss: 0.00774383544921875|unsuper_loss: 0.0
-average reward score: 5.0859375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.50%) |Training time=0.45s (20.86%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5244|ppo_ep: 1|act_loss: 0.0035495758056640625|cri_loss: 0.00228118896484375|unsuper_loss: 0.0
-average reward score: 5.0703125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.48%) |Training time=0.45s (20.84%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5245|ppo_ep: 1|act_loss: 0.01265716552734375|cri_loss: 0.006671905517578125|unsuper_loss: 0.0
-average reward score: 4.2734375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.61%) |Training time=0.44s (20.70%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5246|ppo_ep: 1|act_loss: -0.0294189453125|cri_loss: -0.01203155517578125|unsuper_loss: 0.0
-average reward score: 4.44140625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.60%) |Training time=0.44s (20.74%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5247|ppo_ep: 1|act_loss: -0.04681396484375|cri_loss: -0.0164337158203125|unsuper_loss: 0.0
-average reward score: 5.25390625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.59%) |Training time=0.44s (20.71%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5248|ppo_ep: 1|act_loss: -0.00452423095703125|cri_loss: -0.0018215179443359375|unsuper_loss: 0.0
-average reward score: 4.640625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.49%) |Training time=0.45s (20.84%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.49
-[2023-04-14 12:00:46,261] [INFO] [logging.py:96:log_dist] [Rank 0] step=5250, skipped=70, lr=[3.012931191137065e-06, 3.012931191137065e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:00:46,279] [INFO] [timer.py:199:stop] epoch=0/micro_step=5250/global_step=5250, RunningAvgSamplesPerSec=105.3044718220239, CurrSamplesPerSec=103.76530800000619, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:00:46,372] [INFO] [logging.py:96:log_dist] [Rank 0] step=5250, skipped=84, lr=[1.5736064062254094e-06, 1.5736064062254094e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5249|ppo_ep: 1|act_loss: -0.00366973876953125|cri_loss: -0.0006198883056640625|unsuper_loss: 0.0
-average reward score: 4.3671875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.70%) |Training time=0.47s (21.73%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5250|ppo_ep: 1|act_loss: 0.0237884521484375|cri_loss: 0.0128631591796875|unsuper_loss: 0.0
-average reward score: 4.3984375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.69%) |Training time=0.44s (20.61%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5251|ppo_ep: 1|act_loss: -0.011444091796875|cri_loss: -0.005619049072265625|unsuper_loss: 0.0
-average reward score: 4.7109375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.61%) |Training time=0.44s (20.71%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5252|ppo_ep: 1|act_loss: -0.0035037994384765625|cri_loss: -0.0016393661499023438|unsuper_loss: 0.0
-average reward score: 4.2109375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.52%) |Training time=0.45s (20.83%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5253|ppo_ep: 1|act_loss: -0.0030460357666015625|cri_loss: -0.0012302398681640625|unsuper_loss: 0.0
-average reward score: 5.62109375
--------------------------------------------------------------------------------------
-|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.78s (76.53%) |Training time=0.44s (19.12%) |Others=0.10 (4.35%)|CurSamplesPerSec=13.80 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5254|ppo_ep: 1|act_loss: 0.006267547607421875|cri_loss: 0.0032138824462890625|unsuper_loss: 0.0
-average reward score: 5.19921875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.44%) |Training time=0.45s (20.88%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5255|ppo_ep: 1|act_loss: 0.012847900390625|cri_loss: 0.006938934326171875|unsuper_loss: 0.0
-average reward score: 5.234375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.26%) |Training time=0.45s (21.09%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5256|ppo_ep: 1|act_loss: -0.0014257431030273438|cri_loss: -0.0003261566162109375|unsuper_loss: 0.0
-average reward score: 5.234375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.54%) |Training time=0.45s (20.79%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5257|ppo_ep: 1|act_loss: 0.0113677978515625|cri_loss: 0.005828857421875|unsuper_loss: 0.0
-average reward score: 5.19921875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.49%) |Training time=0.45s (20.83%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5258|ppo_ep: 1|act_loss: -0.03814697265625|cri_loss: -0.0180816650390625|unsuper_loss: 0.0
-average reward score: 4.875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.84%) |Training time=0.44s (20.47%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.49
-[2023-04-14 12:01:07,849] [INFO] [logging.py:96:log_dist] [Rank 0] step=5260, skipped=70, lr=[2.9957282578777047e-06, 2.9957282578777047e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:01:07,867] [INFO] [timer.py:199:stop] epoch=0/micro_step=5260/global_step=5260, RunningAvgSamplesPerSec=105.31875428619172, CurrSamplesPerSec=111.46577904454388, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:01:07,960] [INFO] [logging.py:96:log_dist] [Rank 0] step=5260, skipped=84, lr=[1.5646735229628619e-06, 1.5646735229628619e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5259|ppo_ep: 1|act_loss: 0.012237548828125|cri_loss: 0.00710296630859375|unsuper_loss: 0.0
-average reward score: 4.65625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.40%) |Training time=0.45s (20.96%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5260|ppo_ep: 1|act_loss: -0.004589080810546875|cri_loss: -0.0017518997192382812|unsuper_loss: 0.0
-average reward score: 4.87890625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.58%) |Training time=0.44s (20.75%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5261|ppo_ep: 1|act_loss: -0.028533935546875|cri_loss: -0.0138702392578125|unsuper_loss: 0.0
-average reward score: 4.8359375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.50%) |Training time=0.45s (20.83%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5262|ppo_ep: 1|act_loss: -0.03631591796875|cri_loss: -0.017333984375|unsuper_loss: 0.0
-average reward score: 4.24609375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.52%) |Training time=0.45s (20.83%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5263|ppo_ep: 1|act_loss: -0.007282257080078125|cri_loss: -0.003139495849609375|unsuper_loss: 0.0
-average reward score: 5.2265625
--------------------------------------------------------------------------------------
-|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.21%) |Training time=0.49s (22.26%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.44 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5264|ppo_ep: 1|act_loss: -0.0225830078125|cri_loss: -0.0083465576171875|unsuper_loss: 0.0
-average reward score: 5.3125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.81%) |Training time=0.44s (20.50%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5265|ppo_ep: 1|act_loss: -0.0156707763671875|cri_loss: -0.00771331787109375|unsuper_loss: 0.0
-average reward score: 5.08203125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.73%) |Training time=0.44s (20.57%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5266|ppo_ep: 1|act_loss: 0.018646240234375|cri_loss: 0.01043701171875|unsuper_loss: 0.0
-average reward score: 4.8046875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.37%) |Training time=0.45s (20.97%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5267|ppo_ep: 1|act_loss: -0.0035762786865234375|cri_loss: -0.0013942718505859375|unsuper_loss: 0.0
-average reward score: 5.6328125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.50%) |Training time=0.45s (20.80%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5268|ppo_ep: 1|act_loss: 0.0124969482421875|cri_loss: 0.00662994384765625|unsuper_loss: 0.0
-average reward score: 4.65234375
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.14%) |Training time=0.48s (22.14%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.49
-[2023-04-14 12:01:29,408] [INFO] [logging.py:96:log_dist] [Rank 0] step=5270, skipped=70, lr=[2.978552438838442e-06, 2.978552438838442e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:01:29,426] [INFO] [timer.py:199:stop] epoch=0/micro_step=5270/global_step=5270, RunningAvgSamplesPerSec=105.33081301411826, CurrSamplesPerSec=112.13020758057145, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:01:29,519] [INFO] [logging.py:96:log_dist] [Rank 0] step=5270, skipped=84, lr=[1.5557545034950558e-06, 1.5557545034950558e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5269|ppo_ep: 1|act_loss: 0.025146484375|cri_loss: 0.01287841796875|unsuper_loss: 0.0
-average reward score: 4.71875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.46%) |Training time=0.45s (20.89%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5270|ppo_ep: 1|act_loss: 0.0148773193359375|cri_loss: 0.007671356201171875|unsuper_loss: 0.0
-average reward score: 5.5859375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.57%) |Training time=0.44s (20.75%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.49
-[2023-04-14 12:01:33,792] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
-epoch: 0|step: 5271|ppo_ep: 1|act_loss: 0.027099609375|cri_loss: 0.013824462890625|unsuper_loss: 0.0
-average reward score: 5.21484375
--------------------------------------------------------------------------------------
-|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.60s (75.05%) |Training time=0.44s (20.73%) |Others=0.09 (4.22%)|CurSamplesPerSec=15.01 |AvgSamplesPerSec=14.49
-[2023-04-14 12:01:35,924] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
-epoch: 0|step: 5272|ppo_ep: 1|act_loss: 0.023406982421875|cri_loss: 0.0120086669921875|unsuper_loss: 0.0
-average reward score: 4.51953125
--------------------------------------------------------------------------------------
-|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.87%) |Training time=0.45s (20.94%) |Others=0.09 (4.19%)|CurSamplesPerSec=15.01 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5273|ppo_ep: 1|act_loss: 0.009521484375|cri_loss: 0.005828857421875|unsuper_loss: 0.0
-average reward score: 4.5546875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.51%) |Training time=0.45s (20.81%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5274|ppo_ep: 1|act_loss: 0.0012540817260742188|cri_loss: 0.0006866455078125|unsuper_loss: 0.0
-average reward score: 5.36328125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.61%) |Training time=0.45s (20.72%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5275|ppo_ep: 1|act_loss: 0.0109710693359375|cri_loss: 0.005619049072265625|unsuper_loss: 0.0
-average reward score: 5.0390625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.55%) |Training time=0.44s (20.79%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5276|ppo_ep: 1|act_loss: -0.01338958740234375|cri_loss: -0.006500244140625|unsuper_loss: 0.0
-average reward score: 4.58203125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.38%) |Training time=0.45s (20.92%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5277|ppo_ep: 1|act_loss: -0.017333984375|cri_loss: -0.008331298828125|unsuper_loss: 0.0
-average reward score: 5.984375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.64%) |Training time=0.44s (20.66%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5278|ppo_ep: 1|act_loss: -0.021484375|cri_loss: -0.00748443603515625|unsuper_loss: 0.0
-average reward score: 5.3671875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.53%) |Training time=0.44s (20.81%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.49
-[2023-04-14 12:01:50,806] [INFO] [logging.py:96:log_dist] [Rank 0] step=5280, skipped=70, lr=[2.9614039886063483e-06, 2.9614039886063483e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:01:50,824] [INFO] [timer.py:199:stop] epoch=0/micro_step=5280/global_step=5280, RunningAvgSamplesPerSec=105.34468671241142, CurrSamplesPerSec=112.25146359250908, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:01:50,917] [INFO] [logging.py:96:log_dist] [Rank 0] step=5280, skipped=86, lr=[1.5486293586993107e-06, 1.5486293586993107e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5279|ppo_ep: 1|act_loss: -0.0190277099609375|cri_loss: -0.0081939697265625|unsuper_loss: 0.0
-average reward score: 5.5078125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.48%) |Training time=0.45s (20.88%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5280|ppo_ep: 1|act_loss: -0.025604248046875|cri_loss: -0.01247406005859375|unsuper_loss: 0.0
-average reward score: 5.6875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.99%) |Training time=0.46s (21.38%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5281|ppo_ep: 1|act_loss: -0.0189208984375|cri_loss: -0.0078582763671875|unsuper_loss: 0.0
-average reward score: 5.2109375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.22%) |Training time=0.46s (21.17%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5282|ppo_ep: 1|act_loss: 0.01389312744140625|cri_loss: 0.00717926025390625|unsuper_loss: 0.0
-average reward score: 4.76171875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.47%) |Training time=0.45s (20.88%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5283|ppo_ep: 1|act_loss: 0.03839111328125|cri_loss: 0.02044677734375|unsuper_loss: 0.0
-average reward score: 4.734375
--------------------------------------------------------------------------------------
-|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.64s (72.98%) |Training time=0.50s (22.23%) |Others=0.11 (4.79%)|CurSamplesPerSec=14.24 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5284|ppo_ep: 1|act_loss: 0.01202392578125|cri_loss: 0.0062408447265625|unsuper_loss: 0.0
-average reward score: 5.13671875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.95%) |Training time=0.46s (21.40%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5285|ppo_ep: 1|act_loss: 0.0244903564453125|cri_loss: 0.01262664794921875|unsuper_loss: 0.0
-average reward score: 4.0703125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.11%) |Training time=0.46s (21.25%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5286|ppo_ep: 1|act_loss: 0.04376220703125|cri_loss: 0.0239715576171875|unsuper_loss: 0.0
-average reward score: 5.5859375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.98%) |Training time=0.46s (21.35%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5287|ppo_ep: 1|act_loss: -0.0039215087890625|cri_loss: -0.0018444061279296875|unsuper_loss: 0.0
-average reward score: 4.80078125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.38%) |Training time=0.45s (20.96%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5288|ppo_ep: 1|act_loss: -0.0125885009765625|cri_loss: -0.004970550537109375|unsuper_loss: 0.0
-average reward score: 5.9296875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.03%) |Training time=0.46s (21.32%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.49
-[2023-04-14 12:02:12,452] [INFO] [logging.py:96:log_dist] [Rank 0] step=5290, skipped=70, lr=[2.9442831613628225e-06, 2.9442831613628225e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:02:12,471] [INFO] [timer.py:199:stop] epoch=0/micro_step=5290/global_step=5290, RunningAvgSamplesPerSec=105.35216406875476, CurrSamplesPerSec=109.4159719697328, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:02:12,563] [INFO] [logging.py:96:log_dist] [Rank 0] step=5290, skipped=86, lr=[1.5397356270701858e-06, 1.5397356270701858e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5289|ppo_ep: 1|act_loss: 0.0186767578125|cri_loss: 0.009857177734375|unsuper_loss: 0.0
-average reward score: 5.7578125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.18%) |Training time=0.46s (21.19%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5290|ppo_ep: 1|act_loss: -0.0070648193359375|cri_loss: -0.00283050537109375|unsuper_loss: 0.0
-average reward score: 4.8828125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.03%) |Training time=0.46s (21.30%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5291|ppo_ep: 1|act_loss: 0.008026123046875|cri_loss: 0.00466156005859375|unsuper_loss: 0.0
-average reward score: 4.828125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.05%) |Training time=0.46s (21.33%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5292|ppo_ep: 1|act_loss: 0.007476806640625|cri_loss: 0.00417327880859375|unsuper_loss: 0.0
-average reward score: 4.8515625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.34%) |Training time=0.45s (21.00%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5293|ppo_ep: 1|act_loss: 0.0211029052734375|cri_loss: 0.0108184814453125|unsuper_loss: 0.0
-average reward score: 4.484375
--------------------------------------------------------------------------------------
-|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.72s (75.49%) |Training time=0.46s (20.11%) |Others=0.10 (4.40%)|CurSamplesPerSec=14.06 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5294|ppo_ep: 1|act_loss: 0.002414703369140625|cri_loss: 0.001720428466796875|unsuper_loss: 0.0
-average reward score: 5.109375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.36%) |Training time=0.45s (20.99%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5295|ppo_ep: 1|act_loss: 0.03570556640625|cri_loss: 0.0189056396484375|unsuper_loss: 0.0
-average reward score: 5.8125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.67%) |Training time=0.45s (20.66%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5296|ppo_ep: 1|act_loss: 0.05523681640625|cri_loss: 0.029052734375|unsuper_loss: 0.0
-average reward score: 4.55078125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.60%) |Training time=0.45s (20.72%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5297|ppo_ep: 1|act_loss: 0.0104522705078125|cri_loss: 0.00646209716796875|unsuper_loss: 0.0
-average reward score: 4.38671875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.56%) |Training time=0.45s (20.79%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5298|ppo_ep: 1|act_loss: -0.01441192626953125|cri_loss: -0.0060272216796875|unsuper_loss: 0.0
-average reward score: 4.640625
--------------------------------------------------------------------------------------
-|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.59s (70.73%) |Training time=0.48s (21.46%) |Others=0.18 (7.80%)|CurSamplesPerSec=14.20 |AvgSamplesPerSec=14.49
-[2023-04-14 12:02:34,208] [INFO] [logging.py:96:log_dist] [Rank 0] step=5300, skipped=70, lr=[2.927190210879829e-06, 2.927190210879829e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:02:34,226] [INFO] [timer.py:199:stop] epoch=0/micro_step=5300/global_step=5300, RunningAvgSamplesPerSec=105.36035663269071, CurrSamplesPerSec=112.99862095906488, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:02:34,319] [INFO] [logging.py:96:log_dist] [Rank 0] step=5300, skipped=86, lr=[1.5308561288755794e-06, 1.5308561288755794e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5299|ppo_ep: 1|act_loss: -0.004306793212890625|cri_loss: -0.001880645751953125|unsuper_loss: 0.0
-average reward score: 4.703125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.52%) |Training time=0.45s (20.77%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5300|ppo_ep: 1|act_loss: -0.01861572265625|cri_loss: -0.008331298828125|unsuper_loss: 0.0
-average reward score: 5.01171875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.20%) |Training time=0.45s (21.15%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5301|ppo_ep: 1|act_loss: 0.006916046142578125|cri_loss: 0.0038356781005859375|unsuper_loss: 0.0
-average reward score: 4.9296875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.20%) |Training time=0.45s (21.13%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5302|ppo_ep: 1|act_loss: -0.03961181640625|cri_loss: -0.018798828125|unsuper_loss: 0.0
-average reward score: 4.8984375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.27%) |Training time=0.45s (21.06%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5303|ppo_ep: 1|act_loss: 0.013214111328125|cri_loss: 0.008453369140625|unsuper_loss: 0.0
-average reward score: 4.42578125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.05%) |Training time=0.46s (21.31%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5304|ppo_ep: 1|act_loss: -0.0126495361328125|cri_loss: -0.0062103271484375|unsuper_loss: 0.0
-average reward score: 4.890625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.09%) |Training time=0.46s (21.25%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5305|ppo_ep: 1|act_loss: 0.01092529296875|cri_loss: 0.005596160888671875|unsuper_loss: 0.0
-average reward score: 6.02734375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.23%) |Training time=0.45s (21.11%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5306|ppo_ep: 1|act_loss: -0.0031070709228515625|cri_loss: -0.000335693359375|unsuper_loss: 0.0
-average reward score: 4.3125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.00%) |Training time=0.46s (21.37%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5307|ppo_ep: 1|act_loss: -0.013458251953125|cri_loss: -0.00453948974609375|unsuper_loss: 0.0
-average reward score: 5.23828125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.05%) |Training time=0.46s (21.27%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5308|ppo_ep: 1|act_loss: 0.03436279296875|cri_loss: 0.0176849365234375|unsuper_loss: 0.0
-average reward score: 5.0625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.10%) |Training time=0.46s (21.26%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.49
-[2023-04-14 12:02:55,722] [INFO] [logging.py:96:log_dist] [Rank 0] step=5310, skipped=70, lr=[2.910125390516126e-06, 2.910125390516126e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:02:55,740] [INFO] [timer.py:199:stop] epoch=0/micro_step=5310/global_step=5310, RunningAvgSamplesPerSec=105.3658343095268, CurrSamplesPerSec=102.50064569932177, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:02:55,833] [INFO] [logging.py:96:log_dist] [Rank 0] step=5310, skipped=86, lr=[1.521990995731075e-06, 1.521990995731075e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5309|ppo_ep: 1|act_loss: 0.00766754150390625|cri_loss: 0.00418853759765625|unsuper_loss: 0.0
-average reward score: 5.671875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.50%) |Training time=0.47s (21.90%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5310|ppo_ep: 1|act_loss: 0.01290130615234375|cri_loss: 0.007411956787109375|unsuper_loss: 0.0
-average reward score: 5.0234375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.11%) |Training time=0.46s (21.28%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5311|ppo_ep: 1|act_loss: 0.0115509033203125|cri_loss: 0.00615692138671875|unsuper_loss: 0.0
-average reward score: 5.4140625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.70%) |Training time=0.44s (20.63%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5312|ppo_ep: 1|act_loss: 0.018096923828125|cri_loss: 0.01111602783203125|unsuper_loss: 0.0
-average reward score: 4.6015625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.32%) |Training time=0.45s (21.01%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5313|ppo_ep: 1|act_loss: -0.019775390625|cri_loss: -0.00923919677734375|unsuper_loss: 0.0
-average reward score: 4.77734375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.16%) |Training time=0.45s (21.11%) |Others=0.10 (4.73%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5314|ppo_ep: 1|act_loss: 0.0172271728515625|cri_loss: 0.0090484619140625|unsuper_loss: 0.0
-average reward score: 5.8359375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.29%) |Training time=0.45s (21.04%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5315|ppo_ep: 1|act_loss: -0.00021028518676757812|cri_loss: 0.0001347064971923828|unsuper_loss: 0.0
-average reward score: 5.19140625
--------------------------------------------------------------------------------------
-|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.77s (75.67%) |Training time=0.47s (20.04%) |Others=0.10 (4.29%)|CurSamplesPerSec=13.71 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5316|ppo_ep: 1|act_loss: -0.00426483154296875|cri_loss: -0.000179290771484375|unsuper_loss: 0.0
-average reward score: 5.34375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.09%) |Training time=0.46s (21.28%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5317|ppo_ep: 1|act_loss: -0.0338134765625|cri_loss: -0.0160064697265625|unsuper_loss: 0.0
-average reward score: 5.75
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.36%) |Training time=0.47s (21.99%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5318|ppo_ep: 1|act_loss: -0.045074462890625|cri_loss: -0.022003173828125|unsuper_loss: 0.0
-average reward score: 4.75390625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.46%) |Training time=0.47s (21.87%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.49
-[2023-04-14 12:03:17,417] [INFO] [logging.py:96:log_dist] [Rank 0] step=5320, skipped=70, lr=[2.89308895321352e-06, 2.89308895321352e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:03:17,435] [INFO] [timer.py:199:stop] epoch=0/micro_step=5320/global_step=5320, RunningAvgSamplesPerSec=105.3698549311855, CurrSamplesPerSec=103.94926211193594, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:03:17,527] [INFO] [logging.py:96:log_dist] [Rank 0] step=5320, skipped=86, lr=[1.5131403590393323e-06, 1.5131403590393323e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5319|ppo_ep: 1|act_loss: 0.01412200927734375|cri_loss: 0.007457733154296875|unsuper_loss: 0.0
-average reward score: 5.53125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.48%) |Training time=0.47s (21.90%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5320|ppo_ep: 1|act_loss: 0.02178955078125|cri_loss: 0.01132965087890625|unsuper_loss: 0.0
-average reward score: 5.10546875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.20%) |Training time=0.48s (22.15%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5321|ppo_ep: 1|act_loss: 0.0303497314453125|cri_loss: 0.0167083740234375|unsuper_loss: 0.0
-average reward score: 5.171875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.42%) |Training time=0.47s (21.92%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5322|ppo_ep: 1|act_loss: -0.023895263671875|cri_loss: -0.0081329345703125|unsuper_loss: 0.0
-average reward score: 4.63671875
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.54%) |Training time=0.50s (22.91%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.56 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5323|ppo_ep: 1|act_loss: -0.0027008056640625|cri_loss: -0.001178741455078125|unsuper_loss: 0.0
-average reward score: 5.265625
--------------------------------------------------------------------------------------
-|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.68s (73.67%) |Training time=0.50s (21.88%) |Others=0.10 (4.45%)|CurSamplesPerSec=14.02 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5324|ppo_ep: 1|act_loss: 0.0748291015625|cri_loss: 0.04034423828125|unsuper_loss: 0.0
-average reward score: 5.2265625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.33%) |Training time=0.47s (21.99%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5325|ppo_ep: 1|act_loss: -0.0117340087890625|cri_loss: -0.00556182861328125|unsuper_loss: 0.0
-average reward score: 5.19921875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.41%) |Training time=0.45s (20.92%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5326|ppo_ep: 1|act_loss: 0.0284271240234375|cri_loss: 0.0148162841796875|unsuper_loss: 0.0
-average reward score: 4.53515625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.49%) |Training time=0.45s (20.80%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5327|ppo_ep: 1|act_loss: 0.02947998046875|cri_loss: 0.01580810546875|unsuper_loss: 0.0
-average reward score: 5.3125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.52%) |Training time=0.47s (21.84%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5328|ppo_ep: 1|act_loss: 0.0015621185302734375|cri_loss: 0.0009093284606933594|unsuper_loss: 0.0
-average reward score: 4.6015625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.64%) |Training time=0.47s (21.74%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.49
-[2023-04-14 12:03:39,193] [INFO] [logging.py:96:log_dist] [Rank 0] step=5330, skipped=70, lr=[2.8760811514931076e-06, 2.8760811514931076e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:03:39,211] [INFO] [timer.py:199:stop] epoch=0/micro_step=5330/global_step=5330, RunningAvgSamplesPerSec=105.36426483738646, CurrSamplesPerSec=100.47936851593158, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:03:39,303] [INFO] [logging.py:96:log_dist] [Rank 0] step=5330, skipped=86, lr=[1.5043043499881378e-06, 1.5043043499881378e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5329|ppo_ep: 1|act_loss: 0.021636962890625|cri_loss: 0.0137786865234375|unsuper_loss: 0.0
-average reward score: 5.13671875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.29%) |Training time=0.48s (22.12%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5330|ppo_ep: 1|act_loss: 0.02362060546875|cri_loss: 0.013427734375|unsuper_loss: 0.0
-average reward score: 5.1953125
--------------------------------------------------------------------------------------
-|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.20%) |Training time=0.48s (20.50%) |Others=0.10 (4.30%)|CurSamplesPerSec=13.65 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5331|ppo_ep: 1|act_loss: 0.01336669921875|cri_loss: 0.00736236572265625|unsuper_loss: 0.0
-average reward score: 5.44921875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.37%) |Training time=0.45s (20.97%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5332|ppo_ep: 1|act_loss: 0.004276275634765625|cri_loss: 0.0023345947265625|unsuper_loss: 0.0
-average reward score: 5.2578125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.98%) |Training time=0.46s (21.40%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5333|ppo_ep: 1|act_loss: -0.00860595703125|cri_loss: -0.003955841064453125|unsuper_loss: 0.0
-average reward score: 4.453125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.86%) |Training time=0.46s (21.50%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5334|ppo_ep: 1|act_loss: -0.0253448486328125|cri_loss: -0.0120697021484375|unsuper_loss: 0.0
-average reward score: 4.625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.19%) |Training time=0.46s (21.16%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5335|ppo_ep: 1|act_loss: -0.0122528076171875|cri_loss: -0.005382537841796875|unsuper_loss: 0.0
-average reward score: 6.08984375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.15%) |Training time=0.46s (21.22%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5336|ppo_ep: 1|act_loss: -0.0132598876953125|cri_loss: -0.005645751953125|unsuper_loss: 0.0
-average reward score: 4.734375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.55%) |Training time=0.46s (21.13%) |Others=0.12 (5.32%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5337|ppo_ep: 1|act_loss: 0.0557861328125|cri_loss: 0.02984619140625|unsuper_loss: 0.0
-average reward score: 4.90234375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.13%) |Training time=0.46s (21.22%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5338|ppo_ep: 1|act_loss: 0.036956787109375|cri_loss: 0.01953125|unsuper_loss: 0.0
-average reward score: 4.61328125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.21%) |Training time=0.46s (21.16%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.49
-[2023-04-14 12:04:00,950] [INFO] [logging.py:96:log_dist] [Rank 0] step=5340, skipped=70, lr=[2.8591022374515417e-06, 2.8591022374515417e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:04:00,968] [INFO] [timer.py:199:stop] epoch=0/micro_step=5340/global_step=5340, RunningAvgSamplesPerSec=105.36797444530119, CurrSamplesPerSec=107.08180623625253, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:04:01,061] [INFO] [logging.py:96:log_dist] [Rank 0] step=5340, skipped=86, lr=[1.495483099548462e-06, 1.495483099548462e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5339|ppo_ep: 1|act_loss: 0.0024166107177734375|cri_loss: 0.0014972686767578125|unsuper_loss: 0.0
-average reward score: 5.10546875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.99%) |Training time=0.46s (21.39%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5340|ppo_ep: 1|act_loss: 0.0214385986328125|cri_loss: 0.013397216796875|unsuper_loss: 0.0
-average reward score: 4.5234375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.06%) |Training time=0.46s (21.30%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5341|ppo_ep: 1|act_loss: 0.0792236328125|cri_loss: 0.044647216796875|unsuper_loss: 0.0
-average reward score: 4.71484375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.14%) |Training time=0.46s (21.24%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5342|ppo_ep: 1|act_loss: -0.01378631591796875|cri_loss: -0.006000518798828125|unsuper_loss: 0.0
-average reward score: 4.6953125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.26%) |Training time=0.45s (21.06%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5343|ppo_ep: 1|act_loss: 0.0090484619140625|cri_loss: 0.005382537841796875|unsuper_loss: 0.0
-average reward score: 4.8984375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.87%) |Training time=0.44s (20.43%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5344|ppo_ep: 1|act_loss: -0.015045166015625|cri_loss: -0.00681304931640625|unsuper_loss: 0.0
-average reward score: 5.390625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.14%) |Training time=0.49s (22.28%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5345|ppo_ep: 1|act_loss: -0.0557861328125|cri_loss: -0.0258026123046875|unsuper_loss: 0.0
-average reward score: 5.00390625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.78%) |Training time=0.44s (20.50%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5346|ppo_ep: 1|act_loss: -0.0288238525390625|cri_loss: -0.01389312744140625|unsuper_loss: 0.0
-average reward score: 4.21484375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.86%) |Training time=0.44s (20.44%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5347|ppo_ep: 1|act_loss: -0.026397705078125|cri_loss: -0.01287841796875|unsuper_loss: 0.0
-average reward score: 5.21875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.83%) |Training time=0.44s (20.45%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5348|ppo_ep: 1|act_loss: -0.0306396484375|cri_loss: -0.0144195556640625|unsuper_loss: 0.0
-average reward score: 5.140625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.61%) |Training time=0.44s (20.63%) |Others=0.10 (4.76%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.49
-[2023-04-14 12:04:22,444] [INFO] [logging.py:96:log_dist] [Rank 0] step=5350, skipped=70, lr=[2.8421524627572866e-06, 2.8421524627572866e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:04:22,463] [INFO] [timer.py:199:stop] epoch=0/micro_step=5350/global_step=5350, RunningAvgSamplesPerSec=105.38047175709768, CurrSamplesPerSec=112.89862840396556, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:04:22,555] [INFO] [logging.py:96:log_dist] [Rank 0] step=5350, skipped=86, lr=[1.486676738472515e-06, 1.486676738472515e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5349|ppo_ep: 1|act_loss: 0.0217742919921875|cri_loss: 0.01186370849609375|unsuper_loss: 0.0
-average reward score: 4.67578125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.53%) |Training time=0.45s (20.81%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5350|ppo_ep: 1|act_loss: 0.1251220703125|cri_loss: 0.07086181640625|unsuper_loss: 0.0
-average reward score: 4.5078125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.77%) |Training time=0.44s (20.55%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5351|ppo_ep: 1|act_loss: 0.006988525390625|cri_loss: 0.003627777099609375|unsuper_loss: 0.0
-average reward score: 6.07421875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.70%) |Training time=0.44s (20.65%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5352|ppo_ep: 1|act_loss: 0.0302734375|cri_loss: 0.0184783935546875|unsuper_loss: 0.0
-average reward score: 4.58984375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.89%) |Training time=0.45s (20.49%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5353|ppo_ep: 1|act_loss: 0.1055908203125|cri_loss: 0.05938720703125|unsuper_loss: 0.0
-average reward score: 4.58203125
--------------------------------------------------------------------------------------
-|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.70s (75.76%) |Training time=0.44s (19.74%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.28 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5354|ppo_ep: 1|act_loss: -0.09637451171875|cri_loss: -0.041259765625|unsuper_loss: 0.0
-average reward score: 4.77734375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.59%) |Training time=0.45s (20.76%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5355|ppo_ep: 1|act_loss: -0.01036834716796875|cri_loss: -0.00457000732421875|unsuper_loss: 0.0
-average reward score: 5.7890625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.89%) |Training time=0.44s (20.39%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5356|ppo_ep: 1|act_loss: -0.0211181640625|cri_loss: -0.0093994140625|unsuper_loss: 0.0
-average reward score: 4.48046875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.31%) |Training time=0.45s (21.05%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5357|ppo_ep: 1|act_loss: 0.004425048828125|cri_loss: 0.0028228759765625|unsuper_loss: 0.0
-average reward score: 4.546875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.54%) |Training time=0.44s (20.79%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5358|ppo_ep: 1|act_loss: 0.0533447265625|cri_loss: 0.0287322998046875|unsuper_loss: 0.0
-average reward score: 5.3515625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.56%) |Training time=0.44s (20.71%) |Others=0.10 (4.73%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.49
-[2023-04-14 12:04:44,014] [INFO] [logging.py:96:log_dist] [Rank 0] step=5360, skipped=70, lr=[2.8252320786468936e-06, 2.8252320786468936e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:04:44,033] [INFO] [timer.py:199:stop] epoch=0/micro_step=5360/global_step=5360, RunningAvgSamplesPerSec=105.39471890271973, CurrSamplesPerSec=112.58941413422879, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:04:44,125] [INFO] [logging.py:96:log_dist] [Rank 0] step=5360, skipped=86, lr=[1.477885397291814e-06, 1.477885397291814e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5359|ppo_ep: 1|act_loss: 0.03546142578125|cri_loss: 0.018402099609375|unsuper_loss: 0.0
-average reward score: 4.31640625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.50%) |Training time=0.45s (20.85%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5360|ppo_ep: 1|act_loss: -0.01268768310546875|cri_loss: -0.005405426025390625|unsuper_loss: 0.0
-average reward score: 4.5078125
--------------------------------------------------------------------------------------
-|E2E latency=2.30s |Gather latency=0.00s (0.00%) |Generate time=1.74s (75.74%) |Training time=0.46s (19.90%) |Others=0.10 (4.36%)|CurSamplesPerSec=13.89 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5361|ppo_ep: 1|act_loss: 0.218994140625|cri_loss: 0.1324462890625|unsuper_loss: 0.0
-average reward score: 4.7578125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.71%) |Training time=0.46s (21.61%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5362|ppo_ep: 1|act_loss: -0.01126861572265625|cri_loss: -0.0054168701171875|unsuper_loss: 0.0
-average reward score: 5.40625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.68%) |Training time=0.46s (21.64%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5363|ppo_ep: 1|act_loss: 0.004543304443359375|cri_loss: 0.0038318634033203125|unsuper_loss: 0.0
-average reward score: 4.3125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.77%) |Training time=0.46s (21.55%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5364|ppo_ep: 1|act_loss: 0.0152435302734375|cri_loss: 0.0079498291015625|unsuper_loss: 0.0
-average reward score: 4.5078125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.62%) |Training time=0.46s (21.70%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5365|ppo_ep: 1|act_loss: 0.0078277587890625|cri_loss: 0.00411224365234375|unsuper_loss: 0.0
-average reward score: 5.47265625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.76%) |Training time=0.46s (21.54%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5366|ppo_ep: 1|act_loss: 0.002910614013671875|cri_loss: 0.0019168853759765625|unsuper_loss: 0.0
-average reward score: 4.17578125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.55%) |Training time=0.47s (21.76%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5367|ppo_ep: 1|act_loss: -0.0033702850341796875|cri_loss: -0.0005016326904296875|unsuper_loss: 0.0
-average reward score: 4.6875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.72%) |Training time=0.46s (21.60%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5368|ppo_ep: 1|act_loss: 0.004711151123046875|cri_loss: 0.0026111602783203125|unsuper_loss: 0.0
-average reward score: 4.46484375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.81%) |Training time=0.46s (21.50%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.49
-[2023-04-14 12:05:05,578] [INFO] [logging.py:96:log_dist] [Rank 0] step=5370, skipped=70, lr=[2.808341335921272e-06, 2.808341335921272e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:05:05,596] [INFO] [timer.py:199:stop] epoch=0/micro_step=5370/global_step=5370, RunningAvgSamplesPerSec=105.39691619380453, CurrSamplesPerSec=104.9207163684404, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:05:05,689] [INFO] [logging.py:96:log_dist] [Rank 0] step=5370, skipped=86, lr=[1.4691092063152417e-06, 1.4691092063152417e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5369|ppo_ep: 1|act_loss: -0.0125885009765625|cri_loss: -0.006072998046875|unsuper_loss: 0.0
-average reward score: 4.87890625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.57%) |Training time=0.47s (21.79%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5370|ppo_ep: 1|act_loss: -0.005306243896484375|cri_loss: -0.002552032470703125|unsuper_loss: 0.0
-average reward score: 4.66796875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.59%) |Training time=0.47s (21.74%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5371|ppo_ep: 1|act_loss: -0.149658203125|cri_loss: -0.0469970703125|unsuper_loss: 0.0
-average reward score: 4.33203125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.23%) |Training time=0.48s (22.12%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5372|ppo_ep: 1|act_loss: -0.0189666748046875|cri_loss: -0.009185791015625|unsuper_loss: 0.0
-average reward score: 4.51953125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.37%) |Training time=0.45s (20.93%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.49
-[2023-04-14 12:05:14,283] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
-epoch: 0|step: 5373|ppo_ep: 1|act_loss: 0.0158843994140625|cri_loss: 0.0085906982421875|unsuper_loss: 0.0
-average reward score: 5.2890625
--------------------------------------------------------------------------------------
-|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.09%) |Training time=0.46s (21.71%) |Others=0.09 (4.19%)|CurSamplesPerSec=15.01 |AvgSamplesPerSec=14.49
-[2023-04-14 12:05:16,416] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
-epoch: 0|step: 5374|ppo_ep: 1|act_loss: -0.03131103515625|cri_loss: -0.0146484375|unsuper_loss: 0.0
-average reward score: 4.73046875
--------------------------------------------------------------------------------------
-|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.21%) |Training time=0.46s (21.59%) |Others=0.09 (4.19%)|CurSamplesPerSec=15.01 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5375|ppo_ep: 1|act_loss: -0.0276336669921875|cri_loss: -0.0133819580078125|unsuper_loss: 0.0
-average reward score: 4.17578125
--------------------------------------------------------------------------------------
-|E2E latency=2.30s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.90%) |Training time=0.45s (19.74%) |Others=0.10 (4.36%)|CurSamplesPerSec=13.88 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5376|ppo_ep: 1|act_loss: -0.02777099609375|cri_loss: -0.01239776611328125|unsuper_loss: 0.0
-average reward score: 4.32421875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.54%) |Training time=0.47s (21.74%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5377|ppo_ep: 1|act_loss: -0.025543212890625|cri_loss: -0.00637054443359375|unsuper_loss: 0.0
-average reward score: 4.20703125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.72%) |Training time=0.46s (21.62%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5378|ppo_ep: 1|act_loss: -0.023193359375|cri_loss: -0.011199951171875|unsuper_loss: 0.0
-average reward score: 4.1875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.79%) |Training time=0.46s (21.51%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.49
-[2023-04-14 12:05:27,179] [INFO] [logging.py:96:log_dist] [Rank 0] step=5380, skipped=70, lr=[2.7914804849419754e-06, 2.7914804849419754e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:05:27,197] [INFO] [timer.py:199:stop] epoch=0/micro_step=5380/global_step=5380, RunningAvgSamplesPerSec=105.39935198633347, CurrSamplesPerSec=106.7170614225843, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:05:27,294] [INFO] [logging.py:96:log_dist] [Rank 0] step=5380, skipped=88, lr=[1.462099249105216e-06, 1.462099249105216e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5379|ppo_ep: 1|act_loss: 0.0011959075927734375|cri_loss: 0.00070953369140625|unsuper_loss: 0.0
-average reward score: 4.671875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.47%) |Training time=0.47s (21.70%) |Others=0.10 (4.82%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5380|ppo_ep: 1|act_loss: -0.0013675689697265625|cri_loss: 0.0005321502685546875|unsuper_loss: 0.0
-average reward score: 5.3046875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.72%) |Training time=0.46s (21.61%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5381|ppo_ep: 1|act_loss: 0.01001739501953125|cri_loss: 0.0052490234375|unsuper_loss: 0.0
-average reward score: 5.57421875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.84%) |Training time=0.46s (21.13%) |Others=0.11 (5.03%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5382|ppo_ep: 1|act_loss: 0.0014362335205078125|cri_loss: 0.0013036727905273438|unsuper_loss: 0.0
-average reward score: 4.578125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.42%) |Training time=0.48s (22.01%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5383|ppo_ep: 1|act_loss: 0.016265869140625|cri_loss: 0.00885772705078125|unsuper_loss: 0.0
-average reward score: 5.296875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.61%) |Training time=0.47s (21.73%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5384|ppo_ep: 1|act_loss: 0.021728515625|cri_loss: 0.01099395751953125|unsuper_loss: 0.0
-average reward score: 4.4609375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.82%) |Training time=0.46s (21.51%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5385|ppo_ep: 1|act_loss: 0.01617431640625|cri_loss: 0.008880615234375|unsuper_loss: 0.0
-average reward score: 4.75
--------------------------------------------------------------------------------------
-|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.90%) |Training time=0.46s (21.43%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5386|ppo_ep: 1|act_loss: 0.01033782958984375|cri_loss: 0.005344390869140625|unsuper_loss: 0.0
-average reward score: 4.85546875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.36%) |Training time=0.48s (22.00%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5387|ppo_ep: 1|act_loss: 0.0189056396484375|cri_loss: 0.0101318359375|unsuper_loss: 0.0
-average reward score: 5.10546875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.06%) |Training time=0.46s (21.26%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5388|ppo_ep: 1|act_loss: -0.087646484375|cri_loss: -0.0062255859375|unsuper_loss: 0.0
-average reward score: 4.29296875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.74%) |Training time=0.46s (21.53%) |Others=0.10 (4.73%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.49
-[2023-04-14 12:05:48,661] [INFO] [logging.py:96:log_dist] [Rank 0] step=5390, skipped=70, lr=[2.774649775627491e-06, 2.774649775627491e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:05:48,679] [INFO] [timer.py:199:stop] epoch=0/micro_step=5390/global_step=5390, RunningAvgSamplesPerSec=105.40158110513609, CurrSamplesPerSec=108.0063218002324, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:05:48,771] [INFO] [logging.py:96:log_dist] [Rank 0] step=5390, skipped=88, lr=[1.4533506561564305e-06, 1.4533506561564305e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5389|ppo_ep: 1|act_loss: -0.0211029052734375|cri_loss: -0.0100860595703125|unsuper_loss: 0.0
-average reward score: 3.73828125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.64%) |Training time=0.46s (21.40%) |Others=0.11 (4.95%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5390|ppo_ep: 1|act_loss: 0.06304931640625|cri_loss: 0.03387451171875|unsuper_loss: 0.0
-average reward score: 4.76171875
--------------------------------------------------------------------------------------
-|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.72s (75.20%) |Training time=0.46s (20.09%) |Others=0.11 (4.71%)|CurSamplesPerSec=14.00 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5391|ppo_ep: 1|act_loss: -0.00705718994140625|cri_loss: -0.00333404541015625|unsuper_loss: 0.0
-average reward score: 4.0703125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.79%) |Training time=0.46s (21.55%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5392|ppo_ep: 1|act_loss: 0.036102294921875|cri_loss: 0.0203704833984375|unsuper_loss: 0.0
-average reward score: 5.90234375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.04%) |Training time=0.48s (22.32%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5393|ppo_ep: 1|act_loss: -0.0137176513671875|cri_loss: -0.0059967041015625|unsuper_loss: 0.0
-average reward score: 5.15234375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.58%) |Training time=0.47s (21.74%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5394|ppo_ep: 1|act_loss: 0.02825927734375|cri_loss: 0.0151519775390625|unsuper_loss: 0.0
-average reward score: 5.45703125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.93%) |Training time=0.46s (21.40%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5395|ppo_ep: 1|act_loss: 0.01000213623046875|cri_loss: 0.00513458251953125|unsuper_loss: 0.0
-average reward score: 4.45703125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.70%) |Training time=0.46s (21.51%) |Others=0.10 (4.78%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5396|ppo_ep: 1|act_loss: -0.00942230224609375|cri_loss: -0.00244140625|unsuper_loss: 0.0
-average reward score: 5.80078125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.76%) |Training time=0.46s (21.54%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5397|ppo_ep: 1|act_loss: -0.0196685791015625|cri_loss: -0.0056610107421875|unsuper_loss: 0.0
-average reward score: 3.70703125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.95%) |Training time=0.46s (21.39%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5398|ppo_ep: 1|act_loss: -0.02227783203125|cri_loss: -0.0088958740234375|unsuper_loss: 0.0
-average reward score: 4.4453125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.66%) |Training time=0.46s (21.68%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.49
-[2023-04-14 12:06:10,231] [INFO] [logging.py:96:log_dist] [Rank 0] step=5400, skipped=70, lr=[2.757849457449532e-06, 2.757849457449532e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:06:10,249] [INFO] [timer.py:199:stop] epoch=0/micro_step=5400/global_step=5400, RunningAvgSamplesPerSec=105.40350440139551, CurrSamplesPerSec=107.39463033761628, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:06:10,342] [INFO] [logging.py:96:log_dist] [Rank 0] step=5400, skipped=88, lr=[1.4446175770758245e-06, 1.4446175770758245e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5399|ppo_ep: 1|act_loss: -0.012451171875|cri_loss: -0.00595855712890625|unsuper_loss: 0.0
-average reward score: 4.16015625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.82%) |Training time=0.46s (21.52%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5400|ppo_ep: 1|act_loss: -0.0237579345703125|cri_loss: -0.011627197265625|unsuper_loss: 0.0
-average reward score: 4.4140625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.45%) |Training time=0.47s (21.91%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5401|ppo_ep: 1|act_loss: -0.0233154296875|cri_loss: -0.0113372802734375|unsuper_loss: 0.0
-average reward score: 4.359375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.63%) |Training time=0.46s (21.64%) |Others=0.10 (4.73%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5402|ppo_ep: 1|act_loss: -0.0163421630859375|cri_loss: -0.00795745849609375|unsuper_loss: 0.0
-average reward score: 4.75390625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.36%) |Training time=0.48s (22.00%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5403|ppo_ep: 1|act_loss: -0.006744384765625|cri_loss: -0.0031108856201171875|unsuper_loss: 0.0
-average reward score: 4.984375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.76%) |Training time=0.49s (22.64%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5404|ppo_ep: 1|act_loss: -0.060516357421875|cri_loss: -0.02740478515625|unsuper_loss: 0.0
-average reward score: 4.55859375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.88%) |Training time=0.49s (22.52%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5405|ppo_ep: 1|act_loss: -0.004638671875|cri_loss: -0.0005283355712890625|unsuper_loss: 0.0
-average reward score: 4.53515625
--------------------------------------------------------------------------------------
-|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.76s (74.57%) |Training time=0.50s (21.16%) |Others=0.10 (4.27%)|CurSamplesPerSec=13.59 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5406|ppo_ep: 1|act_loss: 0.019073486328125|cri_loss: 0.0097198486328125|unsuper_loss: 0.0
-average reward score: 5.9375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.66%) |Training time=0.47s (21.69%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5407|ppo_ep: 1|act_loss: 0.024169921875|cri_loss: 0.012420654296875|unsuper_loss: 0.0
-average reward score: 4.74609375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.50%) |Training time=0.47s (21.87%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5408|ppo_ep: 1|act_loss: 0.007778167724609375|cri_loss: 0.0053558349609375|unsuper_loss: 0.0
-average reward score: 5.26953125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.33%) |Training time=0.48s (22.02%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.49
-[2023-04-14 12:06:31,997] [INFO] [logging.py:96:log_dist] [Rank 0] step=5410, skipped=70, lr=[2.7410797794293427e-06, 2.7410797794293427e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:06:32,015] [INFO] [timer.py:199:stop] epoch=0/micro_step=5410/global_step=5410, RunningAvgSamplesPerSec=105.39676190547151, CurrSamplesPerSec=106.50273442583405, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:06:32,108] [INFO] [logging.py:96:log_dist] [Rank 0] step=5410, skipped=88, lr=[1.4359001413086976e-06, 1.4359001413086976e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5409|ppo_ep: 1|act_loss: -0.00499725341796875|cri_loss: -0.002079010009765625|unsuper_loss: 0.0
-average reward score: 5.578125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.84%) |Training time=0.46s (21.52%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5410|ppo_ep: 1|act_loss: -0.00609588623046875|cri_loss: -0.002689361572265625|unsuper_loss: 0.0
-average reward score: 4.4453125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.71%) |Training time=0.46s (21.41%) |Others=0.11 (4.88%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5411|ppo_ep: 1|act_loss: 0.01546478271484375|cri_loss: 0.00794219970703125|unsuper_loss: 0.0
-average reward score: 5.4375
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.50%) |Training time=0.49s (22.34%) |Others=0.11 (5.15%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5412|ppo_ep: 1|act_loss: -0.0040435791015625|cri_loss: -0.0018091201782226562|unsuper_loss: 0.0
-average reward score: 4.921875
--------------------------------------------------------------------------------------
-|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.68s (73.97%) |Training time=0.49s (21.62%) |Others=0.10 (4.41%)|CurSamplesPerSec=14.05 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5413|ppo_ep: 1|act_loss: 0.0044708251953125|cri_loss: 0.0024280548095703125|unsuper_loss: 0.0
-average reward score: 5.359375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.56%) |Training time=0.47s (21.81%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.49
-epoch: 0|step: 5414|ppo_ep: 1|act_loss: -0.035888671875|cri_loss: -0.0156097412109375|unsuper_loss: 0.0
-average reward score: 5.58984375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.52%) |Training time=0.47s (21.80%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5415|ppo_ep: 1|act_loss: 0.0011444091796875|cri_loss: 0.0019741058349609375|unsuper_loss: 0.0
-average reward score: 6.15625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.42%) |Training time=0.47s (21.96%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5416|ppo_ep: 1|act_loss: -0.031219482421875|cri_loss: -0.01428985595703125|unsuper_loss: 0.0
-average reward score: 5.03515625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.37%) |Training time=0.47s (21.97%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5417|ppo_ep: 1|act_loss: -0.01873779296875|cri_loss: -0.005611419677734375|unsuper_loss: 0.0
-average reward score: 5.0390625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.59%) |Training time=0.47s (21.79%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5418|ppo_ep: 1|act_loss: 0.0180816650390625|cri_loss: 0.0113677978515625|unsuper_loss: 0.0
-average reward score: 4.5703125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.16%) |Training time=0.48s (22.21%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.50
-[2023-04-14 12:06:53,724] [INFO] [logging.py:96:log_dist] [Rank 0] step=5420, skipped=70, lr=[2.724340990134006e-06, 2.724340990134006e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:06:53,742] [INFO] [timer.py:199:stop] epoch=0/micro_step=5420/global_step=5420, RunningAvgSamplesPerSec=105.39063099074835, CurrSamplesPerSec=103.22773090500765, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:06:53,834] [INFO] [logging.py:96:log_dist] [Rank 0] step=5420, skipped=88, lr=[1.4271984780684778e-06, 1.4271984780684778e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5419|ppo_ep: 1|act_loss: 1.4781951904296875e-05|cri_loss: 0.00043654441833496094|unsuper_loss: 0.0
-average reward score: 5.55859375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.38%) |Training time=0.47s (21.99%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5420|ppo_ep: 1|act_loss: -0.0020809173583984375|cri_loss: -0.0009274482727050781|unsuper_loss: 0.0
-average reward score: 4.046875
--------------------------------------------------------------------------------------
-|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.76s (74.58%) |Training time=0.50s (21.18%) |Others=0.10 (4.24%)|CurSamplesPerSec=13.57 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5421|ppo_ep: 1|act_loss: 0.0184173583984375|cri_loss: 0.00998687744140625|unsuper_loss: 0.0
-average reward score: 4.9921875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.58%) |Training time=0.47s (21.68%) |Others=0.10 (4.75%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5422|ppo_ep: 1|act_loss: 0.0377197265625|cri_loss: 0.019622802734375|unsuper_loss: 0.0
-average reward score: 6.3125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.07%) |Training time=0.48s (22.14%) |Others=0.10 (4.80%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5423|ppo_ep: 1|act_loss: 0.007061004638671875|cri_loss: 0.0047607421875|unsuper_loss: 0.0
-average reward score: 5.26171875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.13%) |Training time=0.48s (22.23%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5424|ppo_ep: 1|act_loss: -0.003932952880859375|cri_loss: -0.0017566680908203125|unsuper_loss: 0.0
-average reward score: 4.0078125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.37%) |Training time=0.47s (21.97%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5425|ppo_ep: 1|act_loss: 0.0092010498046875|cri_loss: 0.004894256591796875|unsuper_loss: 0.0
-average reward score: 5.08984375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.17%) |Training time=0.48s (22.21%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5426|ppo_ep: 1|act_loss: 0.0012369155883789062|cri_loss: 0.0012493133544921875|unsuper_loss: 0.0
-average reward score: 4.671875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.31%) |Training time=0.47s (22.03%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5427|ppo_ep: 1|act_loss: 0.018707275390625|cri_loss: 0.01065826416015625|unsuper_loss: 0.0
-average reward score: 5.1875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.21%) |Training time=0.48s (22.14%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5428|ppo_ep: 1|act_loss: -0.0004367828369140625|cri_loss: 4.744529724121094e-05|unsuper_loss: 0.0
-average reward score: 6.0
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.10%) |Training time=0.48s (22.26%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.50
-[2023-04-14 12:07:15,439] [INFO] [logging.py:96:log_dist] [Rank 0] step=5430, skipped=70, lr=[2.7076333376727596e-06, 2.7076333376727596e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:07:15,458] [INFO] [timer.py:199:stop] epoch=0/micro_step=5430/global_step=5430, RunningAvgSamplesPerSec=105.38353117169935, CurrSamplesPerSec=104.13257693310327, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:07:15,550] [INFO] [logging.py:96:log_dist] [Rank 0] step=5430, skipped=88, lr=[1.4185127163348057e-06, 1.4185127163348057e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5429|ppo_ep: 1|act_loss: -0.0153350830078125|cri_loss: -0.007526397705078125|unsuper_loss: 0.0
-average reward score: 5.08984375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.47%) |Training time=0.47s (21.87%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5430|ppo_ep: 1|act_loss: -0.00426483154296875|cri_loss: -0.002044677734375|unsuper_loss: 0.0
-average reward score: 5.65234375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.30%) |Training time=0.47s (22.05%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5431|ppo_ep: 1|act_loss: 0.0103607177734375|cri_loss: 0.0079803466796875|unsuper_loss: 0.0
-average reward score: 5.43359375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.25%) |Training time=0.48s (22.08%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5432|ppo_ep: 1|act_loss: 0.003936767578125|cri_loss: 0.00244903564453125|unsuper_loss: 0.0
-average reward score: 5.59765625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.41%) |Training time=0.48s (21.94%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5433|ppo_ep: 1|act_loss: -0.005859375|cri_loss: -0.00262451171875|unsuper_loss: 0.0
-average reward score: 5.69921875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.43%) |Training time=0.47s (21.79%) |Others=0.10 (4.78%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5434|ppo_ep: 1|act_loss: 0.02886962890625|cri_loss: 0.01512908935546875|unsuper_loss: 0.0
-average reward score: 4.7890625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.96%) |Training time=0.49s (22.45%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5435|ppo_ep: 1|act_loss: 0.0050048828125|cri_loss: 0.0025997161865234375|unsuper_loss: 0.0
-average reward score: 4.9765625
--------------------------------------------------------------------------------------
-|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.60s (71.87%) |Training time=0.49s (21.83%) |Others=0.14 (6.30%)|CurSamplesPerSec=14.40 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5436|ppo_ep: 1|act_loss: -0.01375579833984375|cri_loss: -0.006671905517578125|unsuper_loss: 0.0
-average reward score: 5.58984375
--------------------------------------------------------------------------------------
-|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.68s (74.51%) |Training time=0.47s (20.98%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.19 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5437|ppo_ep: 1|act_loss: 0.007053375244140625|cri_loss: 0.00396728515625|unsuper_loss: 0.0
-average reward score: 5.4375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.48%) |Training time=0.48s (21.94%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5438|ppo_ep: 1|act_loss: -0.014312744140625|cri_loss: -0.0068359375|unsuper_loss: 0.0
-average reward score: 5.15625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.68%) |Training time=0.47s (21.71%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.50
-[2023-04-14 12:07:37,238] [INFO] [logging.py:96:log_dist] [Rank 0] step=5440, skipped=70, lr=[2.69095706969332e-06, 2.69095706969332e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:07:37,257] [INFO] [timer.py:199:stop] epoch=0/micro_step=5440/global_step=5440, RunningAvgSamplesPerSec=105.37736171331294, CurrSamplesPerSec=105.67992189222387, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:07:37,349] [INFO] [logging.py:96:log_dist] [Rank 0] step=5440, skipped=88, lr=[1.4098429848516231e-06, 1.4098429848516231e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5439|ppo_ep: 1|act_loss: -0.002811431884765625|cri_loss: -0.0012006759643554688|unsuper_loss: 0.0
-average reward score: 4.47265625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.88%) |Training time=0.47s (21.51%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5440|ppo_ep: 1|act_loss: -0.0201873779296875|cri_loss: -0.00963592529296875|unsuper_loss: 0.0
-average reward score: 5.0546875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.59%) |Training time=0.48s (21.83%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5441|ppo_ep: 1|act_loss: -0.0162506103515625|cri_loss: -0.007709503173828125|unsuper_loss: 0.0
-average reward score: 4.734375
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.19%) |Training time=0.47s (21.26%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5442|ppo_ep: 1|act_loss: 0.10546875|cri_loss: 0.05682373046875|unsuper_loss: 0.0
-average reward score: 5.04296875
--------------------------------------------------------------------------------------
-|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.70s (74.90%) |Training time=0.47s (20.68%) |Others=0.10 (4.42%)|CurSamplesPerSec=14.08 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5443|ppo_ep: 1|act_loss: 0.0166015625|cri_loss: 0.0086212158203125|unsuper_loss: 0.0
-average reward score: 5.61328125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.87%) |Training time=0.46s (21.49%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5444|ppo_ep: 1|act_loss: -0.0185394287109375|cri_loss: -0.0087432861328125|unsuper_loss: 0.0
-average reward score: 4.95703125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.70%) |Training time=0.47s (21.67%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5445|ppo_ep: 1|act_loss: 0.0125274658203125|cri_loss: 0.00646209716796875|unsuper_loss: 0.0
-average reward score: 6.6328125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.80%) |Training time=0.47s (21.55%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5446|ppo_ep: 1|act_loss: 0.0301513671875|cri_loss: 0.0159759521484375|unsuper_loss: 0.0
-average reward score: 5.2578125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.57%) |Training time=0.47s (21.79%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5447|ppo_ep: 1|act_loss: 0.0225982666015625|cri_loss: 0.01230621337890625|unsuper_loss: 0.0
-average reward score: 5.01953125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.62%) |Training time=0.47s (21.69%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5448|ppo_ep: 1|act_loss: -0.020538330078125|cri_loss: -0.009857177734375|unsuper_loss: 0.0
-average reward score: 5.35546875
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.44%) |Training time=0.48s (21.91%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.52 |AvgSamplesPerSec=14.50
-[2023-04-14 12:07:59,090] [INFO] [logging.py:96:log_dist] [Rank 0] step=5450, skipped=70, lr=[2.6743124333782032e-06, 2.6743124333782032e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:07:59,109] [INFO] [timer.py:199:stop] epoch=0/micro_step=5450/global_step=5450, RunningAvgSamplesPerSec=105.37402939249111, CurrSamplesPerSec=102.53619653180074, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:07:59,202] [INFO] [logging.py:96:log_dist] [Rank 0] step=5450, skipped=88, lr=[1.4011894121252672e-06, 1.4011894121252672e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5449|ppo_ep: 1|act_loss: -0.00958251953125|cri_loss: -0.00408935546875|unsuper_loss: 0.0
-average reward score: 5.09375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.45%) |Training time=0.47s (21.90%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5450|ppo_ep: 1|act_loss: 0.00853729248046875|cri_loss: 0.004505157470703125|unsuper_loss: 0.0
-average reward score: 4.96875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.04%) |Training time=0.49s (22.37%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5451|ppo_ep: 1|act_loss: -0.00821685791015625|cri_loss: -0.0039215087890625|unsuper_loss: 0.0
-average reward score: 4.9140625
--------------------------------------------------------------------------------------
-|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.30%) |Training time=0.48s (20.40%) |Others=0.10 (4.30%)|CurSamplesPerSec=13.73 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5452|ppo_ep: 1|act_loss: -0.01424407958984375|cri_loss: -0.006511688232421875|unsuper_loss: 0.0
-average reward score: 5.15625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.13%) |Training time=0.48s (22.23%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5453|ppo_ep: 1|act_loss: -0.027374267578125|cri_loss: -0.0124053955078125|unsuper_loss: 0.0
-average reward score: 4.53515625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.52%) |Training time=0.47s (21.87%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5454|ppo_ep: 1|act_loss: -0.0054931640625|cri_loss: -0.002529144287109375|unsuper_loss: 0.0
-average reward score: 5.34765625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.49%) |Training time=0.47s (21.88%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5455|ppo_ep: 1|act_loss: -0.007137298583984375|cri_loss: -0.003376007080078125|unsuper_loss: 0.0
-average reward score: 5.12109375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.71%) |Training time=0.47s (21.67%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5456|ppo_ep: 1|act_loss: 0.020599365234375|cri_loss: 0.0107574462890625|unsuper_loss: 0.0
-average reward score: 5.07421875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.58%) |Training time=0.47s (21.81%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5457|ppo_ep: 1|act_loss: -0.03179931640625|cri_loss: -0.01404571533203125|unsuper_loss: 0.0
-average reward score: 4.5625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.56%) |Training time=0.47s (21.82%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5458|ppo_ep: 1|act_loss: 0.0020694732666015625|cri_loss: 0.0013256072998046875|unsuper_loss: 0.0
-average reward score: 4.359375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.58%) |Training time=0.47s (21.79%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.50
-[2023-04-14 12:08:20,904] [INFO] [logging.py:96:log_dist] [Rank 0] step=5460, skipped=70, lr=[2.6576996754410752e-06, 2.6576996754410752e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:08:20,922] [INFO] [timer.py:199:stop] epoch=0/micro_step=5460/global_step=5460, RunningAvgSamplesPerSec=105.36840179701572, CurrSamplesPerSec=102.48804438627545, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:08:21,015] [INFO] [logging.py:96:log_dist] [Rank 0] step=5460, skipped=88, lr=[1.3925521264225587e-06, 1.3925521264225587e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5459|ppo_ep: 1|act_loss: -0.00409698486328125|cri_loss: -0.0016803741455078125|unsuper_loss: 0.0
-average reward score: 5.59375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.49%) |Training time=0.47s (21.93%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5460|ppo_ep: 1|act_loss: 0.01171875|cri_loss: 0.0059967041015625|unsuper_loss: 0.0
-average reward score: 4.5625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.62%) |Training time=0.47s (21.73%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5461|ppo_ep: 1|act_loss: -0.004016876220703125|cri_loss: 0.001621246337890625|unsuper_loss: 0.0
-average reward score: 5.6953125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.93%) |Training time=0.46s (21.44%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5462|ppo_ep: 1|act_loss: -0.0219879150390625|cri_loss: -0.0105743408203125|unsuper_loss: 0.0
-average reward score: 4.41015625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.87%) |Training time=0.47s (21.52%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5463|ppo_ep: 1|act_loss: 0.010955810546875|cri_loss: 0.0068206787109375|unsuper_loss: 0.0
-average reward score: 4.73828125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.75%) |Training time=0.47s (21.62%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5464|ppo_ep: 1|act_loss: 0.005199432373046875|cri_loss: 0.0028209686279296875|unsuper_loss: 0.0
-average reward score: 4.35546875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.90%) |Training time=0.46s (21.47%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5465|ppo_ep: 1|act_loss: 0.00040841102600097656|cri_loss: 0.0008087158203125|unsuper_loss: 0.0
-average reward score: 5.42578125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.86%) |Training time=0.46s (21.49%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5466|ppo_ep: 1|act_loss: -0.018341064453125|cri_loss: -0.00862884521484375|unsuper_loss: 0.0
-average reward score: 5.0546875
--------------------------------------------------------------------------------------
-|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.74s (74.94%) |Training time=0.48s (20.75%) |Others=0.10 (4.31%)|CurSamplesPerSec=13.76 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5467|ppo_ep: 1|act_loss: 0.006927490234375|cri_loss: 0.0036773681640625|unsuper_loss: 0.0
-average reward score: 5.17578125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.06%) |Training time=0.48s (22.30%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5468|ppo_ep: 1|act_loss: 0.01983642578125|cri_loss: 0.01010894775390625|unsuper_loss: 0.0
-average reward score: 5.78125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.23%) |Training time=0.48s (22.14%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.50
-[2023-04-14 12:08:42,709] [INFO] [logging.py:96:log_dist] [Rank 0] step=5470, skipped=70, lr=[2.641119042123085e-06, 2.641119042123085e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:08:42,727] [INFO] [timer.py:199:stop] epoch=0/micro_step=5470/global_step=5470, RunningAvgSamplesPerSec=105.36424124670562, CurrSamplesPerSec=99.5353366808979, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:08:42,821] [INFO] [logging.py:96:log_dist] [Rank 0] step=5470, skipped=88, lr=[1.383931255768909e-06, 1.383931255768909e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5469|ppo_ep: 1|act_loss: 0.00701904296875|cri_loss: 0.00399017333984375|unsuper_loss: 0.0
-average reward score: 5.46484375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.97%) |Training time=0.48s (22.37%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5470|ppo_ep: 1|act_loss: -0.0755615234375|cri_loss: -0.035247802734375|unsuper_loss: 0.0
-average reward score: 5.08203125
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.50%) |Training time=0.49s (22.24%) |Others=0.12 (5.27%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5471|ppo_ep: 1|act_loss: -0.0297088623046875|cri_loss: -0.013824462890625|unsuper_loss: 0.0
-average reward score: 7.29296875
--------------------------------------------------------------------------------------
-|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.68s (74.39%) |Training time=0.48s (21.17%) |Others=0.10 (4.45%)|CurSamplesPerSec=14.15 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5472|ppo_ep: 1|act_loss: -0.00472259521484375|cri_loss: -0.0017681121826171875|unsuper_loss: 0.0
-average reward score: 5.78125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.72%) |Training time=0.49s (22.65%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5473|ppo_ep: 1|act_loss: -0.00201416015625|cri_loss: -0.00042247772216796875|unsuper_loss: 0.0
-average reward score: 5.02734375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.57s (72.74%) |Training time=0.49s (22.47%) |Others=0.10 (4.79%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5474|ppo_ep: 1|act_loss: 0.040985107421875|cri_loss: 0.02191162109375|unsuper_loss: 0.0
-average reward score: 4.43359375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.97%) |Training time=0.48s (22.40%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.50
-[2023-04-14 12:08:55,918] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
-epoch: 0|step: 5475|ppo_ep: 1|act_loss: -0.01708984375|cri_loss: -0.0081634521484375|unsuper_loss: 0.0
-average reward score: 5.72265625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.26%) |Training time=0.49s (22.58%) |Others=0.09 (4.16%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.50
-[2023-04-14 12:08:58,069] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
-epoch: 0|step: 5476|ppo_ep: 1|act_loss: 0.01763916015625|cri_loss: 0.0101776123046875|unsuper_loss: 0.0
-average reward score: 5.32421875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.17%) |Training time=0.49s (22.68%) |Others=0.09 (4.15%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5477|ppo_ep: 1|act_loss: -0.0136566162109375|cri_loss: -0.006420135498046875|unsuper_loss: 0.0
-average reward score: 4.8203125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.61%) |Training time=0.47s (21.73%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5478|ppo_ep: 1|act_loss: -0.00641632080078125|cri_loss: -0.0027980804443359375|unsuper_loss: 0.0
-average reward score: 4.64453125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.76%) |Training time=0.47s (21.60%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.50
-[2023-04-14 12:09:04,432] [INFO] [logging.py:96:log_dist] [Rank 0] step=5480, skipped=70, lr=[2.624570779189218e-06, 2.624570779189218e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:09:04,450] [INFO] [timer.py:199:stop] epoch=0/micro_step=5480/global_step=5480, RunningAvgSamplesPerSec=105.35506986444176, CurrSamplesPerSec=103.88135870015302, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:09:04,543] [INFO] [logging.py:96:log_dist] [Rank 0] step=5480, skipped=90, lr=[1.3770464639591064e-06, 1.3770464639591064e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5479|ppo_ep: 1|act_loss: 0.081298828125|cri_loss: 0.0440673828125|unsuper_loss: 0.0
-average reward score: 4.8359375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.63%) |Training time=0.47s (21.77%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5480|ppo_ep: 1|act_loss: 0.0038700103759765625|cri_loss: 0.0020809173583984375|unsuper_loss: 0.0
-average reward score: 5.1484375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.13%) |Training time=0.48s (22.23%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5481|ppo_ep: 1|act_loss: 0.002227783203125|cri_loss: 0.0013637542724609375|unsuper_loss: 0.0
-average reward score: 5.58203125
--------------------------------------------------------------------------------------
-|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.74s (74.94%) |Training time=0.48s (20.76%) |Others=0.10 (4.31%)|CurSamplesPerSec=13.81 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5482|ppo_ep: 1|act_loss: 0.04876708984375|cri_loss: 0.02801513671875|unsuper_loss: 0.0
-average reward score: 4.67578125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.93%) |Training time=0.49s (22.46%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5483|ppo_ep: 1|act_loss: -0.0153961181640625|cri_loss: -0.0074615478515625|unsuper_loss: 0.0
-average reward score: 4.8984375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.18%) |Training time=0.48s (22.18%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5484|ppo_ep: 1|act_loss: -0.034210205078125|cri_loss: -0.0165557861328125|unsuper_loss: 0.0
-average reward score: 4.9453125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.00%) |Training time=0.48s (22.35%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5485|ppo_ep: 1|act_loss: -0.01233673095703125|cri_loss: -0.005908966064453125|unsuper_loss: 0.0
-average reward score: 4.61328125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.97%) |Training time=0.48s (22.42%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5486|ppo_ep: 1|act_loss: -0.01239013671875|cri_loss: -0.005908966064453125|unsuper_loss: 0.0
-average reward score: 5.265625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.92%) |Training time=0.49s (22.47%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5487|ppo_ep: 1|act_loss: 0.019683837890625|cri_loss: 0.01030731201171875|unsuper_loss: 0.0
-average reward score: 4.40234375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.03%) |Training time=0.48s (22.35%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5488|ppo_ep: 1|act_loss: -0.0013446807861328125|cri_loss: -0.0006318092346191406|unsuper_loss: 0.0
-average reward score: 4.875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.03%) |Training time=0.48s (22.34%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.50
-[2023-04-14 12:09:26,196] [INFO] [logging.py:96:log_dist] [Rank 0] step=5490, skipped=70, lr=[2.6080551319246483e-06, 2.6080551319246483e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:09:26,214] [INFO] [timer.py:199:stop] epoch=0/micro_step=5490/global_step=5490, RunningAvgSamplesPerSec=105.34444744793811, CurrSamplesPerSec=99.1912971502897, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:09:26,306] [INFO] [logging.py:96:log_dist] [Rank 0] step=5490, skipped=90, lr=[1.3684554622399404e-06, 1.3684554622399404e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5489|ppo_ep: 1|act_loss: 0.0125579833984375|cri_loss: 0.006900787353515625|unsuper_loss: 0.0
-average reward score: 5.0546875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.96%) |Training time=0.49s (22.43%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5490|ppo_ep: 1|act_loss: 0.0027523040771484375|cri_loss: 0.0015411376953125|unsuper_loss: 0.0
-average reward score: 4.28125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.18%) |Training time=0.48s (22.20%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5491|ppo_ep: 1|act_loss: 0.029052734375|cri_loss: 0.014892578125|unsuper_loss: 0.0
-average reward score: 5.37109375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.43%) |Training time=0.47s (21.91%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5492|ppo_ep: 1|act_loss: -0.00307464599609375|cri_loss: -0.00016021728515625|unsuper_loss: 0.0
-average reward score: 5.15234375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.63%) |Training time=0.47s (21.72%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5493|ppo_ep: 1|act_loss: 0.0474853515625|cri_loss: 0.025177001953125|unsuper_loss: 0.0
-average reward score: 4.61328125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.51%) |Training time=0.45s (20.85%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5494|ppo_ep: 1|act_loss: -0.020263671875|cri_loss: -0.0094757080078125|unsuper_loss: 0.0
-average reward score: 4.640625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.28%) |Training time=0.48s (22.12%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5495|ppo_ep: 1|act_loss: -0.0246429443359375|cri_loss: -0.0103759765625|unsuper_loss: 0.0
-average reward score: 5.4375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.97%) |Training time=0.46s (21.33%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5496|ppo_ep: 1|act_loss: -0.0308074951171875|cri_loss: -0.0125885009765625|unsuper_loss: 0.0
-average reward score: 4.64453125
--------------------------------------------------------------------------------------
-|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.70s (72.86%) |Training time=0.51s (21.72%) |Others=0.13 (5.43%)|CurSamplesPerSec=13.71 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5497|ppo_ep: 1|act_loss: -0.038238525390625|cri_loss: -0.0164947509765625|unsuper_loss: 0.0
-average reward score: 5.1015625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.01%) |Training time=0.46s (21.30%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5498|ppo_ep: 1|act_loss: -0.0411376953125|cri_loss: -0.019500732421875|unsuper_loss: 0.0
-average reward score: 4.7265625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.50%) |Training time=0.47s (21.82%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.50
-[2023-04-14 12:09:48,028] [INFO] [logging.py:96:log_dist] [Rank 0] step=5500, skipped=70, lr=[2.591572345131111e-06, 2.591572345131111e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:09:48,046] [INFO] [timer.py:199:stop] epoch=0/micro_step=5500/global_step=5500, RunningAvgSamplesPerSec=105.3416564703474, CurrSamplesPerSec=104.90333129598685, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:09:48,139] [INFO] [logging.py:96:log_dist] [Rank 0] step=5500, skipped=90, lr=[1.3598812327405274e-06, 1.3598812327405274e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5499|ppo_ep: 1|act_loss: -0.023468017578125|cri_loss: -0.0111083984375|unsuper_loss: 0.0
-average reward score: 4.46484375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.68%) |Training time=0.47s (21.65%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5500|ppo_ep: 1|act_loss: -0.010467529296875|cri_loss: -0.003673553466796875|unsuper_loss: 0.0
-average reward score: 5.546875
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.86%) |Training time=0.45s (20.52%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5501|ppo_ep: 1|act_loss: -0.0016307830810546875|cri_loss: -0.0007171630859375|unsuper_loss: 0.0
-average reward score: 5.59375
--------------------------------------------------------------------------------------
-|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.71s (75.10%) |Training time=0.46s (20.46%) |Others=0.10 (4.44%)|CurSamplesPerSec=14.08 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5502|ppo_ep: 1|act_loss: 0.020538330078125|cri_loss: 0.010650634765625|unsuper_loss: 0.0
-average reward score: 3.98828125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.34%) |Training time=0.48s (22.00%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5503|ppo_ep: 1|act_loss: 0.0221099853515625|cri_loss: 0.01146697998046875|unsuper_loss: 0.0
-average reward score: 5.8046875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.48%) |Training time=0.48s (21.92%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5504|ppo_ep: 1|act_loss: 0.015380859375|cri_loss: 0.008544921875|unsuper_loss: 0.0
-average reward score: 5.2265625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.24%) |Training time=0.48s (22.11%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5505|ppo_ep: 1|act_loss: -0.045379638671875|cri_loss: -0.021820068359375|unsuper_loss: 0.0
-average reward score: 6.23046875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.37%) |Training time=0.48s (21.98%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5506|ppo_ep: 1|act_loss: 0.0024051666259765625|cri_loss: 0.0026035308837890625|unsuper_loss: 0.0
-average reward score: 5.1796875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.37%) |Training time=0.48s (22.02%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5507|ppo_ep: 1|act_loss: 0.017333984375|cri_loss: 0.00897979736328125|unsuper_loss: 0.0
-average reward score: 5.5625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.49%) |Training time=0.48s (21.87%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5508|ppo_ep: 1|act_loss: 0.03814697265625|cri_loss: 0.0200347900390625|unsuper_loss: 0.0
-average reward score: 4.98828125
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.49%) |Training time=0.48s (21.91%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.50
-[2023-04-14 12:10:09,877] [INFO] [logging.py:96:log_dist] [Rank 0] step=5510, skipped=70, lr=[2.5751226631232703e-06, 2.5751226631232703e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:10:09,895] [INFO] [timer.py:199:stop] epoch=0/micro_step=5510/global_step=5510, RunningAvgSamplesPerSec=105.33779970588965, CurrSamplesPerSec=105.1696580948784, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:10:09,987] [INFO] [logging.py:96:log_dist] [Rank 0] step=5510, skipped=90, lr=[1.3513239025516312e-06, 1.3513239025516312e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5509|ppo_ep: 1|act_loss: 0.01238250732421875|cri_loss: 0.00702667236328125|unsuper_loss: 0.0
-average reward score: 4.78125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.79%) |Training time=0.47s (21.58%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5510|ppo_ep: 1|act_loss: -0.0090484619140625|cri_loss: -0.0040740966796875|unsuper_loss: 0.0
-average reward score: 4.44140625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.84%) |Training time=0.46s (21.50%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5511|ppo_ep: 1|act_loss: 0.001495361328125|cri_loss: 0.00122833251953125|unsuper_loss: 0.0
-average reward score: 4.609375
--------------------------------------------------------------------------------------
-|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.76%) |Training time=0.46s (19.92%) |Others=0.10 (4.32%)|CurSamplesPerSec=13.74 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5512|ppo_ep: 1|act_loss: -0.004703521728515625|cri_loss: -0.0022373199462890625|unsuper_loss: 0.0
-average reward score: 5.6484375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.91%) |Training time=0.46s (21.46%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5513|ppo_ep: 1|act_loss: -0.0171966552734375|cri_loss: -0.00771331787109375|unsuper_loss: 0.0
-average reward score: 5.3359375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.81%) |Training time=0.47s (21.56%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5514|ppo_ep: 1|act_loss: -0.035400390625|cri_loss: -0.017181396484375|unsuper_loss: 0.0
-average reward score: 4.69921875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.72%) |Training time=0.47s (21.63%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5515|ppo_ep: 1|act_loss: -0.051422119140625|cri_loss: -0.0250396728515625|unsuper_loss: 0.0
-average reward score: 5.33203125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.80%) |Training time=0.47s (21.57%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5516|ppo_ep: 1|act_loss: -0.009002685546875|cri_loss: -0.00354766845703125|unsuper_loss: 0.0
-average reward score: 4.46484375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.82%) |Training time=0.46s (21.53%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5517|ppo_ep: 1|act_loss: -0.044677734375|cri_loss: -0.021240234375|unsuper_loss: 0.0
-average reward score: 4.5078125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.82%) |Training time=0.46s (21.54%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5518|ppo_ep: 1|act_loss: 0.028076171875|cri_loss: 0.01453399658203125|unsuper_loss: 0.0
-average reward score: 5.515625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.70%) |Training time=0.47s (21.66%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.50
-[2023-04-14 12:10:31,647] [INFO] [logging.py:96:log_dist] [Rank 0] step=5520, skipped=70, lr=[2.5587063297250976e-06, 2.5587063297250976e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:10:31,665] [INFO] [timer.py:199:stop] epoch=0/micro_step=5520/global_step=5520, RunningAvgSamplesPerSec=105.33906190165855, CurrSamplesPerSec=109.92073812427265, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:10:31,758] [INFO] [logging.py:96:log_dist] [Rank 0] step=5520, skipped=90, lr=[1.342783598513529e-06, 1.342783598513529e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5519|ppo_ep: 1|act_loss: 0.0145111083984375|cri_loss: 0.00746917724609375|unsuper_loss: 0.0
-average reward score: 4.609375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.42%) |Training time=0.45s (20.96%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5520|ppo_ep: 1|act_loss: 0.0042877197265625|cri_loss: 0.00226593017578125|unsuper_loss: 0.0
-average reward score: 4.6484375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.69%) |Training time=0.47s (21.67%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5521|ppo_ep: 1|act_loss: -0.011566162109375|cri_loss: -0.00533294677734375|unsuper_loss: 0.0
-average reward score: 5.86328125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.64%) |Training time=0.47s (21.74%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5522|ppo_ep: 1|act_loss: 0.009521484375|cri_loss: 0.004878997802734375|unsuper_loss: 0.0
-average reward score: 5.0078125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.60%) |Training time=0.47s (21.72%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5523|ppo_ep: 1|act_loss: 0.01145172119140625|cri_loss: 0.0062408447265625|unsuper_loss: 0.0
-average reward score: 4.734375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.58%) |Training time=0.45s (20.75%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5524|ppo_ep: 1|act_loss: -0.00016379356384277344|cri_loss: 0.00010895729064941406|unsuper_loss: 0.0
-average reward score: 5.171875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.18%) |Training time=0.46s (21.17%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5525|ppo_ep: 1|act_loss: -0.0079498291015625|cri_loss: -0.003627777099609375|unsuper_loss: 0.0
-average reward score: 4.75
--------------------------------------------------------------------------------------
-|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.60s (71.96%) |Training time=0.52s (23.35%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.42 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5526|ppo_ep: 1|act_loss: -0.0076751708984375|cri_loss: -0.0037555694580078125|unsuper_loss: 0.0
-average reward score: 4.5859375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.14%) |Training time=0.46s (21.23%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5527|ppo_ep: 1|act_loss: 0.01256561279296875|cri_loss: 0.00673675537109375|unsuper_loss: 0.0
-average reward score: 5.83984375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.33%) |Training time=0.45s (21.03%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5528|ppo_ep: 1|act_loss: 0.04022216796875|cri_loss: 0.024658203125|unsuper_loss: 0.0
-average reward score: 4.41015625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.00%) |Training time=0.46s (21.35%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.50
-[2023-04-14 12:10:53,298] [INFO] [logging.py:96:log_dist] [Rank 0] step=5530, skipped=70, lr=[2.5423235882662523e-06, 2.5423235882662523e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:10:53,316] [INFO] [timer.py:199:stop] epoch=0/micro_step=5530/global_step=5530, RunningAvgSamplesPerSec=105.33915297205107, CurrSamplesPerSec=106.70993440034474, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:10:53,409] [INFO] [logging.py:96:log_dist] [Rank 0] step=5530, skipped=90, lr=[1.3342604472141296e-06, 1.3342604472141296e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5529|ppo_ep: 1|act_loss: -0.0207672119140625|cri_loss: -0.00991058349609375|unsuper_loss: 0.0
-average reward score: 4.6953125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.07%) |Training time=0.46s (21.33%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5530|ppo_ep: 1|act_loss: -0.0274200439453125|cri_loss: -0.01328277587890625|unsuper_loss: 0.0
-average reward score: 5.359375
--------------------------------------------------------------------------------------
-|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.72s (75.50%) |Training time=0.46s (20.08%) |Others=0.10 (4.42%)|CurSamplesPerSec=14.08 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5531|ppo_ep: 1|act_loss: 0.009490966796875|cri_loss: 0.005069732666015625|unsuper_loss: 0.0
-average reward score: 4.8203125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.11%) |Training time=0.46s (21.30%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5532|ppo_ep: 1|act_loss: -0.006946563720703125|cri_loss: -0.00295257568359375|unsuper_loss: 0.0
-average reward score: 5.43359375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.93%) |Training time=0.46s (21.44%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5533|ppo_ep: 1|act_loss: 0.03204345703125|cri_loss: 0.016845703125|unsuper_loss: 0.0
-average reward score: 5.41015625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.84%) |Training time=0.46s (21.53%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5534|ppo_ep: 1|act_loss: -0.0038604736328125|cri_loss: -0.001781463623046875|unsuper_loss: 0.0
-average reward score: 5.5390625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.06%) |Training time=0.46s (21.29%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5535|ppo_ep: 1|act_loss: -0.01529693603515625|cri_loss: -0.0073699951171875|unsuper_loss: 0.0
-average reward score: 4.7265625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.12%) |Training time=0.46s (21.25%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5536|ppo_ep: 1|act_loss: -0.0262298583984375|cri_loss: -0.0125732421875|unsuper_loss: 0.0
-average reward score: 5.5390625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.72%) |Training time=0.47s (21.65%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5537|ppo_ep: 1|act_loss: 0.0080108642578125|cri_loss: 0.0042877197265625|unsuper_loss: 0.0
-average reward score: 5.44140625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.93%) |Training time=0.46s (21.41%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5538|ppo_ep: 1|act_loss: -0.0191802978515625|cri_loss: -0.00933837890625|unsuper_loss: 0.0
-average reward score: 5.68359375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.05%) |Training time=0.44s (20.28%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.50
-[2023-04-14 12:11:14,996] [INFO] [logging.py:96:log_dist] [Rank 0] step=5540, skipped=70, lr=[2.5259746815784854e-06, 2.5259746815784854e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:11:15,014] [INFO] [timer.py:199:stop] epoch=0/micro_step=5540/global_step=5540, RunningAvgSamplesPerSec=105.344337416235, CurrSamplesPerSec=110.70370709457649, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:11:15,107] [INFO] [logging.py:96:log_dist] [Rank 0] step=5540, skipped=90, lr=[1.3257545749870973e-06, 1.3257545749870973e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5539|ppo_ep: 1|act_loss: -0.007534027099609375|cri_loss: -0.00356292724609375|unsuper_loss: 0.0
-average reward score: 5.08984375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.41%) |Training time=0.45s (20.96%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5540|ppo_ep: 1|act_loss: 0.03045654296875|cri_loss: 0.0161895751953125|unsuper_loss: 0.0
-average reward score: 5.3828125
--------------------------------------------------------------------------------------
-|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.76%) |Training time=0.46s (19.92%) |Others=0.10 (4.33%)|CurSamplesPerSec=13.77 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5541|ppo_ep: 1|act_loss: -0.0088043212890625|cri_loss: -0.003543853759765625|unsuper_loss: 0.0
-average reward score: 4.87109375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.03%) |Training time=0.46s (21.32%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5542|ppo_ep: 1|act_loss: -0.0079803466796875|cri_loss: -0.003627777099609375|unsuper_loss: 0.0
-average reward score: 5.8046875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.84%) |Training time=0.46s (21.54%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5543|ppo_ep: 1|act_loss: 0.0159912109375|cri_loss: 0.0084991455078125|unsuper_loss: 0.0
-average reward score: 5.6328125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.82%) |Training time=0.46s (21.53%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5544|ppo_ep: 1|act_loss: -0.01409912109375|cri_loss: -0.00643157958984375|unsuper_loss: 0.0
-average reward score: 5.3203125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.85%) |Training time=0.46s (21.51%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5545|ppo_ep: 1|act_loss: 0.0128326416015625|cri_loss: 0.00830841064453125|unsuper_loss: 0.0
-average reward score: 5.33984375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.81%) |Training time=0.47s (21.56%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5546|ppo_ep: 1|act_loss: 0.002899169921875|cri_loss: 0.0016632080078125|unsuper_loss: 0.0
-average reward score: 5.765625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.57s (72.83%) |Training time=0.49s (22.54%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5547|ppo_ep: 1|act_loss: 0.00019216537475585938|cri_loss: 0.00024962425231933594|unsuper_loss: 0.0
-average reward score: 4.4296875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.49%) |Training time=0.47s (21.87%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5548|ppo_ep: 1|act_loss: -0.00588226318359375|cri_loss: -0.002727508544921875|unsuper_loss: 0.0
-average reward score: 5.62890625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.99%) |Training time=0.48s (22.40%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.50
-[2023-04-14 12:11:36,745] [INFO] [logging.py:96:log_dist] [Rank 0] step=5550, skipped=70, lr=[2.5096598519920344e-06, 2.5096598519920344e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:11:36,763] [INFO] [timer.py:199:stop] epoch=0/micro_step=5550/global_step=5550, RunningAvgSamplesPerSec=105.34174265021153, CurrSamplesPerSec=101.51943477330195, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:11:36,856] [INFO] [logging.py:96:log_dist] [Rank 0] step=5550, skipped=90, lr=[1.3172661079099752e-06, 1.3172661079099752e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5549|ppo_ep: 1|act_loss: 0.0240631103515625|cri_loss: 0.01264190673828125|unsuper_loss: 0.0
-average reward score: 6.34765625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.21%) |Training time=0.48s (22.16%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5550|ppo_ep: 1|act_loss: 0.01739501953125|cri_loss: 0.00885772705078125|unsuper_loss: 0.0
-average reward score: 4.58203125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.33%) |Training time=0.48s (22.03%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5551|ppo_ep: 1|act_loss: 0.00698089599609375|cri_loss: 0.00400543212890625|unsuper_loss: 0.0
-average reward score: 5.34375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.08%) |Training time=0.48s (22.30%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5552|ppo_ep: 1|act_loss: 0.049072265625|cri_loss: 0.025238037109375|unsuper_loss: 0.0
-average reward score: 4.9140625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.32%) |Training time=0.47s (22.03%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5553|ppo_ep: 1|act_loss: -0.00720977783203125|cri_loss: -0.00344085693359375|unsuper_loss: 0.0
-average reward score: 5.22265625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.43%) |Training time=0.45s (20.93%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5554|ppo_ep: 1|act_loss: 0.0136260986328125|cri_loss: 0.007171630859375|unsuper_loss: 0.0
-average reward score: 4.88671875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.27%) |Training time=0.45s (21.09%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5555|ppo_ep: 1|act_loss: -0.016937255859375|cri_loss: -0.0080108642578125|unsuper_loss: 0.0
-average reward score: 5.74609375
--------------------------------------------------------------------------------------
-|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.92%) |Training time=0.46s (19.76%) |Others=0.10 (4.32%)|CurSamplesPerSec=13.76 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5556|ppo_ep: 1|act_loss: 0.0487060546875|cri_loss: 0.025634765625|unsuper_loss: 0.0
-average reward score: 5.3828125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.86%) |Training time=0.47s (21.51%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5557|ppo_ep: 1|act_loss: -0.00664520263671875|cri_loss: -0.002841949462890625|unsuper_loss: 0.0
-average reward score: 4.62109375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.15%) |Training time=0.46s (21.21%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5558|ppo_ep: 1|act_loss: -0.01214599609375|cri_loss: -0.005767822265625|unsuper_loss: 0.0
-average reward score: 5.19921875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.71%) |Training time=0.47s (21.69%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.50
-[2023-04-14 12:11:58,543] [INFO] [logging.py:96:log_dist] [Rank 0] step=5560, skipped=70, lr=[2.4933793413320327e-06, 2.4933793413320327e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:11:58,561] [INFO] [timer.py:199:stop] epoch=0/micro_step=5560/global_step=5560, RunningAvgSamplesPerSec=105.34322717762855, CurrSamplesPerSec=109.43845430039579, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:11:58,654] [INFO] [logging.py:96:log_dist] [Rank 0] step=5560, skipped=90, lr=[1.308795171802324e-06, 1.308795171802324e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5559|ppo_ep: 1|act_loss: -0.021575927734375|cri_loss: -0.0096282958984375|unsuper_loss: 0.0
-average reward score: 5.1015625
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.71%) |Training time=0.46s (20.74%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5560|ppo_ep: 1|act_loss: 0.004123687744140625|cri_loss: 0.0024566650390625|unsuper_loss: 0.0
-average reward score: 5.58984375
--------------------------------------------------------------------------------------
-|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.70s (75.41%) |Training time=0.45s (20.14%) |Others=0.10 (4.45%)|CurSamplesPerSec=14.19 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5561|ppo_ep: 1|act_loss: 0.0013027191162109375|cri_loss: 0.0007872581481933594|unsuper_loss: 0.0
-average reward score: 5.375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.12%) |Training time=0.46s (21.25%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5562|ppo_ep: 1|act_loss: -0.004364013671875|cri_loss: -0.001819610595703125|unsuper_loss: 0.0
-average reward score: 5.73046875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.05%) |Training time=0.46s (21.30%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5563|ppo_ep: 1|act_loss: -0.0013980865478515625|cri_loss: -0.0005540847778320312|unsuper_loss: 0.0
-average reward score: 5.4609375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.07%) |Training time=0.46s (21.29%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5564|ppo_ep: 1|act_loss: -0.0019588470458984375|cri_loss: -0.0007309913635253906|unsuper_loss: 0.0
-average reward score: 5.265625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.98%) |Training time=0.46s (21.39%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5565|ppo_ep: 1|act_loss: 0.0224609375|cri_loss: 0.01166534423828125|unsuper_loss: 0.0
-average reward score: 5.421875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.00%) |Training time=0.46s (21.37%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5566|ppo_ep: 1|act_loss: -0.00531768798828125|cri_loss: -0.0025119781494140625|unsuper_loss: 0.0
-average reward score: 5.46875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.11%) |Training time=0.46s (21.25%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5567|ppo_ep: 1|act_loss: 0.0183563232421875|cri_loss: 0.00948333740234375|unsuper_loss: 0.0
-average reward score: 5.140625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.83%) |Training time=0.47s (21.54%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5568|ppo_ep: 1|act_loss: 0.04541015625|cri_loss: 0.02337646484375|unsuper_loss: 0.0
-average reward score: 6.4375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.15%) |Training time=0.46s (21.19%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.50
-[2023-04-14 12:12:20,280] [INFO] [logging.py:96:log_dist] [Rank 0] step=5570, skipped=70, lr=[2.477133390914919e-06, 2.477133390914919e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:12:20,299] [INFO] [timer.py:199:stop] epoch=0/micro_step=5570/global_step=5570, RunningAvgSamplesPerSec=105.34633939663574, CurrSamplesPerSec=104.92982122851744, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:12:20,391] [INFO] [logging.py:96:log_dist] [Rank 0] step=5570, skipped=90, lr=[1.300341892223852e-06, 1.300341892223852e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5569|ppo_ep: 1|act_loss: -0.00798797607421875|cri_loss: -0.0038700103759765625|unsuper_loss: 0.0
-average reward score: 5.4609375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.82%) |Training time=0.47s (21.59%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5570|ppo_ep: 1|act_loss: -0.003910064697265625|cri_loss: -0.0016078948974609375|unsuper_loss: 0.0
-average reward score: 5.0
--------------------------------------------------------------------------------------
-|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.53%) |Training time=0.47s (20.16%) |Others=0.10 (4.31%)|CurSamplesPerSec=13.76 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5571|ppo_ep: 1|act_loss: -0.00772857666015625|cri_loss: -0.0037689208984375|unsuper_loss: 0.0
-average reward score: 4.7265625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.66%) |Training time=0.47s (21.73%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5572|ppo_ep: 1|act_loss: 0.011016845703125|cri_loss: 0.005828857421875|unsuper_loss: 0.0
-average reward score: 5.7734375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.83%) |Training time=0.46s (21.54%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5573|ppo_ep: 1|act_loss: -0.02130126953125|cri_loss: -0.0103607177734375|unsuper_loss: 0.0
-average reward score: 5.4140625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.65%) |Training time=0.47s (21.73%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5574|ppo_ep: 1|act_loss: -0.018402099609375|cri_loss: -0.00804901123046875|unsuper_loss: 0.0
-average reward score: 5.640625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.69%) |Training time=0.47s (21.68%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5575|ppo_ep: 1|act_loss: 0.001041412353515625|cri_loss: 0.0006079673767089844|unsuper_loss: 0.0
-average reward score: 5.65625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.68%) |Training time=0.47s (21.71%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5576|ppo_ep: 1|act_loss: 0.0243072509765625|cri_loss: 0.0126800537109375|unsuper_loss: 0.0
-average reward score: 5.375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.86%) |Training time=0.46s (21.46%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.50
-[2023-04-14 12:12:37,824] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
-epoch: 0|step: 5577|ppo_ep: 1|act_loss: 0.058837890625|cri_loss: 0.03009033203125|unsuper_loss: 0.0
-average reward score: 5.046875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.16%) |Training time=0.47s (21.69%) |Others=0.09 (4.15%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.50
-[2023-04-14 12:12:39,972] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
-epoch: 0|step: 5578|ppo_ep: 1|act_loss: 0.04931640625|cri_loss: 0.025543212890625|unsuper_loss: 0.0
-average reward score: 4.96484375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.24%) |Training time=0.46s (21.61%) |Others=0.09 (4.15%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.50
-[2023-04-14 12:12:42,017] [INFO] [logging.py:96:log_dist] [Rank 0] step=5580, skipped=70, lr=[2.4609222415448723e-06, 2.4609222415448723e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:12:42,036] [INFO] [timer.py:199:stop] epoch=0/micro_step=5580/global_step=5580, RunningAvgSamplesPerSec=105.34619836932745, CurrSamplesPerSec=107.6787841445882, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:12:42,128] [INFO] [logging.py:96:log_dist] [Rank 0] step=5580, skipped=92, lr=[1.2935920654711779e-06, 1.2935920654711779e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5579|ppo_ep: 1|act_loss: 0.010833740234375|cri_loss: 0.00600433349609375|unsuper_loss: 0.0
-average reward score: 5.09375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.07%) |Training time=0.46s (21.31%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5580|ppo_ep: 1|act_loss: 0.005214691162109375|cri_loss: 0.002765655517578125|unsuper_loss: 0.0
-average reward score: 5.43359375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.59%) |Training time=0.47s (21.80%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5581|ppo_ep: 1|act_loss: 0.001560211181640625|cri_loss: 0.0010395050048828125|unsuper_loss: 0.0
-average reward score: 5.1640625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.00%) |Training time=0.46s (21.36%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5582|ppo_ep: 1|act_loss: -0.055084228515625|cri_loss: -0.026885986328125|unsuper_loss: 0.0
-average reward score: 5.5625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.15%) |Training time=0.46s (21.21%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5583|ppo_ep: 1|act_loss: -0.031036376953125|cri_loss: -0.0148162841796875|unsuper_loss: 0.0
-average reward score: 6.046875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.46%) |Training time=0.48s (21.92%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5584|ppo_ep: 1|act_loss: 0.0030517578125|cri_loss: 0.0017175674438476562|unsuper_loss: 0.0
-average reward score: 5.640625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.00%) |Training time=0.46s (21.37%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5585|ppo_ep: 1|act_loss: -0.010528564453125|cri_loss: -0.00510406494140625|unsuper_loss: 0.0
-average reward score: 5.515625
--------------------------------------------------------------------------------------
-|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.77s (76.04%) |Training time=0.46s (19.65%) |Others=0.10 (4.31%)|CurSamplesPerSec=13.77 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5586|ppo_ep: 1|act_loss: -0.0234375|cri_loss: -0.01132965087890625|unsuper_loss: 0.0
-average reward score: 4.91796875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.62%) |Training time=0.46s (21.11%) |Others=0.11 (5.27%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5587|ppo_ep: 1|act_loss: 0.04998779296875|cri_loss: 0.0256805419921875|unsuper_loss: 0.0
-average reward score: 5.2421875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.01%) |Training time=0.46s (21.37%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5588|ppo_ep: 1|act_loss: 0.00350189208984375|cri_loss: 0.0020236968994140625|unsuper_loss: 0.0
-average reward score: 5.4921875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.21%) |Training time=0.46s (21.12%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.50
-[2023-04-14 12:13:03,886] [INFO] [logging.py:96:log_dist] [Rank 0] step=5590, skipped=70, lr=[2.4447461335102328e-06, 2.4447461335102328e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:13:03,904] [INFO] [timer.py:199:stop] epoch=0/micro_step=5590/global_step=5590, RunningAvgSamplesPerSec=105.3472014072174, CurrSamplesPerSec=99.85687682696464, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:13:03,996] [INFO] [logging.py:96:log_dist] [Rank 0] step=5590, skipped=92, lr=[1.2851708832191219e-06, 1.2851708832191219e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5589|ppo_ep: 1|act_loss: -0.00678253173828125|cri_loss: -0.00315093994140625|unsuper_loss: 0.0
-average reward score: 5.7421875
--------------------------------------------------------------------------------------
-|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.63s (73.11%) |Training time=0.50s (22.40%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.35 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5590|ppo_ep: 1|act_loss: 0.05072021484375|cri_loss: 0.02679443359375|unsuper_loss: 0.0
-average reward score: 5.62890625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.83%) |Training time=0.47s (21.55%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5591|ppo_ep: 1|act_loss: -0.0042266845703125|cri_loss: -0.0016841888427734375|unsuper_loss: 0.0
-average reward score: 5.609375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.00%) |Training time=0.46s (21.35%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5592|ppo_ep: 1|act_loss: 0.0079345703125|cri_loss: 0.0050506591796875|unsuper_loss: 0.0
-average reward score: 5.26953125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.24%) |Training time=0.45s (21.08%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5593|ppo_ep: 1|act_loss: -0.07049560546875|cri_loss: -0.034515380859375|unsuper_loss: 0.0
-average reward score: 5.80859375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.98%) |Training time=0.49s (22.46%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5594|ppo_ep: 1|act_loss: 0.0115966796875|cri_loss: 0.00616455078125|unsuper_loss: 0.0
-average reward score: 5.9453125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.26%) |Training time=0.48s (22.13%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5595|ppo_ep: 1|act_loss: -0.01904296875|cri_loss: -0.0092620849609375|unsuper_loss: 0.0
-average reward score: 5.703125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.34%) |Training time=0.48s (22.07%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5596|ppo_ep: 1|act_loss: -0.025482177734375|cri_loss: -0.012420654296875|unsuper_loss: 0.0
-average reward score: 5.8203125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.34%) |Training time=0.48s (22.06%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5597|ppo_ep: 1|act_loss: -0.030914306640625|cri_loss: -0.01505279541015625|unsuper_loss: 0.0
-average reward score: 5.28125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.10%) |Training time=0.49s (22.30%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5598|ppo_ep: 1|act_loss: -0.008148193359375|cri_loss: -0.0037078857421875|unsuper_loss: 0.0
-average reward score: 4.5078125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.19%) |Training time=0.48s (22.19%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.50
-[2023-04-14 12:13:25,592] [INFO] [logging.py:96:log_dist] [Rank 0] step=5600, skipped=70, lr=[2.4286053065799456e-06, 2.4286053065799456e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:13:25,611] [INFO] [timer.py:199:stop] epoch=0/micro_step=5600/global_step=5600, RunningAvgSamplesPerSec=105.3410928350194, CurrSamplesPerSec=100.1417826315008, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:13:25,703] [INFO] [logging.py:96:log_dist] [Rank 0] step=5600, skipped=92, lr=[1.2767677076651913e-06, 1.2767677076651913e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5599|ppo_ep: 1|act_loss: -0.0034942626953125|cri_loss: -0.0016145706176757812|unsuper_loss: 0.0
-average reward score: 5.06640625
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.27%) |Training time=0.48s (22.07%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5600|ppo_ep: 1|act_loss: 0.0264892578125|cri_loss: 0.01355743408203125|unsuper_loss: 0.0
-average reward score: 5.984375
--------------------------------------------------------------------------------------
-|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.76s (74.56%) |Training time=0.50s (21.20%) |Others=0.10 (4.24%)|CurSamplesPerSec=13.55 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5601|ppo_ep: 1|act_loss: 0.016510009765625|cri_loss: 0.0084381103515625|unsuper_loss: 0.0
-average reward score: 4.64453125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.85%) |Training time=0.47s (21.54%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5602|ppo_ep: 1|act_loss: -0.00408935546875|cri_loss: -0.0019178390502929688|unsuper_loss: 0.0
-average reward score: 6.00390625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.55%) |Training time=0.47s (21.84%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5603|ppo_ep: 1|act_loss: -0.001995086669921875|cri_loss: -0.000728607177734375|unsuper_loss: 0.0
-average reward score: 6.02734375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.06%) |Training time=0.46s (21.30%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5604|ppo_ep: 1|act_loss: -0.00925445556640625|cri_loss: -0.00435638427734375|unsuper_loss: 0.0
-average reward score: 4.9609375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.61%) |Training time=0.47s (21.78%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5605|ppo_ep: 1|act_loss: 0.007480621337890625|cri_loss: 0.004024505615234375|unsuper_loss: 0.0
-average reward score: 4.8984375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.91%) |Training time=0.46s (21.46%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5606|ppo_ep: 1|act_loss: -0.0097808837890625|cri_loss: -0.00438690185546875|unsuper_loss: 0.0
-average reward score: 4.73046875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.81%) |Training time=0.47s (21.58%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5607|ppo_ep: 1|act_loss: -0.001796722412109375|cri_loss: -0.0005803108215332031|unsuper_loss: 0.0
-average reward score: 5.6640625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.73%) |Training time=0.47s (21.65%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5608|ppo_ep: 1|act_loss: 0.003429412841796875|cri_loss: 0.0020198822021484375|unsuper_loss: 0.0
-average reward score: 5.40234375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.96%) |Training time=0.46s (21.42%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.50
-[2023-04-14 12:13:47,414] [INFO] [logging.py:96:log_dist] [Rank 0] step=5610, skipped=70, lr=[2.4125000000000015e-06, 2.4125000000000015e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:13:47,432] [INFO] [timer.py:199:stop] epoch=0/micro_step=5610/global_step=5610, RunningAvgSamplesPerSec=105.33799128956171, CurrSamplesPerSec=100.43267746437243, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:13:47,524] [INFO] [logging.py:96:log_dist] [Rank 0] step=5610, skipped=92, lr=[1.2683826633647206e-06, 1.2683826633647206e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5609|ppo_ep: 1|act_loss: 0.01202392578125|cri_loss: 0.006450653076171875|unsuper_loss: 0.0
-average reward score: 5.16796875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.14%) |Training time=0.48s (22.26%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5610|ppo_ep: 1|act_loss: -0.012664794921875|cri_loss: -0.006195068359375|unsuper_loss: 0.0
-average reward score: 5.33984375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.01%) |Training time=0.48s (22.37%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5611|ppo_ep: 1|act_loss: 0.043304443359375|cri_loss: 0.0238037109375|unsuper_loss: 0.0
-average reward score: 5.3359375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.45%) |Training time=0.47s (21.91%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5612|ppo_ep: 1|act_loss: 0.00029969215393066406|cri_loss: 0.00031495094299316406|unsuper_loss: 0.0
-average reward score: 5.3359375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.09%) |Training time=0.48s (22.31%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5613|ppo_ep: 1|act_loss: 0.019256591796875|cri_loss: 0.0101470947265625|unsuper_loss: 0.0
-average reward score: 4.8125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.24%) |Training time=0.48s (22.13%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5614|ppo_ep: 1|act_loss: 0.002948760986328125|cri_loss: 0.0017547607421875|unsuper_loss: 0.0
-average reward score: 4.76171875
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.64%) |Training time=0.48s (21.77%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5615|ppo_ep: 1|act_loss: 0.0222320556640625|cri_loss: 0.01169586181640625|unsuper_loss: 0.0
-average reward score: 5.1328125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.10%) |Training time=0.48s (22.19%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5616|ppo_ep: 1|act_loss: -0.0018444061279296875|cri_loss: -0.0008955001831054688|unsuper_loss: 0.0
-average reward score: 4.73828125
--------------------------------------------------------------------------------------
-|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.75s (74.64%) |Training time=0.49s (21.07%) |Others=0.10 (4.29%)|CurSamplesPerSec=13.68 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5617|ppo_ep: 1|act_loss: 0.031494140625|cri_loss: 0.0165557861328125|unsuper_loss: 0.0
-average reward score: 5.58203125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.10%) |Training time=0.48s (22.24%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5618|ppo_ep: 1|act_loss: 0.0479736328125|cri_loss: 0.0246734619140625|unsuper_loss: 0.0
-average reward score: 5.19140625
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.79%) |Training time=0.47s (21.61%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.50
-[2023-04-14 12:14:09,378] [INFO] [logging.py:96:log_dist] [Rank 0] step=5620, skipped=70, lr=[2.396430452489896e-06, 2.396430452489896e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:14:09,396] [INFO] [timer.py:199:stop] epoch=0/micro_step=5620/global_step=5620, RunningAvgSamplesPerSec=105.32878407607137, CurrSamplesPerSec=98.12636157931868, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:14:09,489] [INFO] [logging.py:96:log_dist] [Rank 0] step=5620, skipped=92, lr=[1.260015874604294e-06, 1.260015874604294e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5619|ppo_ep: 1|act_loss: 0.005878448486328125|cri_loss: 0.00319671630859375|unsuper_loss: 0.0
-average reward score: 4.60546875
--------------------------------------------------------------------------------------
-|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.68s (74.07%) |Training time=0.49s (21.54%) |Others=0.10 (4.39%)|CurSamplesPerSec=14.11 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5620|ppo_ep: 1|act_loss: -0.0250244140625|cri_loss: -0.01186370849609375|unsuper_loss: 0.0
-average reward score: 5.42578125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.57%) |Training time=0.47s (21.76%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5621|ppo_ep: 1|act_loss: 0.03326416015625|cri_loss: 0.0173797607421875|unsuper_loss: 0.0
-average reward score: 5.63671875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.69%) |Training time=0.46s (21.65%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5622|ppo_ep: 1|act_loss: -0.02783203125|cri_loss: -0.0135955810546875|unsuper_loss: 0.0
-average reward score: 5.21484375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.36%) |Training time=0.47s (21.99%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5623|ppo_ep: 1|act_loss: -0.0118560791015625|cri_loss: -0.005462646484375|unsuper_loss: 0.0
-average reward score: 5.359375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.40%) |Training time=0.47s (21.95%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5624|ppo_ep: 1|act_loss: 0.025970458984375|cri_loss: 0.0137176513671875|unsuper_loss: 0.0
-average reward score: 5.0546875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.35%) |Training time=0.47s (21.99%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5625|ppo_ep: 1|act_loss: 0.0224609375|cri_loss: 0.01157379150390625|unsuper_loss: 0.0
-average reward score: 5.4140625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.50%) |Training time=0.47s (21.84%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5626|ppo_ep: 1|act_loss: 0.015960693359375|cri_loss: 0.00885009765625|unsuper_loss: 0.0
-average reward score: 5.40234375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.36%) |Training time=0.47s (21.98%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5627|ppo_ep: 1|act_loss: -0.0035419464111328125|cri_loss: -0.0016145706176757812|unsuper_loss: 0.0
-average reward score: 5.19921875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.35%) |Training time=0.47s (22.00%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5628|ppo_ep: 1|act_loss: -0.01557159423828125|cri_loss: -0.00714874267578125|unsuper_loss: 0.0
-average reward score: 4.58984375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.46%) |Training time=0.47s (21.89%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.50
-[2023-04-14 12:14:30,869] [INFO] [logging.py:96:log_dist] [Rank 0] step=5630, skipped=70, lr=[2.3803969022390895e-06, 2.3803969022390895e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:14:30,888] [INFO] [timer.py:199:stop] epoch=0/micro_step=5630/global_step=5630, RunningAvgSamplesPerSec=105.32614334516194, CurrSamplesPerSec=103.03302993013564, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:14:30,980] [INFO] [logging.py:96:log_dist] [Rank 0] step=5630, skipped=92, lr=[1.251667465399905e-06, 1.251667465399905e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5629|ppo_ep: 1|act_loss: 0.0026702880859375|cri_loss: 0.0017681121826171875|unsuper_loss: 0.0
-average reward score: 4.81640625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.52%) |Training time=0.47s (21.87%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5630|ppo_ep: 1|act_loss: -0.008331298828125|cri_loss: -0.003772735595703125|unsuper_loss: 0.0
-average reward score: 4.26953125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.14%) |Training time=0.46s (21.17%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5631|ppo_ep: 1|act_loss: -0.002307891845703125|cri_loss: -4.9591064453125e-05|unsuper_loss: 0.0
-average reward score: 5.015625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.30%) |Training time=0.45s (21.06%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5632|ppo_ep: 1|act_loss: 0.0172119140625|cri_loss: 0.0088653564453125|unsuper_loss: 0.0
-average reward score: 5.7890625
--------------------------------------------------------------------------------------
-|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.75s (74.95%) |Training time=0.49s (20.77%) |Others=0.10 (4.28%)|CurSamplesPerSec=13.68 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5633|ppo_ep: 1|act_loss: 0.006427764892578125|cri_loss: 0.003536224365234375|unsuper_loss: 0.0
-average reward score: 5.53125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.25%) |Training time=0.45s (21.09%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5634|ppo_ep: 1|act_loss: -0.0005903244018554688|cri_loss: -9.5367431640625e-06|unsuper_loss: 0.0
-average reward score: 5.359375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.08%) |Training time=0.46s (21.30%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5635|ppo_ep: 1|act_loss: 0.02325439453125|cri_loss: 0.0118408203125|unsuper_loss: 0.0
-average reward score: 4.5390625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.26%) |Training time=0.45s (21.09%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5636|ppo_ep: 1|act_loss: 0.0030612945556640625|cri_loss: 0.00257110595703125|unsuper_loss: 0.0
-average reward score: 6.2109375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.25%) |Training time=0.45s (21.10%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5637|ppo_ep: 1|act_loss: -0.00508880615234375|cri_loss: -0.002368927001953125|unsuper_loss: 0.0
-average reward score: 5.79296875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.51%) |Training time=0.45s (20.83%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5638|ppo_ep: 1|act_loss: -0.00368499755859375|cri_loss: -0.0016355514526367188|unsuper_loss: 0.0
-average reward score: 5.5390625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.28%) |Training time=0.45s (21.07%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.50
-[2023-04-14 12:14:52,594] [INFO] [logging.py:96:log_dist] [Rank 0] step=5640, skipped=70, lr=[2.3643995869034777e-06, 2.3643995869034777e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:14:52,612] [INFO] [timer.py:199:stop] epoch=0/micro_step=5640/global_step=5640, RunningAvgSamplesPerSec=105.33151498596196, CurrSamplesPerSec=109.83789021262527, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:14:52,705] [INFO] [logging.py:96:log_dist] [Rank 0] step=5640, skipped=92, lr=[1.2433375594951166e-06, 1.2433375594951166e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5639|ppo_ep: 1|act_loss: 0.0013294219970703125|cri_loss: 0.0008649826049804688|unsuper_loss: 0.0
-average reward score: 5.73828125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.27%) |Training time=0.45s (21.09%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5640|ppo_ep: 1|act_loss: -0.02569580078125|cri_loss: -0.009002685546875|unsuper_loss: 0.0
-average reward score: 5.10546875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.38%) |Training time=0.45s (20.96%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5641|ppo_ep: 1|act_loss: -0.0119476318359375|cri_loss: -0.005680084228515625|unsuper_loss: 0.0
-average reward score: 4.98046875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.12%) |Training time=0.46s (21.24%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5642|ppo_ep: 1|act_loss: 0.0367431640625|cri_loss: 0.018951416015625|unsuper_loss: 0.0
-average reward score: 5.23828125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.01%) |Training time=0.47s (21.40%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5643|ppo_ep: 1|act_loss: 0.0321044921875|cri_loss: 0.016815185546875|unsuper_loss: 0.0
-average reward score: 5.34375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.33%) |Training time=0.45s (21.01%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5644|ppo_ep: 1|act_loss: -0.0030670166015625|cri_loss: -0.0014181137084960938|unsuper_loss: 0.0
-average reward score: 5.51171875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.82%) |Training time=0.47s (21.55%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5645|ppo_ep: 1|act_loss: -0.01788330078125|cri_loss: -0.0072021484375|unsuper_loss: 0.0
-average reward score: 5.17578125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.25%) |Training time=0.46s (21.13%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5646|ppo_ep: 1|act_loss: -0.0009388923645019531|cri_loss: -6.29425048828125e-05|unsuper_loss: 0.0
-average reward score: 5.125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.44%) |Training time=0.45s (20.91%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5647|ppo_ep: 1|act_loss: 0.03594970703125|cri_loss: 0.0187530517578125|unsuper_loss: 0.0
-average reward score: 6.3203125
--------------------------------------------------------------------------------------
-|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.78s (75.12%) |Training time=0.49s (20.65%) |Others=0.10 (4.24%)|CurSamplesPerSec=13.54 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5648|ppo_ep: 1|act_loss: 0.00289154052734375|cri_loss: 0.001529693603515625|unsuper_loss: 0.0
-average reward score: 5.6953125
--------------------------------------------------------------------------------------
-|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.63s (71.66%) |Training time=0.48s (21.32%) |Others=0.16 (7.02%)|CurSamplesPerSec=14.08 |AvgSamplesPerSec=14.50
-[2023-04-14 12:15:14,519] [INFO] [logging.py:96:log_dist] [Rank 0] step=5650, skipped=70, lr=[2.3484387436018617e-06, 2.3484387436018617e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:15:14,537] [INFO] [timer.py:199:stop] epoch=0/micro_step=5650/global_step=5650, RunningAvgSamplesPerSec=105.33311141352057, CurrSamplesPerSec=107.29409808518922, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:15:14,630] [INFO] [logging.py:96:log_dist] [Rank 0] step=5650, skipped=92, lr=[1.2350262803592295e-06, 1.2350262803592295e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5649|ppo_ep: 1|act_loss: -0.032562255859375|cri_loss: -0.01555633544921875|unsuper_loss: 0.0
-average reward score: 5.26171875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.04%) |Training time=0.46s (21.32%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5650|ppo_ep: 1|act_loss: -0.030517578125|cri_loss: -0.01496124267578125|unsuper_loss: 0.0
-average reward score: 5.50390625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.09%) |Training time=0.46s (21.27%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5651|ppo_ep: 1|act_loss: 0.00797271728515625|cri_loss: 0.004436492919921875|unsuper_loss: 0.0
-average reward score: 5.26953125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.99%) |Training time=0.46s (21.37%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5652|ppo_ep: 1|act_loss: -0.0179290771484375|cri_loss: -0.0073394775390625|unsuper_loss: 0.0
-average reward score: 5.73046875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.13%) |Training time=0.45s (21.04%) |Others=0.10 (4.83%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5653|ppo_ep: 1|act_loss: 0.01305389404296875|cri_loss: 0.006961822509765625|unsuper_loss: 0.0
-average reward score: 5.44921875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.24%) |Training time=0.45s (21.12%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5654|ppo_ep: 1|act_loss: 0.024688720703125|cri_loss: 0.01264190673828125|unsuper_loss: 0.0
-average reward score: 5.8359375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.96%) |Training time=0.46s (21.40%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5655|ppo_ep: 1|act_loss: -0.01490020751953125|cri_loss: -0.00708770751953125|unsuper_loss: 0.0
-average reward score: 5.4375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.30%) |Training time=0.45s (21.05%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5656|ppo_ep: 1|act_loss: -0.00908660888671875|cri_loss: -0.00432586669921875|unsuper_loss: 0.0
-average reward score: 5.39453125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.06%) |Training time=0.46s (21.31%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5657|ppo_ep: 1|act_loss: 0.010772705078125|cri_loss: 0.005657196044921875|unsuper_loss: 0.0
-average reward score: 5.5625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.13%) |Training time=0.46s (21.24%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5658|ppo_ep: 1|act_loss: 0.020751953125|cri_loss: 0.0118560791015625|unsuper_loss: 0.0
-average reward score: 5.19140625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.10%) |Training time=0.46s (21.24%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.50
-[2023-04-14 12:15:36,069] [INFO] [logging.py:96:log_dist] [Rank 0] step=5660, skipped=70, lr=[2.3325146089124427e-06, 2.3325146089124427e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:15:36,087] [INFO] [timer.py:199:stop] epoch=0/micro_step=5660/global_step=5660, RunningAvgSamplesPerSec=105.33806343563634, CurrSamplesPerSec=105.81956599209691, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:15:36,180] [INFO] [logging.py:96:log_dist] [Rank 0] step=5660, skipped=92, lr=[1.2267337511854502e-06, 1.2267337511854502e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5659|ppo_ep: 1|act_loss: -0.008026123046875|cri_loss: -0.00384521484375|unsuper_loss: 0.0
-average reward score: 5.99609375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.89%) |Training time=0.47s (21.51%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5660|ppo_ep: 1|act_loss: 0.0150604248046875|cri_loss: 0.00789642333984375|unsuper_loss: 0.0
-average reward score: 5.44140625
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.25%) |Training time=0.49s (22.18%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5661|ppo_ep: 1|act_loss: -0.02191162109375|cri_loss: -0.01050567626953125|unsuper_loss: 0.0
-average reward score: 5.234375
--------------------------------------------------------------------------------------
-|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.60s (71.17%) |Training time=0.47s (20.79%) |Others=0.18 (8.04%)|CurSamplesPerSec=14.24 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5662|ppo_ep: 1|act_loss: 0.005733489990234375|cri_loss: 0.003093719482421875|unsuper_loss: 0.0
-average reward score: 5.703125
--------------------------------------------------------------------------------------
-|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.67s (74.59%) |Training time=0.47s (20.92%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.27 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5663|ppo_ep: 1|act_loss: -0.0006504058837890625|cri_loss: 3.337860107421875e-06|unsuper_loss: 0.0
-average reward score: 5.6796875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.18%) |Training time=0.46s (21.18%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5664|ppo_ep: 1|act_loss: 0.0027523040771484375|cri_loss: 0.002132415771484375|unsuper_loss: 0.0
-average reward score: 4.9453125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.08%) |Training time=0.46s (21.28%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5665|ppo_ep: 1|act_loss: -0.008880615234375|cri_loss: -0.00409698486328125|unsuper_loss: 0.0
-average reward score: 4.65625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.54%) |Training time=0.45s (20.78%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5666|ppo_ep: 1|act_loss: 0.028533935546875|cri_loss: 0.0145721435546875|unsuper_loss: 0.0
-average reward score: 4.81640625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.72%) |Training time=0.47s (21.67%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5667|ppo_ep: 1|act_loss: -0.037109375|cri_loss: -0.0174560546875|unsuper_loss: 0.0
-average reward score: 5.640625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.36%) |Training time=0.45s (20.98%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5668|ppo_ep: 1|act_loss: -0.022705078125|cri_loss: -0.0110626220703125|unsuper_loss: 0.0
-average reward score: 4.6484375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.45%) |Training time=0.45s (20.90%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.50
-[2023-04-14 12:15:57,800] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
-[2023-04-14 12:15:57,801] [INFO] [logging.py:96:log_dist] [Rank 0] step=5670, skipped=71, lr=[2.3182144686488735e-06, 2.3182144686488735e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:15:57,801] [INFO] [timer.py:199:stop] epoch=0/micro_step=5670/global_step=5670, RunningAvgSamplesPerSec=105.34351177301964, CurrSamplesPerSec=121.80373494216448, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:15:57,893] [INFO] [logging.py:96:log_dist] [Rank 0] step=5670, skipped=92, lr=[1.218460094889067e-06, 1.218460094889067e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5669|ppo_ep: 1|act_loss: 0.00289154052734375|cri_loss: 0.0015668869018554688|unsuper_loss: 0.0
-average reward score: 5.41796875
--------------------------------------------------------------------------------------
-|E2E latency=2.12s |Gather latency=0.00s (0.00%) |Generate time=1.60s (75.29%) |Training time=0.42s (20.05%) |Others=0.10 (4.67%)|CurSamplesPerSec=15.10 |AvgSamplesPerSec=14.50
-[2023-04-14 12:15:59,951] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
-epoch: 0|step: 5670|ppo_ep: 1|act_loss: 0.0088653564453125|cri_loss: 0.00457000732421875|unsuper_loss: 0.0
-average reward score: 5.22265625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.04%) |Training time=0.44s (20.34%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5671|ppo_ep: 1|act_loss: -0.0054931640625|cri_loss: -0.0025272369384765625|unsuper_loss: 0.0
-average reward score: 5.4921875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.36%) |Training time=0.45s (20.71%) |Others=0.11 (4.93%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5672|ppo_ep: 1|act_loss: 0.0093536376953125|cri_loss: 0.004886627197265625|unsuper_loss: 0.0
-average reward score: 5.4765625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.23%) |Training time=0.45s (21.10%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5673|ppo_ep: 1|act_loss: 0.04095458984375|cri_loss: 0.021514892578125|unsuper_loss: 0.0
-average reward score: 4.921875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.99%) |Training time=0.46s (21.37%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5674|ppo_ep: 1|act_loss: 0.0182952880859375|cri_loss: 0.0092620849609375|unsuper_loss: 0.0
-average reward score: 4.65625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.20%) |Training time=0.45s (21.14%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5675|ppo_ep: 1|act_loss: 0.003265380859375|cri_loss: 0.002109527587890625|unsuper_loss: 0.0
-average reward score: 5.7421875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.59%) |Training time=0.45s (20.73%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5676|ppo_ep: 1|act_loss: 0.009246826171875|cri_loss: 0.005527496337890625|unsuper_loss: 0.0
-average reward score: 5.3828125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.31%) |Training time=0.45s (21.02%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5677|ppo_ep: 1|act_loss: -0.01214599609375|cri_loss: -0.005889892578125|unsuper_loss: 0.0
-average reward score: 5.75
--------------------------------------------------------------------------------------
-|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.78s (75.90%) |Training time=0.46s (19.81%) |Others=0.10 (4.29%)|CurSamplesPerSec=13.68 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5678|ppo_ep: 1|act_loss: -0.0177154541015625|cri_loss: -0.00844573974609375|unsuper_loss: 0.0
-average reward score: 5.3125
--------------------------------------------------------------------------------------
-|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.71s (75.15%) |Training time=0.47s (20.44%) |Others=0.10 (4.41%)|CurSamplesPerSec=14.06 |AvgSamplesPerSec=14.50
-[2023-04-14 12:16:19,632] [INFO] [logging.py:96:log_dist] [Rank 0] step=5680, skipped=72, lr=[2.3039444252455474e-06, 2.3039444252455474e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:16:19,650] [INFO] [timer.py:199:stop] epoch=0/micro_step=5680/global_step=5680, RunningAvgSamplesPerSec=105.3509397411824, CurrSamplesPerSec=106.0162415650417, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:16:19,734] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
-[2023-04-14 12:16:19,734] [INFO] [logging.py:96:log_dist] [Rank 0] step=5680, skipped=93, lr=[1.2110300418965807e-06, 1.2110300418965807e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5679|ppo_ep: 1|act_loss: 0.0119781494140625|cri_loss: 0.00623321533203125|unsuper_loss: 0.0
-average reward score: 5.38671875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.19%) |Training time=0.46s (21.67%) |Others=0.09 (4.14%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.50
-[2023-04-14 12:16:21,885] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
-epoch: 0|step: 5680|ppo_ep: 1|act_loss: 0.0156097412109375|cri_loss: 0.0085296630859375|unsuper_loss: 0.0
-average reward score: 4.7421875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.87%) |Training time=0.47s (21.98%) |Others=0.09 (4.16%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5681|ppo_ep: 1|act_loss: -0.026214599609375|cri_loss: -0.0122528076171875|unsuper_loss: 0.0
-average reward score: 5.03125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.92%) |Training time=0.46s (21.42%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5682|ppo_ep: 1|act_loss: 0.037353515625|cri_loss: 0.019256591796875|unsuper_loss: 0.0
-average reward score: 4.78515625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.60%) |Training time=0.47s (21.77%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5683|ppo_ep: 1|act_loss: 0.008056640625|cri_loss: 0.0044708251953125|unsuper_loss: 0.0
-average reward score: 5.86328125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.75%) |Training time=0.47s (21.61%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5684|ppo_ep: 1|act_loss: -0.031463623046875|cri_loss: -0.013458251953125|unsuper_loss: 0.0
-average reward score: 5.88671875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.76%) |Training time=0.46s (21.60%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.50
-epoch: 0|step: 5685|ppo_ep: 1|act_loss: 0.0073089599609375|cri_loss: 0.004638671875|unsuper_loss: 0.0
-average reward score: 5.59375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.53%) |Training time=0.47s (21.86%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5686|ppo_ep: 1|act_loss: 0.00563812255859375|cri_loss: 0.0030384063720703125|unsuper_loss: 0.0
-average reward score: 5.37890625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.73%) |Training time=0.47s (21.61%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5687|ppo_ep: 1|act_loss: -0.034942626953125|cri_loss: -0.0169219970703125|unsuper_loss: 0.0
-average reward score: 5.71875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.84%) |Training time=0.46s (21.51%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5688|ppo_ep: 1|act_loss: -0.0142364501953125|cri_loss: -0.0067901611328125|unsuper_loss: 0.0
-average reward score: 5.4140625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.71%) |Training time=0.47s (21.67%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.51
-[2023-04-14 12:16:41,180] [INFO] [logging.py:96:log_dist] [Rank 0] step=5690, skipped=72, lr=[2.2881243286214536e-06, 2.2881243286214536e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:16:41,198] [INFO] [timer.py:199:stop] epoch=0/micro_step=5690/global_step=5690, RunningAvgSamplesPerSec=105.35023067784104, CurrSamplesPerSec=105.09620121933077, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:16:41,291] [INFO] [logging.py:96:log_dist] [Rank 0] step=5690, skipped=94, lr=[1.2036154644795697e-06, 1.2036154644795697e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5689|ppo_ep: 1|act_loss: 0.03704833984375|cri_loss: 0.019378662109375|unsuper_loss: 0.0
-average reward score: 5.3671875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.71%) |Training time=0.47s (21.67%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5690|ppo_ep: 1|act_loss: 0.0243682861328125|cri_loss: 0.0134124755859375|unsuper_loss: 0.0
-average reward score: 5.9921875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.08%) |Training time=0.46s (21.25%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5691|ppo_ep: 1|act_loss: 0.035430908203125|cri_loss: 0.0182647705078125|unsuper_loss: 0.0
-average reward score: 4.703125
--------------------------------------------------------------------------------------
-|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.60s (71.64%) |Training time=0.46s (20.66%) |Others=0.17 (7.69%)|CurSamplesPerSec=14.34 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5692|ppo_ep: 1|act_loss: 0.0447998046875|cri_loss: 0.0230255126953125|unsuper_loss: 0.0
-average reward score: 5.12109375
--------------------------------------------------------------------------------------
-|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.67s (74.77%) |Training time=0.46s (20.75%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.32 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5693|ppo_ep: 1|act_loss: 0.035491943359375|cri_loss: 0.0187835693359375|unsuper_loss: 0.0
-average reward score: 5.203125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.11%) |Training time=0.46s (21.24%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5694|ppo_ep: 1|act_loss: 0.0137176513671875|cri_loss: 0.00716400146484375|unsuper_loss: 0.0
-average reward score: 5.3671875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.12%) |Training time=0.46s (21.24%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5695|ppo_ep: 1|act_loss: -0.01076507568359375|cri_loss: -0.005306243896484375|unsuper_loss: 0.0
-average reward score: 5.6875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.00%) |Training time=0.46s (21.36%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5696|ppo_ep: 1|act_loss: -0.0136566162109375|cri_loss: -0.005901336669921875|unsuper_loss: 0.0
-average reward score: 5.2734375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.12%) |Training time=0.46s (21.22%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5697|ppo_ep: 1|act_loss: -0.0341796875|cri_loss: -0.016815185546875|unsuper_loss: 0.0
-average reward score: 5.82421875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.81%) |Training time=0.47s (21.56%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5698|ppo_ep: 1|act_loss: 0.04217529296875|cri_loss: 0.0222625732421875|unsuper_loss: 0.0
-average reward score: 5.328125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.11%) |Training time=0.46s (21.26%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.51
-[2023-04-14 12:17:02,892] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, reducing to 16384
-[2023-04-14 12:17:02,893] [INFO] [logging.py:96:log_dist] [Rank 0] step=5700, skipped=73, lr=[2.273918385226065e-06, 2.273918385226065e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:17:02,893] [INFO] [timer.py:199:stop] epoch=0/micro_step=5700/global_step=5700, RunningAvgSamplesPerSec=105.35557904627348, CurrSamplesPerSec=118.39787723532484, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:17:02,985] [INFO] [logging.py:96:log_dist] [Rank 0] step=5700, skipped=94, lr=[1.1953953037610977e-06, 1.1953953037610977e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5699|ppo_ep: 1|act_loss: 0.0312042236328125|cri_loss: 0.0164794921875|unsuper_loss: 0.0
-average reward score: 5.484375
--------------------------------------------------------------------------------------
-|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.60s (75.00%) |Training time=0.43s (20.32%) |Others=0.10 (4.68%)|CurSamplesPerSec=15.05 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5700|ppo_ep: 1|act_loss: 0.010406494140625|cri_loss: 0.005855560302734375|unsuper_loss: 0.0
-average reward score: 5.0703125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.34%) |Training time=0.45s (21.01%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5701|ppo_ep: 1|act_loss: -0.01861572265625|cri_loss: -0.009185791015625|unsuper_loss: 0.0
-average reward score: 5.77734375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.87%) |Training time=0.46s (21.50%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5702|ppo_ep: 1|act_loss: 0.02288818359375|cri_loss: 0.011749267578125|unsuper_loss: 0.0
-average reward score: 5.31640625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.02%) |Training time=0.46s (21.35%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5703|ppo_ep: 1|act_loss: 0.0144500732421875|cri_loss: 0.00774383544921875|unsuper_loss: 0.0
-average reward score: 5.7265625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.17%) |Training time=0.46s (21.17%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5704|ppo_ep: 1|act_loss: 0.020233154296875|cri_loss: 0.01152801513671875|unsuper_loss: 0.0
-average reward score: 4.99609375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.12%) |Training time=0.46s (21.25%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5705|ppo_ep: 1|act_loss: -0.01038360595703125|cri_loss: -0.00502777099609375|unsuper_loss: 0.0
-average reward score: 5.2421875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.02%) |Training time=0.44s (20.31%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5706|ppo_ep: 1|act_loss: 0.0258941650390625|cri_loss: 0.01401519775390625|unsuper_loss: 0.0
-average reward score: 5.7265625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.03%) |Training time=0.44s (20.32%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5707|ppo_ep: 1|act_loss: -0.054718017578125|cri_loss: -0.023895263671875|unsuper_loss: 0.0
-average reward score: 5.21484375
--------------------------------------------------------------------------------------
-|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.80s (76.29%) |Training time=0.46s (19.46%) |Others=0.10 (4.25%)|CurSamplesPerSec=13.60 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5708|ppo_ep: 1|act_loss: 0.0155029296875|cri_loss: 0.0079498291015625|unsuper_loss: 0.0
-average reward score: 5.7734375
--------------------------------------------------------------------------------------
-|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.71s (75.96%) |Training time=0.44s (19.61%) |Others=0.10 (4.43%)|CurSamplesPerSec=14.20 |AvgSamplesPerSec=14.51
-[2023-04-14 12:17:24,721] [INFO] [logging.py:96:log_dist] [Rank 0] step=5710, skipped=73, lr=[2.2581699336049657e-06, 2.2581699336049657e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:17:24,740] [INFO] [timer.py:199:stop] epoch=0/micro_step=5710/global_step=5710, RunningAvgSamplesPerSec=105.36524562432051, CurrSamplesPerSec=114.486349178187, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:17:24,832] [INFO] [logging.py:96:log_dist] [Rank 0] step=5710, skipped=94, lr=[1.1871944804314127e-06, 1.1871944804314127e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5709|ppo_ep: 1|act_loss: 0.01461029052734375|cri_loss: 0.0086212158203125|unsuper_loss: 0.0
-average reward score: 5.1796875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.84%) |Training time=0.44s (20.54%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5710|ppo_ep: 1|act_loss: -0.0137786865234375|cri_loss: -0.006793975830078125|unsuper_loss: 0.0
-average reward score: 5.49609375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.76%) |Training time=0.44s (20.57%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5711|ppo_ep: 1|act_loss: 0.0221099853515625|cri_loss: 0.01142120361328125|unsuper_loss: 0.0
-average reward score: 4.609375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.69%) |Training time=0.45s (20.65%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5712|ppo_ep: 1|act_loss: 0.03350830078125|cri_loss: 0.0171661376953125|unsuper_loss: 0.0
-average reward score: 5.203125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.39%) |Training time=0.43s (19.97%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5713|ppo_ep: 1|act_loss: -0.00296783447265625|cri_loss: -0.0011854171752929688|unsuper_loss: 0.0
-average reward score: 5.49609375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.69%) |Training time=0.44s (20.65%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5714|ppo_ep: 1|act_loss: 0.0247650146484375|cri_loss: 0.01328277587890625|unsuper_loss: 0.0
-average reward score: 6.2890625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.41%) |Training time=0.45s (20.95%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5715|ppo_ep: 1|act_loss: 0.015167236328125|cri_loss: 0.00841522216796875|unsuper_loss: 0.0
-average reward score: 5.4375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.73%) |Training time=0.44s (20.64%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5716|ppo_ep: 1|act_loss: 0.0017757415771484375|cri_loss: 0.001026153564453125|unsuper_loss: 0.0
-average reward score: 4.88671875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.55%) |Training time=0.45s (20.81%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5717|ppo_ep: 1|act_loss: -0.029449462890625|cri_loss: -0.01311492919921875|unsuper_loss: 0.0
-average reward score: 5.26953125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.44%) |Training time=0.45s (20.79%) |Others=0.10 (4.77%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5718|ppo_ep: 1|act_loss: 0.0399169921875|cri_loss: 0.022247314453125|unsuper_loss: 0.0
-average reward score: 5.3984375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.64%) |Training time=0.45s (20.72%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.51
-[2023-04-14 12:17:46,248] [INFO] [logging.py:96:log_dist] [Rank 0] step=5720, skipped=73, lr=[2.242459528597593e-06, 2.242459528597593e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:17:46,267] [INFO] [timer.py:199:stop] epoch=0/micro_step=5720/global_step=5720, RunningAvgSamplesPerSec=105.37837842461052, CurrSamplesPerSec=112.51268157248816, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:17:46,359] [INFO] [logging.py:96:log_dist] [Rank 0] step=5720, skipped=94, lr=[1.1790131160464974e-06, 1.1790131160464974e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5719|ppo_ep: 1|act_loss: 0.00589752197265625|cri_loss: 0.0037384033203125|unsuper_loss: 0.0
-average reward score: 4.78125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.59%) |Training time=0.45s (20.78%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5720|ppo_ep: 1|act_loss: 0.00400543212890625|cri_loss: 0.002696990966796875|unsuper_loss: 0.0
-average reward score: 5.6640625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.57%) |Training time=0.45s (20.78%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5721|ppo_ep: 1|act_loss: 0.0252227783203125|cri_loss: 0.0133514404296875|unsuper_loss: 0.0
-average reward score: 4.71875
--------------------------------------------------------------------------------------
-|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.61s (71.63%) |Training time=0.49s (21.60%) |Others=0.15 (6.77%)|CurSamplesPerSec=14.23 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5722|ppo_ep: 1|act_loss: 0.032135009765625|cri_loss: 0.0175628662109375|unsuper_loss: 0.0
-average reward score: 5.40625
--------------------------------------------------------------------------------------
-|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.63s (72.89%) |Training time=0.49s (21.99%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.28 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5723|ppo_ep: 1|act_loss: -0.041290283203125|cri_loss: -0.020355224609375|unsuper_loss: 0.0
-average reward score: 5.2734375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.11%) |Training time=0.49s (22.33%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5724|ppo_ep: 1|act_loss: -0.04010009765625|cri_loss: -0.01953125|unsuper_loss: 0.0
-average reward score: 5.6015625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.35%) |Training time=0.48s (22.04%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5725|ppo_ep: 1|act_loss: 0.017364501953125|cri_loss: 0.0089874267578125|unsuper_loss: 0.0
-average reward score: 5.3125
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.02%) |Training time=0.47s (21.32%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5726|ppo_ep: 1|act_loss: -0.039947509765625|cri_loss: -0.0191802978515625|unsuper_loss: 0.0
-average reward score: 5.37109375
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.67s (76.38%) |Training time=0.41s (18.92%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5727|ppo_ep: 1|act_loss: 0.016815185546875|cri_loss: 0.008697509765625|unsuper_loss: 0.0
-average reward score: 4.88671875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.92%) |Training time=0.47s (21.41%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5728|ppo_ep: 1|act_loss: 0.04681396484375|cri_loss: 0.0238494873046875|unsuper_loss: 0.0
-average reward score: 4.28515625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.19%) |Training time=0.46s (21.19%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.51
-[2023-04-14 12:18:08,163] [INFO] [logging.py:96:log_dist] [Rank 0] step=5730, skipped=73, lr=[2.226787403070044e-06, 2.226787403070044e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:18:08,181] [INFO] [timer.py:199:stop] epoch=0/micro_step=5730/global_step=5730, RunningAvgSamplesPerSec=105.37804869000095, CurrSamplesPerSec=104.9343332311753, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:18:08,274] [INFO] [logging.py:96:log_dist] [Rank 0] step=5730, skipped=94, lr=[1.1708513318739096e-06, 1.1708513318739096e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5729|ppo_ep: 1|act_loss: 0.0394287109375|cri_loss: 0.020660400390625|unsuper_loss: 0.0
-average reward score: 5.0625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.88%) |Training time=0.47s (21.50%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5730|ppo_ep: 1|act_loss: 0.024932861328125|cri_loss: 0.0137481689453125|unsuper_loss: 0.0
-average reward score: 5.29296875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.24%) |Training time=0.46s (21.13%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5731|ppo_ep: 1|act_loss: 0.0161590576171875|cri_loss: 0.009124755859375|unsuper_loss: 0.0
-average reward score: 5.16796875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.85%) |Training time=0.45s (20.51%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5732|ppo_ep: 1|act_loss: 0.04718017578125|cri_loss: 0.02490234375|unsuper_loss: 0.0
-average reward score: 4.2265625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.42%) |Training time=0.46s (20.95%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5733|ppo_ep: 1|act_loss: 0.00678253173828125|cri_loss: 0.0037174224853515625|unsuper_loss: 0.0
-average reward score: 4.76171875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.53%) |Training time=0.48s (21.84%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5734|ppo_ep: 1|act_loss: -0.0018100738525390625|cri_loss: -0.0007863044738769531|unsuper_loss: 0.0
-average reward score: 4.953125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.69%) |Training time=0.47s (21.61%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5735|ppo_ep: 1|act_loss: -0.0214691162109375|cri_loss: -0.0105133056640625|unsuper_loss: 0.0
-average reward score: 4.328125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.48%) |Training time=0.47s (21.51%) |Others=0.11 (5.01%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5736|ppo_ep: 1|act_loss: -0.0106353759765625|cri_loss: -0.00485992431640625|unsuper_loss: 0.0
-average reward score: 5.16015625
--------------------------------------------------------------------------------------
-|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.73s (73.44%) |Training time=0.52s (21.94%) |Others=0.11 (4.61%)|CurSamplesPerSec=13.59 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5737|ppo_ep: 1|act_loss: -0.01264190673828125|cri_loss: -0.00566864013671875|unsuper_loss: 0.0
-average reward score: 5.34375
--------------------------------------------------------------------------------------
-|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.64s (73.30%) |Training time=0.48s (21.40%) |Others=0.12 (5.29%)|CurSamplesPerSec=14.27 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5738|ppo_ep: 1|act_loss: -0.037322998046875|cri_loss: -0.017547607421875|unsuper_loss: 0.0
-average reward score: 5.81640625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.42%) |Training time=0.48s (21.97%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.51
-[2023-04-14 12:18:30,185] [INFO] [logging.py:96:log_dist] [Rank 0] step=5740, skipped=73, lr=[2.2111537893210277e-06, 2.2111537893210277e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:18:30,203] [INFO] [timer.py:199:stop] epoch=0/micro_step=5740/global_step=5740, RunningAvgSamplesPerSec=105.37475219460445, CurrSamplesPerSec=103.98614734176941, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:18:30,295] [INFO] [logging.py:96:log_dist] [Rank 0] step=5740, skipped=94, lr=[1.1627092488909802e-06, 1.1627092488909802e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5739|ppo_ep: 1|act_loss: -0.016815185546875|cri_loss: -0.0080108642578125|unsuper_loss: 0.0
-average reward score: 5.65625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.65%) |Training time=0.47s (21.74%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5740|ppo_ep: 1|act_loss: 0.002567291259765625|cri_loss: 0.0016937255859375|unsuper_loss: 0.0
-average reward score: 5.0703125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.89%) |Training time=0.47s (21.48%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5741|ppo_ep: 1|act_loss: -0.0176544189453125|cri_loss: -0.00852203369140625|unsuper_loss: 0.0
-average reward score: 4.921875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.56%) |Training time=0.48s (21.83%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5742|ppo_ep: 1|act_loss: -0.019744873046875|cri_loss: -0.0095367431640625|unsuper_loss: 0.0
-average reward score: 5.0625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.05%) |Training time=0.46s (21.30%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5743|ppo_ep: 1|act_loss: -0.022613525390625|cri_loss: -0.01056671142578125|unsuper_loss: 0.0
-average reward score: 4.96484375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.02%) |Training time=0.49s (22.35%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5744|ppo_ep: 1|act_loss: -0.00809478759765625|cri_loss: -0.003993988037109375|unsuper_loss: 0.0
-average reward score: 6.26953125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.23%) |Training time=0.46s (21.17%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5745|ppo_ep: 1|act_loss: -0.0010538101196289062|cri_loss: 0.00023937225341796875|unsuper_loss: 0.0
-average reward score: 4.92578125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.23%) |Training time=0.48s (22.17%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5746|ppo_ep: 1|act_loss: 0.052093505859375|cri_loss: 0.0269927978515625|unsuper_loss: 0.0
-average reward score: 5.56640625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.11%) |Training time=0.48s (22.28%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5747|ppo_ep: 1|act_loss: 0.027069091796875|cri_loss: 0.01427459716796875|unsuper_loss: 0.0
-average reward score: 5.3046875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.22%) |Training time=0.48s (22.12%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5748|ppo_ep: 1|act_loss: 0.0090789794921875|cri_loss: 0.0047760009765625|unsuper_loss: 0.0
-average reward score: 4.8984375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.98%) |Training time=0.49s (22.41%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.51
-[2023-04-14 12:18:51,928] [INFO] [logging.py:96:log_dist] [Rank 0] step=5750, skipped=73, lr=[2.195558919078415e-06, 2.195558919078415e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:18:51,947] [INFO] [timer.py:199:stop] epoch=0/micro_step=5750/global_step=5750, RunningAvgSamplesPerSec=105.36822216331952, CurrSamplesPerSec=100.24642125720193, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:18:52,039] [INFO] [logging.py:96:log_dist] [Rank 0] step=5750, skipped=94, lr=[1.1545869877830212e-06, 1.1545869877830212e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5749|ppo_ep: 1|act_loss: -0.055419921875|cri_loss: -0.0255584716796875|unsuper_loss: 0.0
-average reward score: 5.57421875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.21%) |Training time=0.48s (22.20%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5750|ppo_ep: 1|act_loss: -0.03875732421875|cri_loss: -0.01885986328125|unsuper_loss: 0.0
-average reward score: 4.62109375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.93%) |Training time=0.49s (22.47%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5751|ppo_ep: 1|act_loss: -0.0103912353515625|cri_loss: -0.00496673583984375|unsuper_loss: 0.0
-average reward score: 5.18359375
--------------------------------------------------------------------------------------
-|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.77s (75.31%) |Training time=0.48s (20.42%) |Others=0.10 (4.27%)|CurSamplesPerSec=13.59 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5752|ppo_ep: 1|act_loss: 0.0010557174682617188|cri_loss: 0.0008831024169921875|unsuper_loss: 0.0
-average reward score: 5.09765625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.32%) |Training time=0.48s (22.06%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5753|ppo_ep: 1|act_loss: -0.0428466796875|cri_loss: -0.0207977294921875|unsuper_loss: 0.0
-average reward score: 5.03515625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.11%) |Training time=0.48s (22.29%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5754|ppo_ep: 1|act_loss: 0.028411865234375|cri_loss: 0.0145263671875|unsuper_loss: 0.0
-average reward score: 5.21484375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.36%) |Training time=0.45s (21.00%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5755|ppo_ep: 1|act_loss: 0.01409149169921875|cri_loss: 0.007289886474609375|unsuper_loss: 0.0
-average reward score: 5.76171875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.47%) |Training time=0.45s (20.89%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5756|ppo_ep: 1|act_loss: 0.01885986328125|cri_loss: 0.00994110107421875|unsuper_loss: 0.0
-average reward score: 4.9296875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.62%) |Training time=0.45s (20.76%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5757|ppo_ep: 1|act_loss: 0.014007568359375|cri_loss: 0.0072174072265625|unsuper_loss: 0.0
-average reward score: 5.28515625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.83%) |Training time=0.44s (20.51%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5758|ppo_ep: 1|act_loss: 0.00698089599609375|cri_loss: 0.003719329833984375|unsuper_loss: 0.0
-average reward score: 5.10546875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.45%) |Training time=0.46s (20.94%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.51
-[2023-04-14 12:19:13,815] [INFO] [logging.py:96:log_dist] [Rank 0] step=5760, skipped=73, lr=[2.180003023495804e-06, 2.180003023495804e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:19:13,834] [INFO] [timer.py:199:stop] epoch=0/micro_step=5760/global_step=5760, RunningAvgSamplesPerSec=105.36992402704047, CurrSamplesPerSec=113.88001075862748, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:19:13,926] [INFO] [logging.py:96:log_dist] [Rank 0] step=5760, skipped=94, lr=[1.1464846689415345e-06, 1.1464846689415345e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5759|ppo_ep: 1|act_loss: 0.004638671875|cri_loss: 0.0025043487548828125|unsuper_loss: 0.0
-average reward score: 5.1015625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.87%) |Training time=0.44s (20.50%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5760|ppo_ep: 1|act_loss: -0.00904083251953125|cri_loss: -0.004444122314453125|unsuper_loss: 0.0
-average reward score: 5.09375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.69%) |Training time=0.45s (20.68%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5761|ppo_ep: 1|act_loss: -0.0150146484375|cri_loss: -0.0073089599609375|unsuper_loss: 0.0
-average reward score: 5.109375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.66%) |Training time=0.45s (20.71%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5762|ppo_ep: 1|act_loss: -0.001277923583984375|cri_loss: -2.956390380859375e-05|unsuper_loss: 0.0
-average reward score: 5.2890625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.22%) |Training time=0.46s (21.17%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5763|ppo_ep: 1|act_loss: -0.00362396240234375|cri_loss: -0.001667022705078125|unsuper_loss: 0.0
-average reward score: 5.1875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.90%) |Training time=0.44s (20.44%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5764|ppo_ep: 1|act_loss: -0.0035037994384765625|cri_loss: -0.0008087158203125|unsuper_loss: 0.0
-average reward score: 5.171875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.45%) |Training time=0.45s (20.91%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5765|ppo_ep: 1|act_loss: 0.0462646484375|cri_loss: 0.0257720947265625|unsuper_loss: 0.0
-average reward score: 4.9765625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.23%) |Training time=0.46s (21.15%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5766|ppo_ep: 1|act_loss: 0.02264404296875|cri_loss: 0.011871337890625|unsuper_loss: 0.0
-average reward score: 5.42578125
--------------------------------------------------------------------------------------
-|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.77s (74.90%) |Training time=0.49s (20.81%) |Others=0.10 (4.29%)|CurSamplesPerSec=13.55 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5767|ppo_ep: 1|act_loss: 0.0171661376953125|cri_loss: 0.00885772705078125|unsuper_loss: 0.0
-average reward score: 5.6171875
--------------------------------------------------------------------------------------
-|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.72s (75.51%) |Training time=0.45s (19.95%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.06 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5768|ppo_ep: 1|act_loss: 0.019622802734375|cri_loss: 0.01026153564453125|unsuper_loss: 0.0
-average reward score: 5.71484375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.16%) |Training time=0.46s (21.23%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.51
-[2023-04-14 12:19:35,783] [INFO] [logging.py:96:log_dist] [Rank 0] step=5770, skipped=73, lr=[2.164486333149091e-06, 2.164486333149091e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:19:35,801] [INFO] [timer.py:199:stop] epoch=0/micro_step=5770/global_step=5770, RunningAvgSamplesPerSec=105.3766256111742, CurrSamplesPerSec=108.46236360154154, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:19:35,894] [INFO] [logging.py:96:log_dist] [Rank 0] step=5770, skipped=94, lr=[1.1384024124624324e-06, 1.1384024124624324e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5769|ppo_ep: 1|act_loss: -0.0010747909545898438|cri_loss: -0.0003705024719238281|unsuper_loss: 0.0
-average reward score: 5.41796875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.33%) |Training time=0.46s (21.08%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5770|ppo_ep: 1|act_loss: 0.00479888916015625|cri_loss: 0.0035762786865234375|unsuper_loss: 0.0
-average reward score: 5.5390625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.19%) |Training time=0.46s (21.21%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5771|ppo_ep: 1|act_loss: -0.00518798828125|cri_loss: -0.002086639404296875|unsuper_loss: 0.0
-average reward score: 5.69140625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.56%) |Training time=0.45s (20.81%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5772|ppo_ep: 1|act_loss: 0.003818511962890625|cri_loss: 0.0019702911376953125|unsuper_loss: 0.0
-average reward score: 4.8828125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.15%) |Training time=0.46s (21.24%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5773|ppo_ep: 1|act_loss: 0.005664825439453125|cri_loss: 0.00347137451171875|unsuper_loss: 0.0
-average reward score: 5.15234375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.38%) |Training time=0.46s (21.01%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5774|ppo_ep: 1|act_loss: 0.019561767578125|cri_loss: 0.01019287109375|unsuper_loss: 0.0
-average reward score: 5.02734375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.57%) |Training time=0.45s (20.81%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5775|ppo_ep: 1|act_loss: -0.0223236083984375|cri_loss: -0.01052093505859375|unsuper_loss: 0.0
-average reward score: 5.05859375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.36%) |Training time=0.46s (21.02%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5776|ppo_ep: 1|act_loss: 0.0012111663818359375|cri_loss: 0.0007171630859375|unsuper_loss: 0.0
-average reward score: 5.18359375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.46%) |Training time=0.45s (20.93%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5777|ppo_ep: 1|act_loss: -0.01100921630859375|cri_loss: -0.00514984130859375|unsuper_loss: 0.0
-average reward score: 5.515625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.18%) |Training time=0.46s (21.23%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5778|ppo_ep: 1|act_loss: -0.02056884765625|cri_loss: -0.00988006591796875|unsuper_loss: 0.0
-average reward score: 4.4765625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.46%) |Training time=0.45s (20.92%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.51
-[2023-04-14 12:19:57,471] [INFO] [logging.py:96:log_dist] [Rank 0] step=5780, skipped=73, lr=[2.1490090780330607e-06, 2.1490090780330607e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:19:57,489] [INFO] [timer.py:199:stop] epoch=0/micro_step=5780/global_step=5780, RunningAvgSamplesPerSec=105.38259097764607, CurrSamplesPerSec=107.7067809395413, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:19:57,582] [INFO] [logging.py:96:log_dist] [Rank 0] step=5780, skipped=94, lr=[1.1303403381442527e-06, 1.1303403381442527e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5779|ppo_ep: 1|act_loss: -0.00388336181640625|cri_loss: -0.0016489028930664062|unsuper_loss: 0.0
-average reward score: 5.30859375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.27%) |Training time=0.46s (21.14%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5780|ppo_ep: 1|act_loss: -0.0297088623046875|cri_loss: -0.01397705078125|unsuper_loss: 0.0
-average reward score: 5.09375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.71%) |Training time=0.45s (20.70%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.51
-[2023-04-14 12:20:01,926] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
-epoch: 0|step: 5781|ppo_ep: 1|act_loss: -0.035430908203125|cri_loss: -0.0171051025390625|unsuper_loss: 0.0
-average reward score: 4.765625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.08%) |Training time=0.45s (20.76%) |Others=0.09 (4.16%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.51
-[2023-04-14 12:20:04,093] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
-epoch: 0|step: 5782|ppo_ep: 1|act_loss: 0.004146575927734375|cri_loss: 0.0023784637451171875|unsuper_loss: 0.0
-average reward score: 4.2265625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.77%) |Training time=0.46s (21.11%) |Others=0.09 (4.12%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.51
-[2023-04-14 12:20:06,309] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 16384, reducing to 8192
-epoch: 0|step: 5783|ppo_ep: 1|act_loss: -0.0152130126953125|cri_loss: -0.007354736328125|unsuper_loss: 0.0
-average reward score: 5.9140625
--------------------------------------------------------------------------------------
-|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.79s (77.39%) |Training time=0.42s (18.28%) |Others=0.10 (4.33%)|CurSamplesPerSec=13.85 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5784|ppo_ep: 1|act_loss: -0.04534912109375|cri_loss: -0.022186279296875|unsuper_loss: 0.0
-average reward score: 4.9296875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.48%) |Training time=0.46s (20.93%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5785|ppo_ep: 1|act_loss: -0.004711151123046875|cri_loss: -0.0017976760864257812|unsuper_loss: 0.0
-average reward score: 4.58984375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.73%) |Training time=0.45s (20.65%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5786|ppo_ep: 1|act_loss: -0.027191162109375|cri_loss: -0.0132293701171875|unsuper_loss: 0.0
-average reward score: 4.75
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.45%) |Training time=0.45s (20.94%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5787|ppo_ep: 1|act_loss: -0.0107574462890625|cri_loss: -0.005168914794921875|unsuper_loss: 0.0
-average reward score: 4.640625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.18%) |Training time=0.46s (21.21%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5788|ppo_ep: 1|act_loss: 0.024444580078125|cri_loss: 0.0126953125|unsuper_loss: 0.0
-average reward score: 4.84765625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.28%) |Training time=0.46s (21.14%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.51
-[2023-04-14 12:20:19,343] [INFO] [logging.py:96:log_dist] [Rank 0] step=5790, skipped=74, lr=[2.1351134551705775e-06, 2.1351134551705775e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:20:19,361] [INFO] [timer.py:199:stop] epoch=0/micro_step=5790/global_step=5790, RunningAvgSamplesPerSec=105.3911187251698, CurrSamplesPerSec=108.43922913285121, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:20:19,453] [INFO] [logging.py:96:log_dist] [Rank 0] step=5790, skipped=96, lr=[1.12390529015926e-06, 1.12390529015926e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5789|ppo_ep: 1|act_loss: 0.0033512115478515625|cri_loss: 0.001781463623046875|unsuper_loss: 0.0
-average reward score: 4.64453125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.37%) |Training time=0.46s (21.05%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5790|ppo_ep: 1|act_loss: -0.012176513671875|cri_loss: -0.0050506591796875|unsuper_loss: 0.0
-average reward score: 4.91796875
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.43%) |Training time=0.46s (21.00%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5791|ppo_ep: 1|act_loss: -0.0051422119140625|cri_loss: -0.002140045166015625|unsuper_loss: 0.0
-average reward score: 5.5703125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.45%) |Training time=0.46s (20.93%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5792|ppo_ep: 1|act_loss: 0.0010499954223632812|cri_loss: 0.0006031990051269531|unsuper_loss: 0.0
-average reward score: 5.1484375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.38%) |Training time=0.46s (21.03%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5793|ppo_ep: 1|act_loss: -0.00574493408203125|cri_loss: -0.00251007080078125|unsuper_loss: 0.0
-average reward score: 5.171875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.44%) |Training time=0.45s (20.96%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5794|ppo_ep: 1|act_loss: -0.0072784423828125|cri_loss: -0.0033245086669921875|unsuper_loss: 0.0
-average reward score: 5.765625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.44%) |Training time=0.46s (20.96%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5795|ppo_ep: 1|act_loss: -0.0041961669921875|cri_loss: -0.001972198486328125|unsuper_loss: 0.0
-average reward score: 5.5234375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.61%) |Training time=0.45s (20.77%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5796|ppo_ep: 1|act_loss: -0.0179443359375|cri_loss: -0.00862884521484375|unsuper_loss: 0.0
-average reward score: 5.3828125
--------------------------------------------------------------------------------------
-|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.65s (73.39%) |Training time=0.49s (21.64%) |Others=0.11 (4.97%)|CurSamplesPerSec=14.28 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5797|ppo_ep: 1|act_loss: 0.07708740234375|cri_loss: 0.03997802734375|unsuper_loss: 0.0
-average reward score: 4.8671875
--------------------------------------------------------------------------------------
-|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.30%) |Training time=0.44s (19.64%) |Others=0.16 (7.06%)|CurSamplesPerSec=14.45 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5798|ppo_ep: 1|act_loss: 0.0013828277587890625|cri_loss: 0.0012359619140625|unsuper_loss: 0.0
-average reward score: 5.9609375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.18%) |Training time=0.45s (20.87%) |Others=0.11 (4.95%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.51
-[2023-04-14 12:20:41,175] [INFO] [logging.py:96:log_dist] [Rank 0] step=5800, skipped=74, lr=[2.1197117585322546e-06, 2.1197117585322546e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:20:41,193] [INFO] [timer.py:199:stop] epoch=0/micro_step=5800/global_step=5800, RunningAvgSamplesPerSec=105.39739484592792, CurrSamplesPerSec=109.96594783124505, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:20:41,286] [INFO] [logging.py:96:log_dist] [Rank 0] step=5800, skipped=96, lr=[1.1158798446670462e-06, 1.1158798446670462e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5799|ppo_ep: 1|act_loss: -0.0104522705078125|cri_loss: -0.004947662353515625|unsuper_loss: 0.0
-average reward score: 5.1328125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.27%) |Training time=0.45s (21.12%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5800|ppo_ep: 1|act_loss: -0.02130126953125|cri_loss: -0.010406494140625|unsuper_loss: 0.0
-average reward score: 4.93359375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.25%) |Training time=0.45s (21.09%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5801|ppo_ep: 1|act_loss: 0.005802154541015625|cri_loss: 0.0036106109619140625|unsuper_loss: 0.0
-average reward score: 4.5078125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.24%) |Training time=0.45s (21.12%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5802|ppo_ep: 1|act_loss: -0.0009617805480957031|cri_loss: 2.384185791015625e-06|unsuper_loss: 0.0
-average reward score: 5.265625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.74%) |Training time=0.46s (21.61%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5803|ppo_ep: 1|act_loss: -0.0011854171752929688|cri_loss: -0.0003643035888671875|unsuper_loss: 0.0
-average reward score: 5.640625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.84%) |Training time=0.46s (21.51%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5804|ppo_ep: 1|act_loss: 0.0267486572265625|cri_loss: 0.01435089111328125|unsuper_loss: 0.0
-average reward score: 4.953125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.72%) |Training time=0.47s (21.64%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5805|ppo_ep: 1|act_loss: 0.0136566162109375|cri_loss: 0.00754547119140625|unsuper_loss: 0.0
-average reward score: 5.7734375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.74%) |Training time=0.46s (21.62%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5806|ppo_ep: 1|act_loss: 0.0253448486328125|cri_loss: 0.01340484619140625|unsuper_loss: 0.0
-average reward score: 5.3203125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.65%) |Training time=0.47s (21.70%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5807|ppo_ep: 1|act_loss: -0.0128173828125|cri_loss: -0.006130218505859375|unsuper_loss: 0.0
-average reward score: 5.78515625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.90%) |Training time=0.46s (21.45%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5808|ppo_ep: 1|act_loss: -0.0037994384765625|cri_loss: -0.0012502670288085938|unsuper_loss: 0.0
-average reward score: 6.140625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.65%) |Training time=0.47s (21.74%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.51
-[2023-04-14 12:21:02,690] [INFO] [logging.py:96:log_dist] [Rank 0] step=5810, skipped=74, lr=[2.1043501607918214e-06, 2.1043501607918214e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:21:02,709] [INFO] [timer.py:199:stop] epoch=0/micro_step=5810/global_step=5810, RunningAvgSamplesPerSec=105.39882852218719, CurrSamplesPerSec=102.81196321932057, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:21:02,804] [INFO] [logging.py:96:log_dist] [Rank 0] step=5810, skipped=96, lr=[1.107874915174542e-06, 1.107874915174542e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5809|ppo_ep: 1|act_loss: -0.031494140625|cri_loss: -0.0153961181640625|unsuper_loss: 0.0
-average reward score: 5.140625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.38%) |Training time=0.47s (21.95%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5810|ppo_ep: 1|act_loss: -0.03619384765625|cri_loss: -0.0177154541015625|unsuper_loss: 0.0
-average reward score: 5.80078125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.66%) |Training time=0.47s (21.68%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5811|ppo_ep: 1|act_loss: -0.04034423828125|cri_loss: -0.01953125|unsuper_loss: 0.0
-average reward score: 5.77734375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.50%) |Training time=0.47s (21.85%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5812|ppo_ep: 1|act_loss: 0.01007080078125|cri_loss: 0.005229949951171875|unsuper_loss: 0.0
-average reward score: 4.953125
--------------------------------------------------------------------------------------
-|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.59s (70.07%) |Training time=0.47s (20.86%) |Others=0.21 (9.07%)|CurSamplesPerSec=14.09 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5813|ppo_ep: 1|act_loss: -0.0201416015625|cri_loss: -0.0098114013671875|unsuper_loss: 0.0
-average reward score: 5.5390625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.36%) |Training time=0.47s (21.97%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5814|ppo_ep: 1|act_loss: 0.033935546875|cri_loss: 0.018096923828125|unsuper_loss: 0.0
-average reward score: 5.203125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.49%) |Training time=0.47s (21.87%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5815|ppo_ep: 1|act_loss: 0.007904052734375|cri_loss: 0.004215240478515625|unsuper_loss: 0.0
-average reward score: 5.05078125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.91%) |Training time=0.46s (21.40%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5816|ppo_ep: 1|act_loss: 0.012664794921875|cri_loss: 0.006511688232421875|unsuper_loss: 0.0
-average reward score: 4.5234375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.70%) |Training time=0.46s (21.63%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5817|ppo_ep: 1|act_loss: -0.00014269351959228516|cri_loss: 1.1444091796875e-05|unsuper_loss: 0.0
-average reward score: 5.42578125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.82%) |Training time=0.46s (21.51%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5818|ppo_ep: 1|act_loss: 0.040313720703125|cri_loss: 0.0205841064453125|unsuper_loss: 0.0
-average reward score: 5.46484375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.64%) |Training time=0.46s (21.68%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.51
-[2023-04-14 12:21:24,289] [INFO] [logging.py:96:log_dist] [Rank 0] step=5820, skipped=74, lr=[2.0890288896452154e-06, 2.0890288896452154e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:21:24,307] [INFO] [timer.py:199:stop] epoch=0/micro_step=5820/global_step=5820, RunningAvgSamplesPerSec=105.39866420062243, CurrSamplesPerSec=108.02475065836494, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:21:24,400] [INFO] [logging.py:96:log_dist] [Rank 0] step=5820, skipped=96, lr=[1.0998906203341133e-06, 1.0998906203341133e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5819|ppo_ep: 1|act_loss: 0.0126953125|cri_loss: 0.006683349609375|unsuper_loss: 0.0
-average reward score: 4.859375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.89%) |Training time=0.46s (21.45%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5820|ppo_ep: 1|act_loss: 0.002716064453125|cri_loss: 0.0014963150024414062|unsuper_loss: 0.0
-average reward score: 5.1171875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.59%) |Training time=0.47s (21.75%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5821|ppo_ep: 1|act_loss: -0.0369873046875|cri_loss: -0.0176239013671875|unsuper_loss: 0.0
-average reward score: 5.16796875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.72%) |Training time=0.46s (21.58%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5822|ppo_ep: 1|act_loss: 0.00484466552734375|cri_loss: 0.00281524658203125|unsuper_loss: 0.0
-average reward score: 5.9140625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.88%) |Training time=0.46s (21.42%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5823|ppo_ep: 1|act_loss: -0.02099609375|cri_loss: -0.009765625|unsuper_loss: 0.0
-average reward score: 4.99609375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.56%) |Training time=0.47s (21.78%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5824|ppo_ep: 1|act_loss: -0.0311737060546875|cri_loss: -0.0151214599609375|unsuper_loss: 0.0
-average reward score: 5.1484375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.57%) |Training time=0.47s (21.74%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5825|ppo_ep: 1|act_loss: 0.03155517578125|cri_loss: 0.016632080078125|unsuper_loss: 0.0
-average reward score: 4.74609375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.67%) |Training time=0.47s (21.67%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5826|ppo_ep: 1|act_loss: -0.00661468505859375|cri_loss: -0.0029697418212890625|unsuper_loss: 0.0
-average reward score: 5.01953125
--------------------------------------------------------------------------------------
-|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.70s (74.37%) |Training time=0.48s (21.21%) |Others=0.10 (4.42%)|CurSamplesPerSec=14.00 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5827|ppo_ep: 1|act_loss: 0.04132080078125|cri_loss: 0.021881103515625|unsuper_loss: 0.0
-average reward score: 5.0390625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.70%) |Training time=0.47s (21.68%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5828|ppo_ep: 1|act_loss: 0.0458984375|cri_loss: 0.02459716796875|unsuper_loss: 0.0
-average reward score: 5.20703125
--------------------------------------------------------------------------------------
-|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.76s (74.93%) |Training time=0.49s (20.81%) |Others=0.10 (4.27%)|CurSamplesPerSec=13.62 |AvgSamplesPerSec=14.51
-[2023-04-14 12:21:46,126] [INFO] [logging.py:96:log_dist] [Rank 0] step=5830, skipped=74, lr=[2.0737481721906358e-06, 2.0737481721906358e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:21:46,145] [INFO] [timer.py:199:stop] epoch=0/micro_step=5830/global_step=5830, RunningAvgSamplesPerSec=105.39656981851218, CurrSamplesPerSec=106.60348186749079, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:21:46,237] [INFO] [logging.py:96:log_dist] [Rank 0] step=5830, skipped=96, lr=[1.0919270784922725e-06, 1.0919270784922725e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5829|ppo_ep: 1|act_loss: -0.011474609375|cri_loss: -0.005504608154296875|unsuper_loss: 0.0
-average reward score: 5.28125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.98%) |Training time=0.46s (21.43%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5830|ppo_ep: 1|act_loss: 0.00536346435546875|cri_loss: 0.0030269622802734375|unsuper_loss: 0.0
-average reward score: 5.890625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.86%) |Training time=0.47s (21.53%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5831|ppo_ep: 1|act_loss: 0.018096923828125|cri_loss: 0.0093841552734375|unsuper_loss: 0.0
-average reward score: 5.2890625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.87%) |Training time=0.46s (21.46%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5832|ppo_ep: 1|act_loss: 0.015869140625|cri_loss: 0.0083160400390625|unsuper_loss: 0.0
-average reward score: 5.8671875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.92%) |Training time=0.46s (21.45%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5833|ppo_ep: 1|act_loss: 0.09326171875|cri_loss: 0.048065185546875|unsuper_loss: 0.0
-average reward score: 5.65234375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.66%) |Training time=0.47s (21.73%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5834|ppo_ep: 1|act_loss: -0.003528594970703125|cri_loss: -0.0014944076538085938|unsuper_loss: 0.0
-average reward score: 4.24609375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.88%) |Training time=0.46s (21.46%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5835|ppo_ep: 1|act_loss: -0.00787353515625|cri_loss: -0.00383758544921875|unsuper_loss: 0.0
-average reward score: 5.515625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.94%) |Training time=0.46s (21.44%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5836|ppo_ep: 1|act_loss: -0.001445770263671875|cri_loss: -0.0004544258117675781|unsuper_loss: 0.0
-average reward score: 5.04296875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.88%) |Training time=0.47s (21.55%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5837|ppo_ep: 1|act_loss: -0.0098419189453125|cri_loss: -0.0033473968505859375|unsuper_loss: 0.0
-average reward score: 5.109375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.87%) |Training time=0.46s (21.48%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5838|ppo_ep: 1|act_loss: -0.0123443603515625|cri_loss: -0.006011962890625|unsuper_loss: 0.0
-average reward score: 6.16796875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.85%) |Training time=0.46s (21.52%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.51
-[2023-04-14 12:22:07,741] [INFO] [logging.py:96:log_dist] [Rank 0] step=5840, skipped=74, lr=[2.05850823492518e-06, 2.05850823492518e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:22:07,759] [INFO] [timer.py:199:stop] epoch=0/micro_step=5840/global_step=5840, RunningAvgSamplesPerSec=105.39736005809708, CurrSamplesPerSec=107.77095193664996, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:22:07,851] [INFO] [logging.py:96:log_dist] [Rank 0] step=5840, skipped=96, lr=[1.0839844076879186e-06, 1.0839844076879186e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5839|ppo_ep: 1|act_loss: -0.055328369140625|cri_loss: -0.0242919921875|unsuper_loss: 0.0
-average reward score: 5.67578125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.06%) |Training time=0.46s (21.32%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5840|ppo_ep: 1|act_loss: -0.0052490234375|cri_loss: -0.0023136138916015625|unsuper_loss: 0.0
-average reward score: 5.1953125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.82%) |Training time=0.47s (21.55%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5841|ppo_ep: 1|act_loss: 0.0347900390625|cri_loss: 0.018707275390625|unsuper_loss: 0.0
-average reward score: 5.20703125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.13%) |Training time=0.46s (21.21%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5842|ppo_ep: 1|act_loss: -0.0010709762573242188|cri_loss: -0.0003044605255126953|unsuper_loss: 0.0
-average reward score: 5.765625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.91%) |Training time=0.47s (21.47%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5843|ppo_ep: 1|act_loss: 0.002651214599609375|cri_loss: 0.0016536712646484375|unsuper_loss: 0.0
-average reward score: 4.9296875
--------------------------------------------------------------------------------------
-|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.77s (75.11%) |Training time=0.49s (20.63%) |Others=0.10 (4.26%)|CurSamplesPerSec=13.56 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5844|ppo_ep: 1|act_loss: 0.00412750244140625|cri_loss: 0.002384185791015625|unsuper_loss: 0.0
-average reward score: 5.02734375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.01%) |Training time=0.44s (20.31%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5845|ppo_ep: 1|act_loss: 0.0032215118408203125|cri_loss: 0.001739501953125|unsuper_loss: 0.0
-average reward score: 5.1484375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.67%) |Training time=0.45s (20.69%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5846|ppo_ep: 1|act_loss: -0.004993438720703125|cri_loss: -0.002056121826171875|unsuper_loss: 0.0
-average reward score: 5.01953125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.83%) |Training time=0.44s (20.35%) |Others=0.10 (4.81%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5847|ppo_ep: 1|act_loss: 0.0643310546875|cri_loss: 0.03436279296875|unsuper_loss: 0.0
-average reward score: 6.1640625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.05%) |Training time=0.43s (20.26%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5848|ppo_ep: 1|act_loss: -0.023712158203125|cri_loss: -0.0104217529296875|unsuper_loss: 0.0
-average reward score: 5.67578125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.91%) |Training time=0.44s (20.43%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.51
-[2023-04-14 12:22:29,465] [INFO] [logging.py:96:log_dist] [Rank 0] step=5850, skipped=74, lr=[2.0433093037414786e-06, 2.0433093037414786e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:22:29,484] [INFO] [timer.py:199:stop] epoch=0/micro_step=5850/global_step=5850, RunningAvgSamplesPerSec=105.40682625158138, CurrSamplesPerSec=117.7084163556254, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:22:29,576] [INFO] [logging.py:96:log_dist] [Rank 0] step=5850, skipped=96, lr=[1.076062725650594e-06, 1.076062725650594e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5849|ppo_ep: 1|act_loss: -0.00316619873046875|cri_loss: -0.0006608963012695312|unsuper_loss: 0.0
-average reward score: 5.28125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (75.03%) |Training time=0.43s (20.31%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5850|ppo_ep: 1|act_loss: -0.00543975830078125|cri_loss: -0.00247955322265625|unsuper_loss: 0.0
-average reward score: 5.6640625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.09%) |Training time=0.43s (20.23%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5851|ppo_ep: 1|act_loss: -0.03326416015625|cri_loss: -0.016357421875|unsuper_loss: 0.0
-average reward score: 5.7265625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.98%) |Training time=0.44s (20.35%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5852|ppo_ep: 1|act_loss: -0.03924560546875|cri_loss: -0.0193328857421875|unsuper_loss: 0.0
-average reward score: 6.23828125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.84%) |Training time=0.44s (20.48%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5853|ppo_ep: 1|act_loss: 0.0015001296997070312|cri_loss: 0.003116607666015625|unsuper_loss: 0.0
-average reward score: 6.23828125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.76%) |Training time=0.44s (20.55%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5854|ppo_ep: 1|act_loss: -0.028717041015625|cri_loss: -0.01385498046875|unsuper_loss: 0.0
-average reward score: 5.43359375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.34%) |Training time=0.45s (21.02%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5855|ppo_ep: 1|act_loss: -0.00753021240234375|cri_loss: -0.0033626556396484375|unsuper_loss: 0.0
-average reward score: 5.2421875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.40%) |Training time=0.44s (20.00%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5856|ppo_ep: 1|act_loss: 0.032745361328125|cri_loss: 0.016998291015625|unsuper_loss: 0.0
-average reward score: 5.10546875
--------------------------------------------------------------------------------------
-|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.71s (76.06%) |Training time=0.44s (19.47%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.25 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5857|ppo_ep: 1|act_loss: -0.00849151611328125|cri_loss: -0.0033931732177734375|unsuper_loss: 0.0
-average reward score: 4.5859375
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.61s (72.74%) |Training time=0.46s (20.72%) |Others=0.14 (6.54%)|CurSamplesPerSec=14.48 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5858|ppo_ep: 1|act_loss: 0.0016469955444335938|cri_loss: 0.0010538101196289062|unsuper_loss: 0.0
-average reward score: 5.21875
--------------------------------------------------------------------------------------
-|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.66s (74.10%) |Training time=0.48s (21.43%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.25 |AvgSamplesPerSec=14.51
-[2023-04-14 12:22:51,232] [INFO] [logging.py:96:log_dist] [Rank 0] step=5860, skipped=74, lr=[2.028151603924359e-06, 2.028151603924359e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:22:51,250] [INFO] [timer.py:199:stop] epoch=0/micro_step=5860/global_step=5860, RunningAvgSamplesPerSec=105.41776771499981, CurrSamplesPerSec=105.08171960509524, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:22:51,343] [INFO] [logging.py:96:log_dist] [Rank 0] step=5860, skipped=96, lr=[1.068162149798737e-06, 1.068162149798737e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5859|ppo_ep: 1|act_loss: 0.0809326171875|cri_loss: 0.04217529296875|unsuper_loss: 0.0
-average reward score: 4.53125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.74%) |Training time=0.47s (21.65%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5860|ppo_ep: 1|act_loss: -0.0221710205078125|cri_loss: -0.01047515869140625|unsuper_loss: 0.0
-average reward score: 5.0078125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.58%) |Training time=0.47s (21.80%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5861|ppo_ep: 1|act_loss: -0.00592041015625|cri_loss: -0.0028438568115234375|unsuper_loss: 0.0
-average reward score: 6.1953125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.86%) |Training time=0.46s (21.52%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5862|ppo_ep: 1|act_loss: 0.00959014892578125|cri_loss: 0.00543975830078125|unsuper_loss: 0.0
-average reward score: 5.10546875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.62%) |Training time=0.47s (21.76%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5863|ppo_ep: 1|act_loss: -0.008148193359375|cri_loss: -0.002391815185546875|unsuper_loss: 0.0
-average reward score: 4.66796875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.38%) |Training time=0.48s (22.02%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5864|ppo_ep: 1|act_loss: -0.003368377685546875|cri_loss: -0.0016183853149414062|unsuper_loss: 0.0
-average reward score: 4.703125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.53%) |Training time=0.47s (21.88%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5865|ppo_ep: 1|act_loss: -0.0250701904296875|cri_loss: -0.012115478515625|unsuper_loss: 0.0
-average reward score: 4.56640625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.83%) |Training time=0.46s (21.55%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5866|ppo_ep: 1|act_loss: -0.015777587890625|cri_loss: -0.00771331787109375|unsuper_loss: 0.0
-average reward score: 4.5625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.57%) |Training time=0.47s (21.81%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5867|ppo_ep: 1|act_loss: -0.0160980224609375|cri_loss: -0.007701873779296875|unsuper_loss: 0.0
-average reward score: 4.48046875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.80%) |Training time=0.46s (21.55%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5868|ppo_ep: 1|act_loss: 0.00677490234375|cri_loss: 0.0037555694580078125|unsuper_loss: 0.0
-average reward score: 4.53515625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.81%) |Training time=0.47s (21.57%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.51
-[2023-04-14 12:23:12,850] [INFO] [logging.py:96:log_dist] [Rank 0] step=5870, skipped=74, lr=[2.013035360147499e-06, 2.013035360147499e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:23:12,868] [INFO] [timer.py:199:stop] epoch=0/micro_step=5870/global_step=5870, RunningAvgSamplesPerSec=105.4160718237638, CurrSamplesPerSec=106.12563552319669, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:23:12,961] [INFO] [logging.py:96:log_dist] [Rank 0] step=5870, skipped=96, lr=[1.0602827972379417e-06, 1.0602827972379417e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5869|ppo_ep: 1|act_loss: 0.020050048828125|cri_loss: 0.0104827880859375|unsuper_loss: 0.0
-average reward score: 4.8828125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.92%) |Training time=0.46s (21.46%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5870|ppo_ep: 1|act_loss: 0.0234375|cri_loss: 0.01206207275390625|unsuper_loss: 0.0
-average reward score: 5.0625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.85%) |Training time=0.46s (21.52%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5871|ppo_ep: 1|act_loss: -0.0171356201171875|cri_loss: -0.00823974609375|unsuper_loss: 0.0
-average reward score: 5.43359375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.85%) |Training time=0.46s (21.50%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5872|ppo_ep: 1|act_loss: -0.01873779296875|cri_loss: -0.00913238525390625|unsuper_loss: 0.0
-average reward score: 5.94921875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.09%) |Training time=0.46s (21.27%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5873|ppo_ep: 1|act_loss: 0.0076446533203125|cri_loss: 0.004428863525390625|unsuper_loss: 0.0
-average reward score: 5.64453125
--------------------------------------------------------------------------------------
-|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.77s (75.64%) |Training time=0.47s (20.04%) |Others=0.10 (4.32%)|CurSamplesPerSec=13.69 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5874|ppo_ep: 1|act_loss: 0.00121307373046875|cri_loss: 0.0009002685546875|unsuper_loss: 0.0
-average reward score: 5.98828125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.28%) |Training time=0.48s (22.14%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5875|ppo_ep: 1|act_loss: -0.0013256072998046875|cri_loss: -0.0005035400390625|unsuper_loss: 0.0
-average reward score: 6.109375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.58%) |Training time=0.47s (21.77%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5876|ppo_ep: 1|act_loss: -0.0012073516845703125|cri_loss: -0.00045490264892578125|unsuper_loss: 0.0
-average reward score: 6.01953125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.39%) |Training time=0.48s (21.99%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5877|ppo_ep: 1|act_loss: 0.0103302001953125|cri_loss: 0.005706787109375|unsuper_loss: 0.0
-average reward score: 4.75390625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.44%) |Training time=0.48s (21.96%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5878|ppo_ep: 1|act_loss: -0.007904052734375|cri_loss: -0.003818511962890625|unsuper_loss: 0.0
-average reward score: 4.80859375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.48%) |Training time=0.47s (21.87%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.51
-[2023-04-14 12:23:34,691] [INFO] [logging.py:96:log_dist] [Rank 0] step=5880, skipped=74, lr=[1.9979607964700985e-06, 1.9979607964700985e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:23:34,710] [INFO] [timer.py:199:stop] epoch=0/micro_step=5880/global_step=5880, RunningAvgSamplesPerSec=105.41309467540493, CurrSamplesPerSec=104.21140785612961, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:23:34,803] [INFO] [logging.py:96:log_dist] [Rank 0] step=5880, skipped=96, lr=[1.0524247847592191e-06, 1.0524247847592191e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5879|ppo_ep: 1|act_loss: -0.00569915771484375|cri_loss: -0.0027313232421875|unsuper_loss: 0.0
-average reward score: 5.43359375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.68%) |Training time=0.47s (21.68%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5880|ppo_ep: 1|act_loss: 0.03271484375|cri_loss: 0.016754150390625|unsuper_loss: 0.0
-average reward score: 5.734375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.42%) |Training time=0.48s (21.98%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5881|ppo_ep: 1|act_loss: -0.01360321044921875|cri_loss: -0.006427764892578125|unsuper_loss: 0.0
-average reward score: 4.65625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.11%) |Training time=0.48s (22.21%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5882|ppo_ep: 1|act_loss: 0.0108489990234375|cri_loss: 0.005596160888671875|unsuper_loss: 0.0
-average reward score: 5.2421875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.37%) |Training time=0.48s (21.99%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.51
-[2023-04-14 12:23:43,477] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
-epoch: 0|step: 5883|ppo_ep: 1|act_loss: 0.0019989013671875|cri_loss: 0.0011072158813476562|unsuper_loss: 0.0
-average reward score: 6.0
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.73%) |Training time=0.48s (22.14%) |Others=0.09 (4.14%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.51
-[2023-04-14 12:23:45,644] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
-epoch: 0|step: 5884|ppo_ep: 1|act_loss: 0.025115966796875|cri_loss: 0.01308441162109375|unsuper_loss: 0.0
-average reward score: 5.87109375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.76%) |Training time=0.48s (22.02%) |Others=0.09 (4.22%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5885|ppo_ep: 1|act_loss: 0.004608154296875|cri_loss: 0.0024166107177734375|unsuper_loss: 0.0
-average reward score: 5.42578125
--------------------------------------------------------------------------------------
-|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.62s (72.49%) |Training time=0.51s (23.01%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.33 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5886|ppo_ep: 1|act_loss: -0.02362060546875|cri_loss: -0.0113067626953125|unsuper_loss: 0.0
-average reward score: 4.69921875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.11%) |Training time=0.48s (22.31%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5887|ppo_ep: 1|act_loss: 0.013458251953125|cri_loss: 0.00859832763671875|unsuper_loss: 0.0
-average reward score: 4.7890625
--------------------------------------------------------------------------------------
-|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.50%) |Training time=0.47s (20.16%) |Others=0.10 (4.34%)|CurSamplesPerSec=13.69 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5888|ppo_ep: 1|act_loss: 0.0234375|cri_loss: 0.01216888427734375|unsuper_loss: 0.0
-average reward score: 5.2890625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.06%) |Training time=0.46s (21.30%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.51
-[2023-04-14 12:23:56,602] [INFO] [logging.py:96:log_dist] [Rank 0] step=5890, skipped=74, lr=[1.982928136333553e-06, 1.982928136333553e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:23:56,621] [INFO] [timer.py:199:stop] epoch=0/micro_step=5890/global_step=5890, RunningAvgSamplesPerSec=105.40534437177286, CurrSamplesPerSec=102.15519602149695, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:23:56,713] [INFO] [logging.py:96:log_dist] [Rank 0] step=5890, skipped=98, lr=[1.0461538179170056e-06, 1.0461538179170056e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5889|ppo_ep: 1|act_loss: -0.00814056396484375|cri_loss: -0.00372314453125|unsuper_loss: 0.0
-average reward score: 5.33984375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.43%) |Training time=0.48s (21.97%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5890|ppo_ep: 1|act_loss: 0.031341552734375|cri_loss: 0.0159759521484375|unsuper_loss: 0.0
-average reward score: 5.60546875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.44%) |Training time=0.47s (21.95%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5891|ppo_ep: 1|act_loss: 0.031219482421875|cri_loss: 0.01629638671875|unsuper_loss: 0.0
-average reward score: 5.31640625
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.54%) |Training time=0.48s (21.88%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5892|ppo_ep: 1|act_loss: 0.0266265869140625|cri_loss: 0.0140533447265625|unsuper_loss: 0.0
-average reward score: 5.12890625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.43%) |Training time=0.47s (21.94%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5893|ppo_ep: 1|act_loss: 0.0418701171875|cri_loss: 0.0213165283203125|unsuper_loss: 0.0
-average reward score: 5.39453125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.18%) |Training time=0.48s (22.22%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5894|ppo_ep: 1|act_loss: 0.00766754150390625|cri_loss: 0.004138946533203125|unsuper_loss: 0.0
-average reward score: 4.8203125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.28%) |Training time=0.48s (22.09%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5895|ppo_ep: 1|act_loss: -0.00994873046875|cri_loss: -0.00438690185546875|unsuper_loss: 0.0
-average reward score: 5.36328125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.57%) |Training time=0.47s (21.81%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5896|ppo_ep: 1|act_loss: -0.0102691650390625|cri_loss: -0.004901885986328125|unsuper_loss: 0.0
-average reward score: 4.81640625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.33%) |Training time=0.48s (22.06%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5897|ppo_ep: 1|act_loss: 0.01910400390625|cri_loss: 0.0099334716796875|unsuper_loss: 0.0
-average reward score: 5.21484375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.46%) |Training time=0.47s (21.91%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5898|ppo_ep: 1|act_loss: -0.032958984375|cri_loss: -0.0158843994140625|unsuper_loss: 0.0
-average reward score: 4.57421875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.48%) |Training time=0.47s (21.88%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.51
-[2023-04-14 12:24:18,277] [INFO] [logging.py:96:log_dist] [Rank 0] step=5900, skipped=74, lr=[1.9679376025581524e-06, 1.9679376025581524e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:24:18,295] [INFO] [timer.py:199:stop] epoch=0/micro_step=5900/global_step=5900, RunningAvgSamplesPerSec=105.39940940116666, CurrSamplesPerSec=103.16481053464135, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:24:18,393] [INFO] [logging.py:96:log_dist] [Rank 0] step=5900, skipped=98, lr=[1.0383345108885993e-06, 1.0383345108885993e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5899|ppo_ep: 1|act_loss: -0.025238037109375|cri_loss: -0.01222991943359375|unsuper_loss: 0.0
-average reward score: 5.4140625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.42%) |Training time=0.47s (21.78%) |Others=0.10 (4.80%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5900|ppo_ep: 1|act_loss: -0.004428863525390625|cri_loss: -0.0018815994262695312|unsuper_loss: 0.0
-average reward score: 4.9375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.27%) |Training time=0.48s (22.11%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5901|ppo_ep: 1|act_loss: -0.01324462890625|cri_loss: -0.00623321533203125|unsuper_loss: 0.0
-average reward score: 5.29296875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.86%) |Training time=0.47s (21.51%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5902|ppo_ep: 1|act_loss: -0.00580596923828125|cri_loss: -0.0019092559814453125|unsuper_loss: 0.0
-average reward score: 5.48828125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.52%) |Training time=0.47s (21.86%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5903|ppo_ep: 1|act_loss: 0.0159912109375|cri_loss: 0.008148193359375|unsuper_loss: 0.0
-average reward score: 5.6328125
--------------------------------------------------------------------------------------
-|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.77s (74.42%) |Training time=0.51s (21.33%) |Others=0.10 (4.25%)|CurSamplesPerSec=13.48 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5904|ppo_ep: 1|act_loss: -0.0110626220703125|cri_loss: -0.005344390869140625|unsuper_loss: 0.0
-average reward score: 5.62109375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.63%) |Training time=0.47s (21.74%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.51
-[2023-04-14 12:24:31,500] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 16384, but hysteresis is 2. Reducing hysteresis to 1
-epoch: 0|step: 5905|ppo_ep: 1|act_loss: 0.025054931640625|cri_loss: 0.0130462646484375|unsuper_loss: 0.0
-average reward score: 5.08984375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.39%) |Training time=0.45s (20.93%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5906|ppo_ep: 1|act_loss: 0.0015077590942382812|cri_loss: 0.0011386871337890625|unsuper_loss: 0.0
-average reward score: 5.28125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.94%) |Training time=0.49s (22.46%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5907|ppo_ep: 1|act_loss: 0.00643157958984375|cri_loss: 0.0035247802734375|unsuper_loss: 0.0
-average reward score: 5.5078125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.94%) |Training time=0.49s (22.45%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5908|ppo_ep: 1|act_loss: 0.006465911865234375|cri_loss: 0.0035915374755859375|unsuper_loss: 0.0
-average reward score: 5.828125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.39%) |Training time=0.48s (22.02%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.51
-[2023-04-14 12:24:40,173] [INFO] [logging.py:96:log_dist] [Rank 0] step=5910, skipped=75, lr=[1.9544823238569153e-06, 1.9544823238569153e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:24:40,191] [INFO] [timer.py:199:stop] epoch=0/micro_step=5910/global_step=5910, RunningAvgSamplesPerSec=105.39236435819984, CurrSamplesPerSec=102.31007903210069, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:24:40,284] [INFO] [logging.py:96:log_dist] [Rank 0] step=5910, skipped=98, lr=[1.0305368692688175e-06, 1.0305368692688175e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5909|ppo_ep: 1|act_loss: -0.00597381591796875|cri_loss: -0.0027923583984375|unsuper_loss: 0.0
-average reward score: 5.86328125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.46%) |Training time=0.48s (21.93%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.51
-[2023-04-14 12:24:42,333] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 16384, reducing to 8192
-epoch: 0|step: 5910|ppo_ep: 1|act_loss: 0.0025787353515625|cri_loss: 0.0016918182373046875|unsuper_loss: 0.0
-average reward score: 5.3359375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.17%) |Training time=0.45s (21.15%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5911|ppo_ep: 1|act_loss: -0.002712249755859375|cri_loss: -0.001117706298828125|unsuper_loss: 0.0
-average reward score: 5.37890625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.89%) |Training time=0.49s (22.52%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5912|ppo_ep: 1|act_loss: 0.0108795166015625|cri_loss: 0.00567626953125|unsuper_loss: 0.0
-average reward score: 4.9921875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.80%) |Training time=0.49s (22.62%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5913|ppo_ep: 1|act_loss: -0.01776123046875|cri_loss: -0.00823974609375|unsuper_loss: 0.0
-average reward score: 6.1640625
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.50%) |Training time=0.50s (22.93%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5914|ppo_ep: 1|act_loss: -0.003353118896484375|cri_loss: -0.0009851455688476562|unsuper_loss: 0.0
-average reward score: 4.6328125
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.53%) |Training time=0.48s (21.92%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.48 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5915|ppo_ep: 1|act_loss: -0.0023899078369140625|cri_loss: -0.0008401870727539062|unsuper_loss: 0.0
-average reward score: 4.99609375
--------------------------------------------------------------------------------------
-|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.43%) |Training time=0.48s (21.18%) |Others=0.10 (4.39%)|CurSamplesPerSec=14.06 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5916|ppo_ep: 1|act_loss: 0.01297760009765625|cri_loss: 0.0069427490234375|unsuper_loss: 0.0
-average reward score: 5.5
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.15%) |Training time=0.48s (22.20%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5917|ppo_ep: 1|act_loss: -0.0005397796630859375|cri_loss: 0.00010776519775390625|unsuper_loss: 0.0
-average reward score: 5.2265625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.26%) |Training time=0.48s (22.12%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5918|ppo_ep: 1|act_loss: -0.0025806427001953125|cri_loss: -0.0009760856628417969|unsuper_loss: 0.0
-average reward score: 5.4375
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.20%) |Training time=0.47s (21.63%) |Others=0.11 (5.17%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.51
-[2023-04-14 12:25:02,201] [INFO] [logging.py:96:log_dist] [Rank 0] step=5920, skipped=76, lr=[1.9410615090410984e-06, 1.9410615090410984e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:25:02,220] [INFO] [timer.py:199:stop] epoch=0/micro_step=5920/global_step=5920, RunningAvgSamplesPerSec=105.38332702712113, CurrSamplesPerSec=103.77662056098468, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:25:02,313] [INFO] [logging.py:96:log_dist] [Rank 0] step=5920, skipped=98, lr=[1.0227610086375208e-06, 1.0227610086375208e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5919|ppo_ep: 1|act_loss: -0.0345458984375|cri_loss: -0.0159912109375|unsuper_loss: 0.0
-average reward score: 5.0234375
--------------------------------------------------------------------------------------
-|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.74s (75.05%) |Training time=0.47s (20.33%) |Others=0.11 (4.62%)|CurSamplesPerSec=13.81 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5920|ppo_ep: 1|act_loss: -0.0001728534698486328|cri_loss: 6.532669067382812e-05|unsuper_loss: 0.0
-average reward score: 5.6484375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.25%) |Training time=0.48s (22.12%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5921|ppo_ep: 1|act_loss: -0.027099609375|cri_loss: -0.0131988525390625|unsuper_loss: 0.0
-average reward score: 5.16796875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.42%) |Training time=0.47s (21.93%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5922|ppo_ep: 1|act_loss: 0.04107666015625|cri_loss: 0.02093505859375|unsuper_loss: 0.0
-average reward score: 4.640625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.21%) |Training time=0.48s (22.19%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5923|ppo_ep: 1|act_loss: -0.01739501953125|cri_loss: -0.00836181640625|unsuper_loss: 0.0
-average reward score: 6.3359375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.44%) |Training time=0.48s (21.94%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5924|ppo_ep: 1|act_loss: -0.0078277587890625|cri_loss: -0.003665924072265625|unsuper_loss: 0.0
-average reward score: 5.00390625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.69%) |Training time=0.47s (21.68%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5925|ppo_ep: 1|act_loss: -0.03497314453125|cri_loss: -0.0165863037109375|unsuper_loss: 0.0
-average reward score: 4.30078125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.42%) |Training time=0.48s (21.94%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5926|ppo_ep: 1|act_loss: -0.026092529296875|cri_loss: -0.01259613037109375|unsuper_loss: 0.0
-average reward score: 5.875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.77%) |Training time=0.47s (21.59%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5927|ppo_ep: 1|act_loss: 0.0107879638671875|cri_loss: 0.005474090576171875|unsuper_loss: 0.0
-average reward score: 5.51953125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.73%) |Training time=0.47s (21.64%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5928|ppo_ep: 1|act_loss: 0.05828857421875|cri_loss: 0.030517578125|unsuper_loss: 0.0
-average reward score: 5.65625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.98%) |Training time=0.47s (21.41%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.51
-[2023-04-14 12:25:23,865] [INFO] [logging.py:96:log_dist] [Rank 0] step=5930, skipped=76, lr=[1.9261901091532087e-06, 1.9261901091532087e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:25:23,884] [INFO] [timer.py:199:stop] epoch=0/micro_step=5930/global_step=5930, RunningAvgSamplesPerSec=105.37955887751461, CurrSamplesPerSec=105.48167242600165, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:25:23,976] [INFO] [logging.py:96:log_dist] [Rank 0] step=5930, skipped=98, lr=[1.0150070442517201e-06, 1.0150070442517201e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5929|ppo_ep: 1|act_loss: 0.220703125|cri_loss: 0.143310546875|unsuper_loss: 0.0
-average reward score: 5.23828125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.79%) |Training time=0.47s (21.57%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5930|ppo_ep: 1|act_loss: -0.004055023193359375|cri_loss: -0.0017652511596679688|unsuper_loss: 0.0
-average reward score: 5.2265625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.78%) |Training time=0.47s (21.58%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5931|ppo_ep: 1|act_loss: 0.0077362060546875|cri_loss: 0.004604339599609375|unsuper_loss: 0.0
-average reward score: 5.19140625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.71%) |Training time=0.47s (21.65%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5932|ppo_ep: 1|act_loss: -0.0015506744384765625|cri_loss: -0.000553131103515625|unsuper_loss: 0.0
-average reward score: 5.72265625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.58%) |Training time=0.47s (21.79%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5933|ppo_ep: 1|act_loss: 0.006988525390625|cri_loss: 0.003902435302734375|unsuper_loss: 0.0
-average reward score: 5.46484375
--------------------------------------------------------------------------------------
-|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.61s (72.52%) |Training time=0.46s (20.62%) |Others=0.15 (6.86%)|CurSamplesPerSec=14.38 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5934|ppo_ep: 1|act_loss: -0.014007568359375|cri_loss: -0.0065155029296875|unsuper_loss: 0.0
-average reward score: 5.140625
--------------------------------------------------------------------------------------
-|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.70s (75.25%) |Training time=0.46s (20.30%) |Others=0.10 (4.45%)|CurSamplesPerSec=14.20 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5935|ppo_ep: 1|act_loss: -0.0095367431640625|cri_loss: -0.00386810302734375|unsuper_loss: 0.0
-average reward score: 5.2578125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.01%) |Training time=0.46s (21.36%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5936|ppo_ep: 1|act_loss: 0.007232666015625|cri_loss: 0.003726959228515625|unsuper_loss: 0.0
-average reward score: 6.13671875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.20%) |Training time=0.45s (21.14%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5937|ppo_ep: 1|act_loss: -0.00814056396484375|cri_loss: -0.0037689208984375|unsuper_loss: 0.0
-average reward score: 4.77734375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.14%) |Training time=0.46s (21.20%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5938|ppo_ep: 1|act_loss: -0.009490966796875|cri_loss: -0.0043487548828125|unsuper_loss: 0.0
-average reward score: 5.55859375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.15%) |Training time=0.45s (21.17%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.51
-[2023-04-14 12:25:45,564] [INFO] [logging.py:96:log_dist] [Rank 0] step=5940, skipped=76, lr=[1.911361676620899e-06, 1.911361676620899e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:25:45,583] [INFO] [timer.py:199:stop] epoch=0/micro_step=5940/global_step=5940, RunningAvgSamplesPerSec=105.38352468920138, CurrSamplesPerSec=110.74535974698605, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:25:45,675] [INFO] [logging.py:96:log_dist] [Rank 0] step=5940, skipped=98, lr=[1.0072750910438734e-06, 1.0072750910438734e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5939|ppo_ep: 1|act_loss: 0.01202392578125|cri_loss: 0.006191253662109375|unsuper_loss: 0.0
-average reward score: 6.58203125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.28%) |Training time=0.45s (21.07%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5940|ppo_ep: 1|act_loss: -0.003948211669921875|cri_loss: -0.0017728805541992188|unsuper_loss: 0.0
-average reward score: 5.39453125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.02%) |Training time=0.46s (21.32%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5941|ppo_ep: 1|act_loss: 0.029876708984375|cri_loss: 0.01517486572265625|unsuper_loss: 0.0
-average reward score: 5.10546875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.87%) |Training time=0.46s (21.46%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5942|ppo_ep: 1|act_loss: -0.0004661083221435547|cri_loss: -0.000164031982421875|unsuper_loss: 0.0
-average reward score: 4.7734375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.96%) |Training time=0.46s (21.39%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5943|ppo_ep: 1|act_loss: 0.019561767578125|cri_loss: 0.01006317138671875|unsuper_loss: 0.0
-average reward score: 4.6796875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.12%) |Training time=0.46s (21.23%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5944|ppo_ep: 1|act_loss: 0.0049285888671875|cri_loss: 0.0025653839111328125|unsuper_loss: 0.0
-average reward score: 5.7578125
--------------------------------------------------------------------------------------
-|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.63s (72.69%) |Training time=0.51s (22.85%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.26 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5945|ppo_ep: 1|act_loss: -0.007572174072265625|cri_loss: -0.003124237060546875|unsuper_loss: 0.0
-average reward score: 5.34375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.10%) |Training time=0.46s (21.18%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5946|ppo_ep: 1|act_loss: -0.01294708251953125|cri_loss: -0.006168365478515625|unsuper_loss: 0.0
-average reward score: 5.77734375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.24%) |Training time=0.46s (21.15%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5947|ppo_ep: 1|act_loss: -0.0242156982421875|cri_loss: -0.0118408203125|unsuper_loss: 0.0
-average reward score: 4.359375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.92%) |Training time=0.46s (21.46%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5948|ppo_ep: 1|act_loss: -0.00698089599609375|cri_loss: -0.0033702850341796875|unsuper_loss: 0.0
-average reward score: 5.2421875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.18%) |Training time=0.46s (21.15%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.51
-[2023-04-14 12:26:07,375] [INFO] [logging.py:96:log_dist] [Rank 0] step=5950, skipped=76, lr=[1.8965764312373112e-06, 1.8965764312373112e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:26:07,393] [INFO] [timer.py:199:stop] epoch=0/micro_step=5950/global_step=5950, RunningAvgSamplesPerSec=105.38668188338777, CurrSamplesPerSec=107.2714592506216, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:26:07,485] [INFO] [logging.py:96:log_dist] [Rank 0] step=5950, skipped=98, lr=[9.995652636201819e-07, 9.995652636201819e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5949|ppo_ep: 1|act_loss: -0.00499725341796875|cri_loss: -0.002071380615234375|unsuper_loss: 0.0
-average reward score: 5.37109375
--------------------------------------------------------------------------------------
-|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.78s (76.02%) |Training time=0.46s (19.71%) |Others=0.10 (4.27%)|CurSamplesPerSec=13.69 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5950|ppo_ep: 1|act_loss: 0.01093292236328125|cri_loss: 0.00567626953125|unsuper_loss: 0.0
-average reward score: 6.38671875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.89%) |Training time=0.46s (21.40%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5951|ppo_ep: 1|act_loss: 0.019287109375|cri_loss: 0.0101776123046875|unsuper_loss: 0.0
-average reward score: 5.6484375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.01%) |Training time=0.46s (21.34%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5952|ppo_ep: 1|act_loss: -0.00455474853515625|cri_loss: -0.0020961761474609375|unsuper_loss: 0.0
-average reward score: 5.41796875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.08%) |Training time=0.46s (21.26%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5953|ppo_ep: 1|act_loss: -0.0208282470703125|cri_loss: -0.010009765625|unsuper_loss: 0.0
-average reward score: 5.6484375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.99%) |Training time=0.46s (21.37%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5954|ppo_ep: 1|act_loss: -0.028045654296875|cri_loss: -0.0135040283203125|unsuper_loss: 0.0
-average reward score: 4.62109375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.78%) |Training time=0.46s (21.58%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5955|ppo_ep: 1|act_loss: 0.0128021240234375|cri_loss: 0.006572723388671875|unsuper_loss: 0.0
-average reward score: 5.17578125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.16%) |Training time=0.45s (21.16%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5956|ppo_ep: 1|act_loss: 0.002628326416015625|cri_loss: 0.00140380859375|unsuper_loss: 0.0
-average reward score: 5.6875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.07%) |Training time=0.46s (21.25%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5957|ppo_ep: 1|act_loss: -0.003032684326171875|cri_loss: -0.0013885498046875|unsuper_loss: 0.0
-average reward score: 6.2265625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.04%) |Training time=0.45s (21.17%) |Others=0.10 (4.80%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5958|ppo_ep: 1|act_loss: 0.0238037109375|cri_loss: 0.0124359130859375|unsuper_loss: 0.0
-average reward score: 5.20703125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.96%) |Training time=0.46s (21.35%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.51
-[2023-04-14 12:26:28,860] [INFO] [logging.py:96:log_dist] [Rank 0] step=5960, skipped=76, lr=[1.8818345921554516e-06, 1.8818345921554516e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:26:28,878] [INFO] [timer.py:199:stop] epoch=0/micro_step=5960/global_step=5960, RunningAvgSamplesPerSec=105.39163032103932, CurrSamplesPerSec=110.05142543688827, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:26:28,971] [INFO] [logging.py:96:log_dist] [Rank 0] step=5960, skipped=98, lr=[9.91877676258884e-07, 9.91877676258884e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5959|ppo_ep: 1|act_loss: 0.0390625|cri_loss: 0.0201568603515625|unsuper_loss: 0.0
-average reward score: 5.359375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.23%) |Training time=0.45s (21.11%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5960|ppo_ep: 1|act_loss: 0.01190948486328125|cri_loss: 0.006134033203125|unsuper_loss: 0.0
-average reward score: 6.1484375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.93%) |Training time=0.46s (21.34%) |Others=0.10 (4.73%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5961|ppo_ep: 1|act_loss: -0.0223541259765625|cri_loss: -0.010711669921875|unsuper_loss: 0.0
-average reward score: 4.9765625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.87%) |Training time=0.46s (21.44%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5962|ppo_ep: 1|act_loss: 0.01100921630859375|cri_loss: 0.005786895751953125|unsuper_loss: 0.0
-average reward score: 5.0859375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.26%) |Training time=0.45s (21.07%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5963|ppo_ep: 1|act_loss: -0.0018167495727539062|cri_loss: -0.0007228851318359375|unsuper_loss: 0.0
-average reward score: 5.3828125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.16%) |Training time=0.45s (21.16%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5964|ppo_ep: 1|act_loss: -0.0391845703125|cri_loss: -0.0184478759765625|unsuper_loss: 0.0
-average reward score: 5.8515625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.66%) |Training time=0.46s (21.59%) |Others=0.10 (4.75%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5965|ppo_ep: 1|act_loss: 0.0067138671875|cri_loss: 0.004070281982421875|unsuper_loss: 0.0
-average reward score: 5.40625
--------------------------------------------------------------------------------------
-|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.66s (71.69%) |Training time=0.55s (23.59%) |Others=0.11 (4.72%)|CurSamplesPerSec=13.81 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5966|ppo_ep: 1|act_loss: -0.030609130859375|cri_loss: -0.014984130859375|unsuper_loss: 0.0
-average reward score: 5.03515625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.67%) |Training time=0.46s (21.67%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5967|ppo_ep: 1|act_loss: -0.02716064453125|cri_loss: -0.0134735107421875|unsuper_loss: 0.0
-average reward score: 4.7421875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.93%) |Training time=0.46s (21.40%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5968|ppo_ep: 1|act_loss: 0.034423828125|cri_loss: 0.018524169921875|unsuper_loss: 0.0
-average reward score: 5.640625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.04%) |Training time=0.46s (21.25%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.51
-[2023-04-14 12:26:50,497] [INFO] [logging.py:96:log_dist] [Rank 0] step=5970, skipped=76, lr=[1.8671363778849347e-06, 1.8671363778849347e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:26:50,516] [INFO] [timer.py:199:stop] epoch=0/micro_step=5970/global_step=5970, RunningAvgSamplesPerSec=105.39215717659776, CurrSamplesPerSec=107.45963430119632, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:26:50,608] [INFO] [logging.py:96:log_dist] [Rank 0] step=5970, skipped=98, lr=[9.842124429085714e-07, 9.842124429085714e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5969|ppo_ep: 1|act_loss: 0.00429534912109375|cri_loss: 0.002338409423828125|unsuper_loss: 0.0
-average reward score: 4.8046875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.87%) |Training time=0.46s (21.48%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5970|ppo_ep: 1|act_loss: -0.0014858245849609375|cri_loss: -0.0004582405090332031|unsuper_loss: 0.0
-average reward score: 5.1875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.75%) |Training time=0.44s (20.59%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5971|ppo_ep: 1|act_loss: -0.0072021484375|cri_loss: -0.00341796875|unsuper_loss: 0.0
-average reward score: 5.09375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.94%) |Training time=0.44s (20.39%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5972|ppo_ep: 1|act_loss: 0.02154541015625|cri_loss: 0.01145172119140625|unsuper_loss: 0.0
-average reward score: 4.19140625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.35%) |Training time=0.45s (21.00%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5973|ppo_ep: 1|act_loss: 0.022430419921875|cri_loss: 0.01153564453125|unsuper_loss: 0.0
-average reward score: 4.9453125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.83%) |Training time=0.44s (20.44%) |Others=0.10 (4.73%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5974|ppo_ep: 1|act_loss: -0.018157958984375|cri_loss: -0.0088348388671875|unsuper_loss: 0.0
-average reward score: 4.859375
--------------------------------------------------------------------------------------
-|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.72s (75.98%) |Training time=0.44s (19.59%) |Others=0.10 (4.43%)|CurSamplesPerSec=14.11 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5975|ppo_ep: 1|act_loss: 0.016845703125|cri_loss: 0.0087127685546875|unsuper_loss: 0.0
-average reward score: 5.0625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.76%) |Training time=0.44s (20.57%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5976|ppo_ep: 1|act_loss: -0.0218505859375|cri_loss: -0.01079559326171875|unsuper_loss: 0.0
-average reward score: 5.20703125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.82%) |Training time=0.44s (20.53%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5977|ppo_ep: 1|act_loss: -0.02777099609375|cri_loss: -0.01334381103515625|unsuper_loss: 0.0
-average reward score: 5.23046875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.65%) |Training time=0.45s (20.70%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5978|ppo_ep: 1|act_loss: -0.013427734375|cri_loss: -0.006366729736328125|unsuper_loss: 0.0
-average reward score: 5.15234375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.79%) |Training time=0.44s (20.53%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.51
-[2023-04-14 12:27:12,106] [INFO] [logging.py:96:log_dist] [Rank 0] step=5980, skipped=76, lr=[1.8524820062887546e-06, 1.8524820062887546e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:27:12,124] [INFO] [timer.py:199:stop] epoch=0/micro_step=5980/global_step=5980, RunningAvgSamplesPerSec=105.40602482628336, CurrSamplesPerSec=113.2016207116422, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:27:12,218] [INFO] [logging.py:96:log_dist] [Rank 0] step=5980, skipped=98, lr=[9.765696771864933e-07, 9.765696771864933e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5979|ppo_ep: 1|act_loss: 0.0231781005859375|cri_loss: 0.01198577880859375|unsuper_loss: 0.0
-average reward score: 4.875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.55%) |Training time=0.45s (20.73%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.52
-epoch: 0|step: 5980|ppo_ep: 1|act_loss: 0.021240234375|cri_loss: 0.01136016845703125|unsuper_loss: 0.0
-average reward score: 5.29296875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.74%) |Training time=0.46s (21.57%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.52
-epoch: 0|step: 5981|ppo_ep: 1|act_loss: 0.01788330078125|cri_loss: 0.009185791015625|unsuper_loss: 0.0
-average reward score: 5.640625
--------------------------------------------------------------------------------------
-|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.74s (75.18%) |Training time=0.47s (20.50%) |Others=0.10 (4.32%)|CurSamplesPerSec=13.82 |AvgSamplesPerSec=14.51
-epoch: 0|step: 5982|ppo_ep: 1|act_loss: 0.034881591796875|cri_loss: 0.0184783935546875|unsuper_loss: 0.0
-average reward score: 5.31640625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.13%) |Training time=0.48s (22.23%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.52
-epoch: 0|step: 5983|ppo_ep: 1|act_loss: -0.0008478164672851562|cri_loss: -0.00020265579223632812|unsuper_loss: 0.0
-average reward score: 5.8046875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.40%) |Training time=0.47s (21.95%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.52
-epoch: 0|step: 5984|ppo_ep: 1|act_loss: -0.024932861328125|cri_loss: -0.01213836669921875|unsuper_loss: 0.0
-average reward score: 6.08984375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.51%) |Training time=0.47s (21.80%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.52
-[2023-04-14 12:27:25,255] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
-epoch: 0|step: 5985|ppo_ep: 1|act_loss: -0.00849151611328125|cri_loss: -0.0028514862060546875|unsuper_loss: 0.0
-average reward score: 5.6953125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.93%) |Training time=0.47s (21.88%) |Others=0.09 (4.19%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.52
-[2023-04-14 12:27:27,391] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
-epoch: 0|step: 5986|ppo_ep: 1|act_loss: -0.0246734619140625|cri_loss: -0.01187896728515625|unsuper_loss: 0.0
-average reward score: 4.84765625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.76%) |Training time=0.47s (22.04%) |Others=0.09 (4.20%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.52
-epoch: 0|step: 5987|ppo_ep: 1|act_loss: 0.0214080810546875|cri_loss: 0.0121612548828125|unsuper_loss: 0.0
-average reward score: 5.44921875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.47%) |Training time=0.47s (21.84%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.52
-epoch: 0|step: 5988|ppo_ep: 1|act_loss: -0.01424407958984375|cri_loss: -0.005828857421875|unsuper_loss: 0.0
-average reward score: 5.1875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.42%) |Training time=0.47s (21.92%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.52
-[2023-04-14 12:27:33,716] [INFO] [logging.py:96:log_dist] [Rank 0] step=5990, skipped=76, lr=[1.8378716945800507e-06, 1.8378716945800507e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:27:33,734] [INFO] [timer.py:199:stop] epoch=0/micro_step=5990/global_step=5990, RunningAvgSamplesPerSec=105.40360050801567, CurrSamplesPerSec=103.72817841981488, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:27:33,827] [INFO] [logging.py:96:log_dist] [Rank 0] step=5990, skipped=100, lr=[9.704717174393912e-07, 9.704717174393912e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5989|ppo_ep: 1|act_loss: -0.00201416015625|cri_loss: -0.0007715225219726562|unsuper_loss: 0.0
-average reward score: 5.1875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.40%) |Training time=0.47s (21.93%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.52
-epoch: 0|step: 5990|ppo_ep: 1|act_loss: -0.023345947265625|cri_loss: -0.01153564453125|unsuper_loss: 0.0
-average reward score: 5.45703125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.58%) |Training time=0.47s (21.75%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.52
-epoch: 0|step: 5991|ppo_ep: 1|act_loss: 0.0107879638671875|cri_loss: 0.005706787109375|unsuper_loss: 0.0
-average reward score: 5.2421875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.41%) |Training time=0.47s (21.93%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.52
-epoch: 0|step: 5992|ppo_ep: 1|act_loss: 0.0079498291015625|cri_loss: 0.0045623779296875|unsuper_loss: 0.0
-average reward score: 5.38671875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.40%) |Training time=0.47s (21.92%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.52
-epoch: 0|step: 5993|ppo_ep: 1|act_loss: 0.0242919921875|cri_loss: 0.01251220703125|unsuper_loss: 0.0
-average reward score: 5.09765625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.04%) |Training time=0.46s (21.28%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.52
-epoch: 0|step: 5994|ppo_ep: 1|act_loss: -0.0028228759765625|cri_loss: -0.0012311935424804688|unsuper_loss: 0.0
-average reward score: 5.796875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.64%) |Training time=0.47s (21.69%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.52
-epoch: 0|step: 5995|ppo_ep: 1|act_loss: -0.00330352783203125|cri_loss: -0.0015287399291992188|unsuper_loss: 0.0
-average reward score: 4.609375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.84%) |Training time=0.46s (21.50%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.52
-epoch: 0|step: 5996|ppo_ep: 1|act_loss: 0.0069732666015625|cri_loss: 0.0037631988525390625|unsuper_loss: 0.0
-average reward score: 6.5546875
--------------------------------------------------------------------------------------
-|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.59s (71.96%) |Training time=0.45s (20.29%) |Others=0.17 (7.74%)|CurSamplesPerSec=14.44 |AvgSamplesPerSec=14.52
-epoch: 0|step: 5997|ppo_ep: 1|act_loss: -0.005279541015625|cri_loss: -0.0024662017822265625|unsuper_loss: 0.0
-average reward score: 5.30078125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.22%) |Training time=0.45s (21.12%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.52
-epoch: 0|step: 5998|ppo_ep: 1|act_loss: -0.0168304443359375|cri_loss: -0.00815582275390625|unsuper_loss: 0.0
-average reward score: 6.7109375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.05%) |Training time=0.46s (21.30%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.52
-[2023-04-14 12:27:55,249] [INFO] [logging.py:96:log_dist] [Rank 0] step=6000, skipped=76, lr=[1.8233056593188891e-06, 1.8233056593188891e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:27:55,267] [INFO] [timer.py:199:stop] epoch=0/micro_step=6000/global_step=6000, RunningAvgSamplesPerSec=105.40675121024026, CurrSamplesPerSec=109.96774978513238, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:27:55,360] [INFO] [logging.py:96:log_dist] [Rank 0] step=6000, skipped=100, lr=[9.62869678699519e-07, 9.62869678699519e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 5999|ppo_ep: 1|act_loss: -0.0345458984375|cri_loss: -0.0167236328125|unsuper_loss: 0.0
-average reward score: 5.58984375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.19%) |Training time=0.45s (21.15%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6000|ppo_ep: 1|act_loss: -0.0182952880859375|cri_loss: -0.008880615234375|unsuper_loss: 0.0
-average reward score: 5.69140625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.99%) |Training time=0.46s (21.37%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6001|ppo_ep: 1|act_loss: -0.002777099609375|cri_loss: -0.0012273788452148438|unsuper_loss: 0.0
-average reward score: 5.46875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.03%) |Training time=0.46s (21.32%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6002|ppo_ep: 1|act_loss: 0.0582275390625|cri_loss: 0.03076171875|unsuper_loss: 0.0
-average reward score: 6.01171875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.87%) |Training time=0.47s (21.53%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6003|ppo_ep: 1|act_loss: -0.0167999267578125|cri_loss: -0.0081329345703125|unsuper_loss: 0.0
-average reward score: 4.9765625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.45%) |Training time=0.46s (20.97%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6004|ppo_ep: 1|act_loss: -0.0114898681640625|cri_loss: -0.00524139404296875|unsuper_loss: 0.0
-average reward score: 4.78125
--------------------------------------------------------------------------------------
-|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.89%) |Training time=0.47s (20.71%) |Others=0.10 (4.41%)|CurSamplesPerSec=14.17 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6005|ppo_ep: 1|act_loss: -0.0033092498779296875|cri_loss: -0.0012836456298828125|unsuper_loss: 0.0
-average reward score: 4.9453125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.10%) |Training time=0.46s (21.22%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6006|ppo_ep: 1|act_loss: 0.0024547576904296875|cri_loss: 0.001491546630859375|unsuper_loss: 0.0
-average reward score: 5.1640625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.20%) |Training time=0.45s (21.14%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6007|ppo_ep: 1|act_loss: -0.001834869384765625|cri_loss: -0.0005602836608886719|unsuper_loss: 0.0
-average reward score: 5.73828125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.16%) |Training time=0.46s (21.20%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6008|ppo_ep: 1|act_loss: -0.028045654296875|cri_loss: -0.0137939453125|unsuper_loss: 0.0
-average reward score: 4.78125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.08%) |Training time=0.46s (21.25%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.52
-[2023-04-14 12:28:16,897] [INFO] [logging.py:96:log_dist] [Rank 0] step=6010, skipped=76, lr=[1.8087841164090489e-06, 1.8087841164090489e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:28:16,915] [INFO] [timer.py:199:stop] epoch=0/micro_step=6010/global_step=6010, RunningAvgSamplesPerSec=105.41120763475524, CurrSamplesPerSec=110.29632922803589, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:28:17,008] [INFO] [logging.py:96:log_dist] [Rank 0] step=6010, skipped=100, lr=[9.552904239391322e-07, 9.552904239391322e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6009|ppo_ep: 1|act_loss: 0.01910400390625|cri_loss: 0.00994110107421875|unsuper_loss: 0.0
-average reward score: 5.140625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.25%) |Training time=0.45s (21.09%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6010|ppo_ep: 1|act_loss: -0.01312255859375|cri_loss: -0.006427764892578125|unsuper_loss: 0.0
-average reward score: 4.97265625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.06%) |Training time=0.46s (21.30%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6011|ppo_ep: 1|act_loss: -0.0002875328063964844|cri_loss: 5.936622619628906e-05|unsuper_loss: 0.0
-average reward score: 5.35546875
--------------------------------------------------------------------------------------
-|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.72s (74.56%) |Training time=0.48s (20.78%) |Others=0.11 (4.66%)|CurSamplesPerSec=13.84 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6012|ppo_ep: 1|act_loss: -0.028472900390625|cri_loss: -0.0135345458984375|unsuper_loss: 0.0
-average reward score: 5.046875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.06%) |Training time=0.46s (21.29%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6013|ppo_ep: 1|act_loss: -0.027679443359375|cri_loss: -0.013671875|unsuper_loss: 0.0
-average reward score: 5.8828125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.00%) |Training time=0.46s (21.35%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6014|ppo_ep: 1|act_loss: -0.01953125|cri_loss: -0.0093841552734375|unsuper_loss: 0.0
-average reward score: 5.3203125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.13%) |Training time=0.45s (21.19%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6015|ppo_ep: 1|act_loss: -0.02752685546875|cri_loss: -0.01348114013671875|unsuper_loss: 0.0
-average reward score: 5.9609375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.36%) |Training time=0.45s (20.97%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6016|ppo_ep: 1|act_loss: 0.017852783203125|cri_loss: 0.0092010498046875|unsuper_loss: 0.0
-average reward score: 4.97265625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.18%) |Training time=0.45s (21.05%) |Others=0.10 (4.78%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6017|ppo_ep: 1|act_loss: -0.01593017578125|cri_loss: -0.007625579833984375|unsuper_loss: 0.0
-average reward score: 5.453125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.05%) |Training time=0.45s (21.13%) |Others=0.10 (4.82%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6018|ppo_ep: 1|act_loss: 0.02801513671875|cri_loss: 0.014251708984375|unsuper_loss: 0.0
-average reward score: 5.14453125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.34%) |Training time=0.47s (21.98%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.52
-[2023-04-14 12:28:38,536] [INFO] [logging.py:96:log_dist] [Rank 0] step=6020, skipped=76, lr=[1.7943072810948272e-06, 1.7943072810948272e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:28:38,555] [INFO] [timer.py:199:stop] epoch=0/micro_step=6020/global_step=6020, RunningAvgSamplesPerSec=105.41476522670784, CurrSamplesPerSec=105.24371287159768, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:28:38,648] [INFO] [logging.py:96:log_dist] [Rank 0] step=6020, skipped=100, lr=[9.477340655010717e-07, 9.477340655010717e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6019|ppo_ep: 1|act_loss: -0.04486083984375|cri_loss: -0.021087646484375|unsuper_loss: 0.0
-average reward score: 5.32421875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.57%) |Training time=0.47s (21.75%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6020|ppo_ep: 1|act_loss: -0.005889892578125|cri_loss: -0.0028514862060546875|unsuper_loss: 0.0
-average reward score: 5.58203125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.41%) |Training time=0.47s (21.90%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6021|ppo_ep: 1|act_loss: 0.00341033935546875|cri_loss: 0.002422332763671875|unsuper_loss: 0.0
-average reward score: 5.5234375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.56%) |Training time=0.46s (21.62%) |Others=0.10 (4.82%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6022|ppo_ep: 1|act_loss: 0.0144805908203125|cri_loss: 0.007442474365234375|unsuper_loss: 0.0
-average reward score: 4.9453125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.58%) |Training time=0.47s (21.75%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6023|ppo_ep: 1|act_loss: 0.013458251953125|cri_loss: 0.006992340087890625|unsuper_loss: 0.0
-average reward score: 4.98046875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.13%) |Training time=0.48s (22.22%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6024|ppo_ep: 1|act_loss: -0.00662994384765625|cri_loss: -0.0031528472900390625|unsuper_loss: 0.0
-average reward score: 5.96484375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.48%) |Training time=0.47s (21.83%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6025|ppo_ep: 1|act_loss: 0.01381683349609375|cri_loss: 0.007251739501953125|unsuper_loss: 0.0
-average reward score: 4.8203125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.25%) |Training time=0.45s (21.08%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6026|ppo_ep: 1|act_loss: 0.056182861328125|cri_loss: 0.0298004150390625|unsuper_loss: 0.0
-average reward score: 5.30078125
--------------------------------------------------------------------------------------
-|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.77s (76.23%) |Training time=0.45s (19.42%) |Others=0.10 (4.35%)|CurSamplesPerSec=13.81 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6027|ppo_ep: 1|act_loss: 0.0283355712890625|cri_loss: 0.0158233642578125|unsuper_loss: 0.0
-average reward score: 5.421875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.15%) |Training time=0.45s (21.18%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6028|ppo_ep: 1|act_loss: 0.0100860595703125|cri_loss: 0.00598907470703125|unsuper_loss: 0.0
-average reward score: 5.4375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.31%) |Training time=0.45s (21.01%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.52
-[2023-04-14 12:29:00,177] [INFO] [logging.py:96:log_dist] [Rank 0] step=6030, skipped=76, lr=[1.7798753679578482e-06, 1.7798753679578482e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:29:00,195] [INFO] [timer.py:199:stop] epoch=0/micro_step=6030/global_step=6030, RunningAvgSamplesPerSec=105.41748877018956, CurrSamplesPerSec=108.73361352979491, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:29:00,288] [INFO] [logging.py:96:log_dist] [Rank 0] step=6030, skipped=100, lr=[9.402007153887982e-07, 9.402007153887982e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6029|ppo_ep: 1|act_loss: -0.027130126953125|cri_loss: -0.0133209228515625|unsuper_loss: 0.0
-average reward score: 5.92578125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.09%) |Training time=0.46s (21.28%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6030|ppo_ep: 1|act_loss: -0.023773193359375|cri_loss: -0.0114898681640625|unsuper_loss: 0.0
-average reward score: 5.6640625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.26%) |Training time=0.46s (21.12%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6031|ppo_ep: 1|act_loss: 0.0023746490478515625|cri_loss: 0.0015802383422851562|unsuper_loss: 0.0
-average reward score: 4.90625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.11%) |Training time=0.46s (21.23%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6032|ppo_ep: 1|act_loss: -0.03857421875|cri_loss: -0.0188446044921875|unsuper_loss: 0.0
-average reward score: 5.2421875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.22%) |Training time=0.46s (21.14%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6033|ppo_ep: 1|act_loss: -0.0029544830322265625|cri_loss: -0.0013036727905273438|unsuper_loss: 0.0
-average reward score: 5.45703125
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.36%) |Training time=0.47s (21.04%) |Others=0.12 (5.60%)|CurSamplesPerSec=14.47 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6034|ppo_ep: 1|act_loss: -0.012664794921875|cri_loss: -0.006099700927734375|unsuper_loss: 0.0
-average reward score: 5.4609375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.84%) |Training time=0.44s (20.21%) |Others=0.11 (4.94%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6035|ppo_ep: 1|act_loss: 0.019073486328125|cri_loss: 0.00981903076171875|unsuper_loss: 0.0
-average reward score: 5.4609375
--------------------------------------------------------------------------------------
-|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.75%) |Training time=0.44s (20.55%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6036|ppo_ep: 1|act_loss: 0.00531005859375|cri_loss: 0.00304412841796875|unsuper_loss: 0.0
-average reward score: 6.26953125
--------------------------------------------------------------------------------------
-|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.33%) |Training time=0.45s (21.00%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6037|ppo_ep: 1|act_loss: -0.00218963623046875|cri_loss: -0.0005550384521484375|unsuper_loss: 0.0
-average reward score: 5.1015625
--------------------------------------------------------------------------------------
-|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.52%) |Training time=0.44s (20.79%) |Others=0.10 (4.68%)|CurSamplesPerSec=15.03 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6038|ppo_ep: 1|act_loss: -0.0223236083984375|cri_loss: -0.01055908203125|unsuper_loss: 0.0
-average reward score: 5.62890625
--------------------------------------------------------------------------------------
-|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.35%) |Training time=0.45s (20.95%) |Others=0.10 (4.70%)|CurSamplesPerSec=15.01 |AvgSamplesPerSec=14.52
-[2023-04-14 12:29:21,698] [INFO] [logging.py:96:log_dist] [Rank 0] step=6040, skipped=76, lr=[1.765488590913881e-06, 1.765488590913881e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:29:21,716] [INFO] [timer.py:199:stop] epoch=0/micro_step=6040/global_step=6040, RunningAvgSamplesPerSec=105.42680483904256, CurrSamplesPerSec=113.44426751053996, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:29:21,809] [INFO] [logging.py:96:log_dist] [Rank 0] step=6040, skipped=100, lr=[9.326904852647345e-07, 9.326904852647345e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6039|ppo_ep: 1|act_loss: -0.0015201568603515625|cri_loss: -0.0006833076477050781|unsuper_loss: 0.0
-average reward score: 5.03515625
--------------------------------------------------------------------------------------
-|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.47%) |Training time=0.44s (20.87%) |Others=0.10 (4.67%)|CurSamplesPerSec=15.02 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6040|ppo_ep: 1|act_loss: -0.014862060546875|cri_loss: -0.007099151611328125|unsuper_loss: 0.0
-average reward score: 4.80078125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.08%) |Training time=0.43s (20.07%) |Others=0.10 (4.85%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6041|ppo_ep: 1|act_loss: 0.0177154541015625|cri_loss: 0.009063720703125|unsuper_loss: 0.0
-average reward score: 5.4375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.54%) |Training time=0.45s (20.79%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6042|ppo_ep: 1|act_loss: -0.003208160400390625|cri_loss: -0.0014791488647460938|unsuper_loss: 0.0
-average reward score: 5.3828125
--------------------------------------------------------------------------------------
-|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.76s (76.04%) |Training time=0.46s (19.65%) |Others=0.10 (4.31%)|CurSamplesPerSec=13.79 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6043|ppo_ep: 1|act_loss: -0.0123443603515625|cri_loss: -0.006061553955078125|unsuper_loss: 0.0
-average reward score: 5.890625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.47%) |Training time=0.45s (20.86%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6044|ppo_ep: 1|act_loss: 0.029998779296875|cri_loss: 0.0153350830078125|unsuper_loss: 0.0
-average reward score: 5.40625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.40%) |Training time=0.45s (20.92%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6045|ppo_ep: 1|act_loss: -0.00530242919921875|cri_loss: -0.002552032470703125|unsuper_loss: 0.0
-average reward score: 5.62890625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.28%) |Training time=0.45s (21.05%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6046|ppo_ep: 1|act_loss: 0.0306396484375|cri_loss: 0.016571044921875|unsuper_loss: 0.0
-average reward score: 5.4609375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.33%) |Training time=0.45s (21.01%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6047|ppo_ep: 1|act_loss: 0.002735137939453125|cri_loss: 0.001861572265625|unsuper_loss: 0.0
-average reward score: 4.98828125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.51%) |Training time=0.45s (20.81%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6048|ppo_ep: 1|act_loss: 0.006122589111328125|cri_loss: 0.0031719207763671875|unsuper_loss: 0.0
-average reward score: 4.75390625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.31%) |Training time=0.45s (21.03%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.52
-[2023-04-14 12:29:43,343] [INFO] [logging.py:96:log_dist] [Rank 0] step=6050, skipped=76, lr=[1.751147163209664e-06, 1.751147163209664e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:29:43,361] [INFO] [timer.py:199:stop] epoch=0/micro_step=6050/global_step=6050, RunningAvgSamplesPerSec=105.43758064958905, CurrSamplesPerSec=114.32953392591709, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:29:43,454] [INFO] [logging.py:96:log_dist] [Rank 0] step=6050, skipped=100, lr=[9.252034864486062e-07, 9.252034864486062e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6049|ppo_ep: 1|act_loss: -0.04718017578125|cri_loss: -0.023223876953125|unsuper_loss: 0.0
-average reward score: 5.21875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.72%) |Training time=0.44s (20.62%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6050|ppo_ep: 1|act_loss: -0.021270751953125|cri_loss: -0.01024627685546875|unsuper_loss: 0.0
-average reward score: 4.71484375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.55%) |Training time=0.45s (20.78%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6051|ppo_ep: 1|act_loss: 0.01218414306640625|cri_loss: 0.00667572021484375|unsuper_loss: 0.0
-average reward score: 4.984375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.46%) |Training time=0.45s (20.85%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6052|ppo_ep: 1|act_loss: -0.0228271484375|cri_loss: -0.0111236572265625|unsuper_loss: 0.0
-average reward score: 4.578125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.20%) |Training time=0.45s (21.14%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6053|ppo_ep: 1|act_loss: -0.0243682861328125|cri_loss: -0.011871337890625|unsuper_loss: 0.0
-average reward score: 5.25390625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.46%) |Training time=0.45s (20.86%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6054|ppo_ep: 1|act_loss: 0.0071868896484375|cri_loss: 0.003875732421875|unsuper_loss: 0.0
-average reward score: 5.2578125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.42%) |Training time=0.45s (20.91%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6055|ppo_ep: 1|act_loss: 0.035552978515625|cri_loss: 0.0187225341796875|unsuper_loss: 0.0
-average reward score: 5.453125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.50%) |Training time=0.44s (20.61%) |Others=0.11 (4.88%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6056|ppo_ep: 1|act_loss: 0.043182373046875|cri_loss: 0.0221099853515625|unsuper_loss: 0.0
-average reward score: 5.5078125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.96%) |Training time=0.44s (20.37%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6057|ppo_ep: 1|act_loss: 0.0064544677734375|cri_loss: 0.003429412841796875|unsuper_loss: 0.0
-average reward score: 4.2578125
--------------------------------------------------------------------------------------
-|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.60s (71.96%) |Training time=0.47s (21.10%) |Others=0.15 (6.94%)|CurSamplesPerSec=14.42 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6058|ppo_ep: 1|act_loss: 0.0033817291259765625|cri_loss: 0.002197265625|unsuper_loss: 0.0
-average reward score: 4.84375
--------------------------------------------------------------------------------------
-|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.70s (75.35%) |Training time=0.46s (20.20%) |Others=0.10 (4.45%)|CurSamplesPerSec=14.19 |AvgSamplesPerSec=14.52
-[2023-04-14 12:30:05,009] [INFO] [logging.py:96:log_dist] [Rank 0] step=6060, skipped=76, lr=[1.7368512974197543e-06, 1.7368512974197543e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:30:05,028] [INFO] [timer.py:199:stop] epoch=0/micro_step=6060/global_step=6060, RunningAvgSamplesPerSec=105.4466785255788, CurrSamplesPerSec=110.93914526975647, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:30:05,120] [INFO] [logging.py:96:log_dist] [Rank 0] step=6060, skipped=100, lr=[9.177398299157989e-07, 9.177398299157989e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6059|ppo_ep: 1|act_loss: 0.00881195068359375|cri_loss: 0.004749298095703125|unsuper_loss: 0.0
-average reward score: 4.671875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.34%) |Training time=0.45s (21.02%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6060|ppo_ep: 1|act_loss: 0.02923583984375|cri_loss: 0.01485443115234375|unsuper_loss: 0.0
-average reward score: 5.3359375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.95%) |Training time=0.46s (21.39%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6061|ppo_ep: 1|act_loss: -0.0119171142578125|cri_loss: -0.005054473876953125|unsuper_loss: 0.0
-average reward score: 4.85546875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.10%) |Training time=0.46s (21.25%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6062|ppo_ep: 1|act_loss: -0.020172119140625|cri_loss: -0.00948333740234375|unsuper_loss: 0.0
-average reward score: 4.79296875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.22%) |Training time=0.47s (21.92%) |Others=0.11 (4.87%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6063|ppo_ep: 1|act_loss: -0.0472412109375|cri_loss: -0.023162841796875|unsuper_loss: 0.0
-average reward score: 5.0703125
--------------------------------------------------------------------------------------
-|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.65s (72.69%) |Training time=0.52s (22.88%) |Others=0.10 (4.43%)|CurSamplesPerSec=14.09 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6064|ppo_ep: 1|act_loss: 0.00022161006927490234|cri_loss: 0.0004191398620605469|unsuper_loss: 0.0
-average reward score: 5.671875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.06%) |Training time=0.48s (22.30%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6065|ppo_ep: 1|act_loss: -0.0066070556640625|cri_loss: -0.002780914306640625|unsuper_loss: 0.0
-average reward score: 5.5
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.17%) |Training time=0.48s (22.18%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6066|ppo_ep: 1|act_loss: -0.01184844970703125|cri_loss: -0.005641937255859375|unsuper_loss: 0.0
-average reward score: 6.296875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.26%) |Training time=0.48s (22.08%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6067|ppo_ep: 1|act_loss: 0.00704193115234375|cri_loss: 0.0038299560546875|unsuper_loss: 0.0
-average reward score: 6.09375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.62%) |Training time=0.47s (21.76%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6068|ppo_ep: 1|act_loss: 0.01067352294921875|cri_loss: 0.005550384521484375|unsuper_loss: 0.0
-average reward score: 5.70703125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.10%) |Training time=0.48s (22.25%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.52
-[2023-04-14 12:30:26,681] [INFO] [logging.py:96:log_dist] [Rank 0] step=6070, skipped=76, lr=[1.7226012054433704e-06, 1.7226012054433704e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:30:26,699] [INFO] [timer.py:199:stop] epoch=0/micro_step=6070/global_step=6070, RunningAvgSamplesPerSec=105.44069020433436, CurrSamplesPerSec=101.47307651888987, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:30:26,792] [INFO] [logging.py:96:log_dist] [Rank 0] step=6070, skipped=100, lr=[9.102996262957067e-07, 9.102996262957067e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6069|ppo_ep: 1|act_loss: 0.0227508544921875|cri_loss: 0.0118408203125|unsuper_loss: 0.0
-average reward score: 5.07421875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.20%) |Training time=0.48s (22.18%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6070|ppo_ep: 1|act_loss: 0.015289306640625|cri_loss: 0.00815582275390625|unsuper_loss: 0.0
-average reward score: 4.36328125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.43%) |Training time=0.47s (21.86%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6071|ppo_ep: 1|act_loss: 0.0294189453125|cri_loss: 0.01490020751953125|unsuper_loss: 0.0
-average reward score: 4.921875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.87%) |Training time=0.44s (20.45%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6072|ppo_ep: 1|act_loss: -0.010040283203125|cri_loss: -0.004913330078125|unsuper_loss: 0.0
-average reward score: 5.921875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.25%) |Training time=0.48s (22.08%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6073|ppo_ep: 1|act_loss: 0.0377197265625|cri_loss: 0.01934814453125|unsuper_loss: 0.0
-average reward score: 5.796875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.13%) |Training time=0.45s (21.18%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6074|ppo_ep: 1|act_loss: 0.00595855712890625|cri_loss: 0.003124237060546875|unsuper_loss: 0.0
-average reward score: 5.44140625
--------------------------------------------------------------------------------------
-|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.44%) |Training time=0.47s (20.24%) |Others=0.10 (4.32%)|CurSamplesPerSec=13.80 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6075|ppo_ep: 1|act_loss: -0.0601806640625|cri_loss: -0.0290985107421875|unsuper_loss: 0.0
-average reward score: 4.6640625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.94%) |Training time=0.44s (20.38%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6076|ppo_ep: 1|act_loss: 0.04913330078125|cri_loss: 0.02716064453125|unsuper_loss: 0.0
-average reward score: 4.61328125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.77%) |Training time=0.44s (20.57%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6077|ppo_ep: 1|act_loss: 0.01459503173828125|cri_loss: 0.00791168212890625|unsuper_loss: 0.0
-average reward score: 5.140625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.64%) |Training time=0.44s (20.70%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6078|ppo_ep: 1|act_loss: -0.014251708984375|cri_loss: -0.006336212158203125|unsuper_loss: 0.0
-average reward score: 5.8359375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.94%) |Training time=0.44s (20.39%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.52
-[2023-04-14 12:30:48,347] [INFO] [logging.py:96:log_dist] [Rank 0] step=6080, skipped=76, lr=[1.7083970985012522e-06, 1.7083970985012522e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:30:48,365] [INFO] [timer.py:199:stop] epoch=0/micro_step=6080/global_step=6080, RunningAvgSamplesPerSec=105.44891692173641, CurrSamplesPerSec=114.66817942601718, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:30:48,458] [INFO] [logging.py:96:log_dist] [Rank 0] step=6080, skipped=100, lr=[9.028829858700974e-07, 9.028829858700974e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6079|ppo_ep: 1|act_loss: -0.020721435546875|cri_loss: -0.01010894775390625|unsuper_loss: 0.0
-average reward score: 5.296875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.84%) |Training time=0.44s (20.52%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6080|ppo_ep: 1|act_loss: -0.01166534423828125|cri_loss: -0.005489349365234375|unsuper_loss: 0.0
-average reward score: 5.03515625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.84%) |Training time=0.44s (20.50%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6081|ppo_ep: 1|act_loss: 0.0248565673828125|cri_loss: 0.012939453125|unsuper_loss: 0.0
-average reward score: 6.3203125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.44%) |Training time=0.45s (20.91%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6082|ppo_ep: 1|act_loss: 0.0131683349609375|cri_loss: 0.0073394775390625|unsuper_loss: 0.0
-average reward score: 4.7109375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.86%) |Training time=0.44s (20.47%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6083|ppo_ep: 1|act_loss: 0.021148681640625|cri_loss: 0.010986328125|unsuper_loss: 0.0
-average reward score: 6.01953125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.68%) |Training time=0.44s (20.66%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6084|ppo_ep: 1|act_loss: -0.002887725830078125|cri_loss: -0.0010995864868164062|unsuper_loss: 0.0
-average reward score: 5.5859375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.76%) |Training time=0.44s (20.56%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6085|ppo_ep: 1|act_loss: 0.02294921875|cri_loss: 0.012542724609375|unsuper_loss: 0.0
-average reward score: 5.28125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.83%) |Training time=0.44s (20.47%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6086|ppo_ep: 1|act_loss: -0.003551483154296875|cri_loss: -0.0016345977783203125|unsuper_loss: 0.0
-average reward score: 6.015625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.91%) |Training time=0.44s (20.40%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.52
-[2023-04-14 12:31:05,617] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
-epoch: 0|step: 6087|ppo_ep: 1|act_loss: 0.0240478515625|cri_loss: 0.0125885009765625|unsuper_loss: 0.0
-average reward score: 5.265625
--------------------------------------------------------------------------------------
-|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.60s (75.28%) |Training time=0.44s (20.50%) |Others=0.09 (4.22%)|CurSamplesPerSec=15.01 |AvgSamplesPerSec=14.52
-[2023-04-14 12:31:07,750] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
-epoch: 0|step: 6088|ppo_ep: 1|act_loss: 0.035614013671875|cri_loss: 0.01824951171875|unsuper_loss: 0.0
-average reward score: 5.05859375
--------------------------------------------------------------------------------------
-|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.60s (75.12%) |Training time=0.44s (20.68%) |Others=0.09 (4.20%)|CurSamplesPerSec=15.00 |AvgSamplesPerSec=14.52
-[2023-04-14 12:31:09,949] [INFO] [logging.py:96:log_dist] [Rank 0] step=6090, skipped=76, lr=[1.6942391871325284e-06, 1.6942391871325284e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:31:09,968] [INFO] [timer.py:199:stop] epoch=0/micro_step=6090/global_step=6090, RunningAvgSamplesPerSec=105.46141074976802, CurrSamplesPerSec=103.6843468130931, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:31:10,060] [INFO] [logging.py:96:log_dist] [Rank 0] step=6090, skipped=102, lr=[8.969667129160547e-07, 8.969667129160547e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6089|ppo_ep: 1|act_loss: -0.011199951171875|cri_loss: -0.005390167236328125|unsuper_loss: 0.0
-average reward score: 5.703125
--------------------------------------------------------------------------------------
-|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.73s (74.93%) |Training time=0.47s (20.43%) |Others=0.11 (4.64%)|CurSamplesPerSec=13.84 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6090|ppo_ep: 1|act_loss: -0.021484375|cri_loss: -0.01032257080078125|unsuper_loss: 0.0
-average reward score: 5.59375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.71%) |Training time=0.44s (20.63%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6091|ppo_ep: 1|act_loss: -0.01059722900390625|cri_loss: -0.0050811767578125|unsuper_loss: 0.0
-average reward score: 4.9921875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.90%) |Training time=0.44s (20.43%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6092|ppo_ep: 1|act_loss: -0.02288818359375|cri_loss: -0.01100921630859375|unsuper_loss: 0.0
-average reward score: 4.60546875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.33%) |Training time=0.43s (20.02%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6093|ppo_ep: 1|act_loss: 0.01059722900390625|cri_loss: 0.00543975830078125|unsuper_loss: 0.0
-average reward score: 5.87890625
--------------------------------------------------------------------------------------
-|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.70s (75.77%) |Training time=0.45s (19.79%) |Others=0.10 (4.44%)|CurSamplesPerSec=14.23 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6094|ppo_ep: 1|act_loss: 0.0088653564453125|cri_loss: 0.0054931640625|unsuper_loss: 0.0
-average reward score: 5.05859375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.66%) |Training time=0.44s (20.68%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6095|ppo_ep: 1|act_loss: 0.002613067626953125|cri_loss: 0.0014209747314453125|unsuper_loss: 0.0
-average reward score: 5.1796875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.98%) |Training time=0.44s (20.33%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6096|ppo_ep: 1|act_loss: -0.00624847412109375|cri_loss: -0.0026302337646484375|unsuper_loss: 0.0
-average reward score: 4.890625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.66%) |Training time=0.45s (20.70%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6097|ppo_ep: 1|act_loss: 0.03790283203125|cri_loss: 0.0205535888671875|unsuper_loss: 0.0
-average reward score: 5.671875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.72%) |Training time=0.44s (20.63%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6098|ppo_ep: 1|act_loss: -0.00748443603515625|cri_loss: -0.00344085693359375|unsuper_loss: 0.0
-average reward score: 5.6015625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.73%) |Training time=0.44s (20.59%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.52
-[2023-04-14 12:31:31,534] [INFO] [logging.py:96:log_dist] [Rank 0] step=6100, skipped=76, lr=[1.6801276811916005e-06, 1.6801276811916005e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:31:31,552] [INFO] [timer.py:199:stop] epoch=0/micro_step=6100/global_step=6100, RunningAvgSamplesPerSec=105.47637239879195, CurrSamplesPerSec=115.30227455669898, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:31:31,645] [INFO] [logging.py:96:log_dist] [Rank 0] step=6100, skipped=102, lr=[8.895927630346729e-07, 8.895927630346729e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6099|ppo_ep: 1|act_loss: -0.0009832382202148438|cri_loss: -2.765655517578125e-05|unsuper_loss: 0.0
-average reward score: 5.52734375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.79%) |Training time=0.44s (20.57%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6100|ppo_ep: 1|act_loss: 0.0250396728515625|cri_loss: 0.012725830078125|unsuper_loss: 0.0
-average reward score: 4.625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.43%) |Training time=0.45s (20.94%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6101|ppo_ep: 1|act_loss: 0.025787353515625|cri_loss: 0.01318359375|unsuper_loss: 0.0
-average reward score: 5.33203125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.52%) |Training time=0.43s (19.81%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6102|ppo_ep: 1|act_loss: 0.00839996337890625|cri_loss: 0.0043182373046875|unsuper_loss: 0.0
-average reward score: 5.15234375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.46%) |Training time=0.43s (19.83%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6103|ppo_ep: 1|act_loss: -0.0005388259887695312|cri_loss: 0.00017642974853515625|unsuper_loss: 0.0
-average reward score: 5.0546875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.88%) |Training time=0.44s (20.47%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6104|ppo_ep: 1|act_loss: 0.0017223358154296875|cri_loss: 0.001003265380859375|unsuper_loss: 0.0
-average reward score: 4.6953125
--------------------------------------------------------------------------------------
-|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.70s (73.49%) |Training time=0.51s (22.14%) |Others=0.10 (4.37%)|CurSamplesPerSec=13.86 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6105|ppo_ep: 1|act_loss: 0.0120697021484375|cri_loss: 0.00655364990234375|unsuper_loss: 0.0
-average reward score: 5.53515625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.03%) |Training time=0.44s (20.32%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6106|ppo_ep: 1|act_loss: -0.000453948974609375|cri_loss: 0.0004172325134277344|unsuper_loss: 0.0
-average reward score: 5.02734375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.64s (76.35%) |Training time=0.41s (18.98%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6107|ppo_ep: 1|act_loss: 0.0012788772583007812|cri_loss: 0.0009832382202148438|unsuper_loss: 0.0
-average reward score: 5.0625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.86%) |Training time=0.42s (19.38%) |Others=0.10 (4.75%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6108|ppo_ep: 1|act_loss: -0.0181121826171875|cri_loss: -0.0089263916015625|unsuper_loss: 0.0
-average reward score: 5.05859375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.97%) |Training time=0.44s (20.36%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.52
-[2023-04-14 12:31:53,178] [INFO] [logging.py:96:log_dist] [Rank 0] step=6110, skipped=76, lr=[1.666062789845028e-06, 1.666062789845028e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:31:53,197] [INFO] [timer.py:199:stop] epoch=0/micro_step=6110/global_step=6110, RunningAvgSamplesPerSec=105.49268024048469, CurrSamplesPerSec=120.01878555601061, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:31:53,289] [INFO] [logging.py:96:log_dist] [Rank 0] step=6110, skipped=102, lr=[8.822426832734388e-07, 8.822426832734388e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6109|ppo_ep: 1|act_loss: 0.03955078125|cri_loss: 0.0203399658203125|unsuper_loss: 0.0
-average reward score: 5.234375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.27%) |Training time=0.43s (20.06%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6110|ppo_ep: 1|act_loss: -0.0210723876953125|cri_loss: -0.01020050048828125|unsuper_loss: 0.0
-average reward score: 6.0546875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.69%) |Training time=0.44s (20.64%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6111|ppo_ep: 1|act_loss: -0.01251220703125|cri_loss: -0.005100250244140625|unsuper_loss: 0.0
-average reward score: 5.03125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.78%) |Training time=0.44s (20.57%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6112|ppo_ep: 1|act_loss: -0.00618743896484375|cri_loss: -0.002872467041015625|unsuper_loss: 0.0
-average reward score: 5.04296875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.10%) |Training time=0.43s (20.21%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6113|ppo_ep: 1|act_loss: 0.021453857421875|cri_loss: 0.0109710693359375|unsuper_loss: 0.0
-average reward score: 4.69921875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.97%) |Training time=0.44s (20.36%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6114|ppo_ep: 1|act_loss: -0.00036072731018066406|cri_loss: 3.933906555175781e-05|unsuper_loss: 0.0
-average reward score: 5.62890625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.72%) |Training time=0.44s (20.60%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6115|ppo_ep: 1|act_loss: -0.0131378173828125|cri_loss: -0.006378173828125|unsuper_loss: 0.0
-average reward score: 5.27734375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.92%) |Training time=0.44s (20.41%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6116|ppo_ep: 1|act_loss: 0.027069091796875|cri_loss: 0.013885498046875|unsuper_loss: 0.0
-average reward score: 4.9609375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.90%) |Training time=0.44s (20.42%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6117|ppo_ep: 1|act_loss: 0.03656005859375|cri_loss: 0.019195556640625|unsuper_loss: 0.0
-average reward score: 5.3828125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.26%) |Training time=0.46s (21.11%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6118|ppo_ep: 1|act_loss: -0.01161956787109375|cri_loss: -0.00482177734375|unsuper_loss: 0.0
-average reward score: 5.4453125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.63%) |Training time=0.45s (20.71%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.52
-[2023-04-14 12:32:14,701] [INFO] [logging.py:96:log_dist] [Rank 0] step=6120, skipped=76, lr=[1.6520447215684334e-06, 1.6520447215684334e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:32:14,705] [INFO] [timer.py:199:stop] epoch=0/micro_step=6120/global_step=6120, RunningAvgSamplesPerSec=105.50570028757365, CurrSamplesPerSec=111.2033127913316, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:32:14,868] [INFO] [logging.py:96:log_dist] [Rank 0] step=6120, skipped=102, lr=[8.749165825782657e-07, 8.749165825782657e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6119|ppo_ep: 1|act_loss: -0.0025386810302734375|cri_loss: -0.0011510848999023438|unsuper_loss: 0.0
-average reward score: 5.0234375
--------------------------------------------------------------------------------------
-|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.61s (72.14%) |Training time=0.48s (21.48%) |Others=0.14 (6.38%)|CurSamplesPerSec=14.36 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6120|ppo_ep: 1|act_loss: -0.005218505859375|cri_loss: -0.0017795562744140625|unsuper_loss: 0.0
-average reward score: 5.49609375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.63%) |Training time=0.45s (20.72%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6121|ppo_ep: 1|act_loss: 0.0121917724609375|cri_loss: 0.00702667236328125|unsuper_loss: 0.0
-average reward score: 5.09375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.87%) |Training time=0.44s (20.49%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6122|ppo_ep: 1|act_loss: -0.012176513671875|cri_loss: -0.00586700439453125|unsuper_loss: 0.0
-average reward score: 5.0390625
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.87%) |Training time=0.45s (20.53%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6123|ppo_ep: 1|act_loss: -0.003131866455078125|cri_loss: -0.0015077590942382812|unsuper_loss: 0.0
-average reward score: 5.08203125
--------------------------------------------------------------------------------------
-|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.71s (75.78%) |Training time=0.45s (19.78%) |Others=0.10 (4.44%)|CurSamplesPerSec=14.22 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6124|ppo_ep: 1|act_loss: 0.003070831298828125|cri_loss: 0.002010345458984375|unsuper_loss: 0.0
-average reward score: 5.22265625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.59%) |Training time=0.45s (20.78%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6125|ppo_ep: 1|act_loss: -0.0011262893676757812|cri_loss: -7.152557373046875e-05|unsuper_loss: 0.0
-average reward score: 5.19921875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.95%) |Training time=0.44s (20.38%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6126|ppo_ep: 1|act_loss: 0.0059051513671875|cri_loss: 0.003509521484375|unsuper_loss: 0.0
-average reward score: 4.73828125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.75%) |Training time=0.44s (20.61%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6127|ppo_ep: 1|act_loss: -0.0007395744323730469|cri_loss: -0.0002562999725341797|unsuper_loss: 0.0
-average reward score: 4.9921875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.55%) |Training time=0.45s (20.82%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6128|ppo_ep: 1|act_loss: 0.0227203369140625|cri_loss: 0.0115966796875|unsuper_loss: 0.0
-average reward score: 6.3515625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.71%) |Training time=0.44s (20.63%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.52
-[2023-04-14 12:32:36,432] [INFO] [logging.py:96:log_dist] [Rank 0] step=6130, skipped=76, lr=[1.6380736841434013e-06, 1.6380736841434013e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:32:36,450] [INFO] [timer.py:199:stop] epoch=0/micro_step=6130/global_step=6130, RunningAvgSamplesPerSec=105.5171589615916, CurrSamplesPerSec=112.04278099865182, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:32:36,542] [INFO] [logging.py:96:log_dist] [Rank 0] step=6130, skipped=102, lr=[8.676145695396399e-07, 8.676145695396399e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6129|ppo_ep: 1|act_loss: -0.01308441162109375|cri_loss: -0.006229400634765625|unsuper_loss: 0.0
-average reward score: 5.44921875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.58%) |Training time=0.45s (20.81%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6130|ppo_ep: 1|act_loss: 0.003566741943359375|cri_loss: 0.00501251220703125|unsuper_loss: 0.0
-average reward score: 4.9453125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.52%) |Training time=0.45s (20.82%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6131|ppo_ep: 1|act_loss: -0.010467529296875|cri_loss: -0.005153656005859375|unsuper_loss: 0.0
-average reward score: 6.0703125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.16%) |Training time=0.43s (20.14%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6132|ppo_ep: 1|act_loss: 0.0159759521484375|cri_loss: 0.00836181640625|unsuper_loss: 0.0
-average reward score: 5.53125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.45%) |Training time=0.47s (21.90%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6133|ppo_ep: 1|act_loss: 0.01029205322265625|cri_loss: 0.00539398193359375|unsuper_loss: 0.0
-average reward score: 4.8046875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.15%) |Training time=0.45s (21.18%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6134|ppo_ep: 1|act_loss: 0.028656005859375|cri_loss: 0.01476287841796875|unsuper_loss: 0.0
-average reward score: 5.22265625
--------------------------------------------------------------------------------------
-|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.57s (69.15%) |Training time=0.60s (26.43%) |Others=0.10 (4.42%)|CurSamplesPerSec=14.06 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6135|ppo_ep: 1|act_loss: 0.00274658203125|cri_loss: 0.0015954971313476562|unsuper_loss: 0.0
-average reward score: 5.375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.51%) |Training time=0.47s (21.83%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6136|ppo_ep: 1|act_loss: -0.020721435546875|cri_loss: -0.00540924072265625|unsuper_loss: 0.0
-average reward score: 5.40234375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.09%) |Training time=0.48s (22.28%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6137|ppo_ep: 1|act_loss: -0.032318115234375|cri_loss: -0.015716552734375|unsuper_loss: 0.0
-average reward score: 4.9296875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.13%) |Training time=0.48s (22.21%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6138|ppo_ep: 1|act_loss: 0.0096435546875|cri_loss: 0.005260467529296875|unsuper_loss: 0.0
-average reward score: 5.15625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.38%) |Training time=0.47s (21.95%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.52
-[2023-04-14 12:32:58,076] [INFO] [logging.py:96:log_dist] [Rank 0] step=6140, skipped=76, lr=[1.6241498846544112e-06, 1.6241498846544112e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:32:58,094] [INFO] [timer.py:199:stop] epoch=0/micro_step=6140/global_step=6140, RunningAvgSamplesPerSec=105.51132018702846, CurrSamplesPerSec=102.04126119112495, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:32:58,188] [INFO] [logging.py:96:log_dist] [Rank 0] step=6140, skipped=102, lr=[8.60336752391008e-07, 8.60336752391008e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6139|ppo_ep: 1|act_loss: 0.007755279541015625|cri_loss: 0.004730224609375|unsuper_loss: 0.0
-average reward score: 5.875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.17%) |Training time=0.48s (22.20%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6140|ppo_ep: 1|act_loss: 0.006702423095703125|cri_loss: 0.0040435791015625|unsuper_loss: 0.0
-average reward score: 5.20703125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.24%) |Training time=0.48s (22.09%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6141|ppo_ep: 1|act_loss: -0.02569580078125|cri_loss: -0.012451171875|unsuper_loss: 0.0
-average reward score: 5.828125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.66%) |Training time=0.47s (21.73%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6142|ppo_ep: 1|act_loss: -0.0069122314453125|cri_loss: -0.00293731689453125|unsuper_loss: 0.0
-average reward score: 5.015625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.20%) |Training time=0.48s (22.14%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6143|ppo_ep: 1|act_loss: 0.0230712890625|cri_loss: 0.0119781494140625|unsuper_loss: 0.0
-average reward score: 5.76171875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.46%) |Training time=0.47s (21.88%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6144|ppo_ep: 1|act_loss: 0.0186614990234375|cri_loss: 0.0100250244140625|unsuper_loss: 0.0
-average reward score: 6.359375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.11%) |Training time=0.48s (22.25%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6145|ppo_ep: 1|act_loss: -0.0276031494140625|cri_loss: -0.01342010498046875|unsuper_loss: 0.0
-average reward score: 5.58984375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.47%) |Training time=0.47s (21.87%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6146|ppo_ep: 1|act_loss: -0.0023174285888671875|cri_loss: -0.0010881423950195312|unsuper_loss: 0.0
-average reward score: 5.1015625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.28%) |Training time=0.48s (22.05%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6147|ppo_ep: 1|act_loss: -0.05145263671875|cri_loss: -0.0251922607421875|unsuper_loss: 0.0
-average reward score: 5.57421875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.59%) |Training time=0.47s (21.74%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6148|ppo_ep: 1|act_loss: 0.0002200603485107422|cri_loss: 0.0001798868179321289|unsuper_loss: 0.0
-average reward score: 5.83203125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.25%) |Training time=0.48s (22.12%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.52
-[2023-04-14 12:33:19,694] [INFO] [logging.py:96:log_dist] [Rank 0] step=6150, skipped=76, lr=[1.6102735294857612e-06, 1.6102735294857612e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:33:19,711] [INFO] [timer.py:199:stop] epoch=0/micro_step=6150/global_step=6150, RunningAvgSamplesPerSec=105.50358234499453, CurrSamplesPerSec=88.18394502043337, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:33:19,859] [INFO] [logging.py:96:log_dist] [Rank 0] step=6150, skipped=102, lr=[8.530832390071778e-07, 8.530832390071778e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6149|ppo_ep: 1|act_loss: 0.0155792236328125|cri_loss: 0.008056640625|unsuper_loss: 0.0
-average reward score: 5.6640625
--------------------------------------------------------------------------------------
-|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.58s (69.94%) |Training time=0.53s (23.28%) |Others=0.15 (6.79%)|CurSamplesPerSec=14.17 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6150|ppo_ep: 1|act_loss: -0.014556884765625|cri_loss: -0.00690460205078125|unsuper_loss: 0.0
-average reward score: 5.390625
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.53%) |Training time=0.48s (21.80%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.50 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6151|ppo_ep: 1|act_loss: -0.001575469970703125|cri_loss: -0.0005412101745605469|unsuper_loss: 0.0
-average reward score: 5.90625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.66s (76.22%) |Training time=0.42s (19.07%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6152|ppo_ep: 1|act_loss: 0.0008678436279296875|cri_loss: 0.0005321502685546875|unsuper_loss: 0.0
-average reward score: 4.9375
--------------------------------------------------------------------------------------
-|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.69s (75.80%) |Training time=0.43s (19.49%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.38 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6153|ppo_ep: 1|act_loss: 0.0113983154296875|cri_loss: 0.0059967041015625|unsuper_loss: 0.0
-average reward score: 5.484375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.21%) |Training time=0.43s (20.03%) |Others=0.10 (4.76%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6154|ppo_ep: 1|act_loss: 0.027252197265625|cri_loss: 0.01384735107421875|unsuper_loss: 0.0
-average reward score: 5.7890625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.07%) |Training time=0.46s (21.25%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6155|ppo_ep: 1|act_loss: 0.01102447509765625|cri_loss: 0.00586700439453125|unsuper_loss: 0.0
-average reward score: 5.109375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.58%) |Training time=0.45s (20.70%) |Others=0.10 (4.73%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6156|ppo_ep: 1|act_loss: -0.01934814453125|cri_loss: -0.0091705322265625|unsuper_loss: 0.0
-average reward score: 5.5859375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.83%) |Training time=0.44s (20.44%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6157|ppo_ep: 1|act_loss: -0.01885986328125|cri_loss: -0.00922393798828125|unsuper_loss: 0.0
-average reward score: 5.7734375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.70%) |Training time=0.43s (19.58%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6158|ppo_ep: 1|act_loss: -0.0258026123046875|cri_loss: -0.01213836669921875|unsuper_loss: 0.0
-average reward score: 5.41796875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.72%) |Training time=0.47s (21.64%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.52
-[2023-04-14 12:33:41,552] [INFO] [logging.py:96:log_dist] [Rank 0] step=6160, skipped=76, lr=[1.5964448243185107e-06, 1.5964448243185107e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:33:41,570] [INFO] [timer.py:199:stop] epoch=0/micro_step=6160/global_step=6160, RunningAvgSamplesPerSec=105.51397045659475, CurrSamplesPerSec=105.2155795695511, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:33:41,663] [INFO] [logging.py:96:log_dist] [Rank 0] step=6160, skipped=102, lr=[8.458541369027159e-07, 8.458541369027159e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6159|ppo_ep: 1|act_loss: 0.0109405517578125|cri_loss: 0.005832672119140625|unsuper_loss: 0.0
-average reward score: 4.83984375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.83%) |Training time=0.47s (21.55%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6160|ppo_ep: 1|act_loss: 0.0102691650390625|cri_loss: 0.005405426025390625|unsuper_loss: 0.0
-average reward score: 5.4453125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.32%) |Training time=0.48s (22.04%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6161|ppo_ep: 1|act_loss: -0.024322509765625|cri_loss: -0.01200103759765625|unsuper_loss: 0.0
-average reward score: 5.23046875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.73%) |Training time=0.47s (21.62%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6162|ppo_ep: 1|act_loss: -0.0107574462890625|cri_loss: -0.00484466552734375|unsuper_loss: 0.0
-average reward score: 6.30078125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.00%) |Training time=0.44s (20.34%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6163|ppo_ep: 1|act_loss: 0.029083251953125|cri_loss: 0.015167236328125|unsuper_loss: 0.0
-average reward score: 5.578125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.81%) |Training time=0.42s (19.54%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6164|ppo_ep: 1|act_loss: 0.0117950439453125|cri_loss: 0.00669097900390625|unsuper_loss: 0.0
-average reward score: 5.7734375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.53%) |Training time=0.45s (20.84%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6165|ppo_ep: 1|act_loss: 0.02691650390625|cri_loss: 0.01380157470703125|unsuper_loss: 0.0
-average reward score: 5.77734375
--------------------------------------------------------------------------------------
-|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.74s (74.71%) |Training time=0.49s (20.97%) |Others=0.10 (4.33%)|CurSamplesPerSec=13.76 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6166|ppo_ep: 1|act_loss: 0.00437164306640625|cri_loss: 0.00235748291015625|unsuper_loss: 0.0
-average reward score: 5.08203125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.49%) |Training time=0.45s (20.88%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6167|ppo_ep: 1|act_loss: -0.00687408447265625|cri_loss: -0.0029582977294921875|unsuper_loss: 0.0
-average reward score: 5.45703125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.76%) |Training time=0.45s (20.59%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6168|ppo_ep: 1|act_loss: 0.017120361328125|cri_loss: 0.0088958740234375|unsuper_loss: 0.0
-average reward score: 5.484375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.53%) |Training time=0.45s (20.81%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.52
-[2023-04-14 12:34:03,411] [INFO] [logging.py:96:log_dist] [Rank 0] step=6170, skipped=76, lr=[1.5826639741274258e-06, 1.5826639741274258e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:34:03,430] [INFO] [timer.py:199:stop] epoch=0/micro_step=6170/global_step=6170, RunningAvgSamplesPerSec=105.5204895216476, CurrSamplesPerSec=110.71758783026316, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:34:03,522] [INFO] [logging.py:96:log_dist] [Rank 0] step=6170, skipped=102, lr=[8.386495532303557e-07, 8.386495532303557e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6169|ppo_ep: 1|act_loss: -0.0137786865234375|cri_loss: -0.006626129150390625|unsuper_loss: 0.0
-average reward score: 5.09375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.70%) |Training time=0.45s (20.70%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6170|ppo_ep: 1|act_loss: -0.0162811279296875|cri_loss: -0.00791168212890625|unsuper_loss: 0.0
-average reward score: 5.17578125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.03%) |Training time=0.44s (20.29%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6171|ppo_ep: 1|act_loss: -0.020263671875|cri_loss: -0.0099029541015625|unsuper_loss: 0.0
-average reward score: 5.26953125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.14%) |Training time=0.44s (20.18%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6172|ppo_ep: 1|act_loss: -0.026458740234375|cri_loss: -0.00772857666015625|unsuper_loss: 0.0
-average reward score: 5.36328125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.13%) |Training time=0.44s (20.22%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6173|ppo_ep: 1|act_loss: -0.0079498291015625|cri_loss: -0.003582000732421875|unsuper_loss: 0.0
-average reward score: 5.44921875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.18%) |Training time=0.44s (20.17%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6174|ppo_ep: 1|act_loss: -0.00720977783203125|cri_loss: -0.003360748291015625|unsuper_loss: 0.0
-average reward score: 4.85546875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.95%) |Training time=0.44s (20.40%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6175|ppo_ep: 1|act_loss: 0.0032806396484375|cri_loss: 0.0018138885498046875|unsuper_loss: 0.0
-average reward score: 6.015625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.95%) |Training time=0.44s (20.39%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6176|ppo_ep: 1|act_loss: 0.010406494140625|cri_loss: 0.005847930908203125|unsuper_loss: 0.0
-average reward score: 5.75
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.63%) |Training time=0.45s (20.71%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6177|ppo_ep: 1|act_loss: 0.001491546630859375|cri_loss: 0.0009002685546875|unsuper_loss: 0.0
-average reward score: 5.765625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.40%) |Training time=0.45s (20.66%) |Others=0.11 (4.94%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6178|ppo_ep: 1|act_loss: 0.00827789306640625|cri_loss: 0.004497528076171875|unsuper_loss: 0.0
-average reward score: 5.4296875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.27%) |Training time=0.46s (21.11%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.52
-[2023-04-14 12:34:25,037] [INFO] [logging.py:96:log_dist] [Rank 0] step=6180, skipped=76, lr=[1.5689311831779562e-06, 1.5689311831779562e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:34:25,056] [INFO] [timer.py:199:stop] epoch=0/micro_step=6180/global_step=6180, RunningAvgSamplesPerSec=105.53333925269833, CurrSamplesPerSec=109.81793903372979, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:34:25,148] [INFO] [logging.py:96:log_dist] [Rank 0] step=6180, skipped=102, lr=[8.314695947794054e-07, 8.314695947794054e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6179|ppo_ep: 1|act_loss: 0.0247955322265625|cri_loss: 0.012725830078125|unsuper_loss: 0.0
-average reward score: 4.7109375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.36%) |Training time=0.46s (21.04%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6180|ppo_ep: 1|act_loss: -0.02691650390625|cri_loss: -0.0132904052734375|unsuper_loss: 0.0
-average reward score: 5.65234375
--------------------------------------------------------------------------------------
-|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.63s (71.82%) |Training time=0.53s (23.42%) |Others=0.11 (4.76%)|CurSamplesPerSec=14.07 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6181|ppo_ep: 1|act_loss: -0.01049041748046875|cri_loss: -0.004871368408203125|unsuper_loss: 0.0
-average reward score: 5.95703125
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.51%) |Training time=0.46s (20.91%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6182|ppo_ep: 1|act_loss: -8.463859558105469e-05|cri_loss: 0.00012814998626708984|unsuper_loss: 0.0
-average reward score: 5.5625
--------------------------------------------------------------------------------------
-|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.71s (75.72%) |Training time=0.45s (19.84%) |Others=0.10 (4.43%)|CurSamplesPerSec=14.14 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6183|ppo_ep: 1|act_loss: -0.0137481689453125|cri_loss: -0.00630950927734375|unsuper_loss: 0.0
-average reward score: 4.7421875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.68%) |Training time=0.45s (20.69%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6184|ppo_ep: 1|act_loss: -0.0121002197265625|cri_loss: -0.00557708740234375|unsuper_loss: 0.0
-average reward score: 4.9453125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.58%) |Training time=0.45s (20.78%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6185|ppo_ep: 1|act_loss: 0.01291656494140625|cri_loss: 0.00736236572265625|unsuper_loss: 0.0
-average reward score: 5.94921875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.87%) |Training time=0.44s (20.50%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6186|ppo_ep: 1|act_loss: 0.007572174072265625|cri_loss: 0.00428009033203125|unsuper_loss: 0.0
-average reward score: 5.45703125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.82%) |Training time=0.44s (20.51%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6187|ppo_ep: 1|act_loss: 0.01485443115234375|cri_loss: 0.0076904296875|unsuper_loss: 0.0
-average reward score: 5.8828125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.86%) |Training time=0.44s (20.49%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6188|ppo_ep: 1|act_loss: 0.02923583984375|cri_loss: 0.01490020751953125|unsuper_loss: 0.0
-average reward score: 5.69921875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.55%) |Training time=0.45s (20.82%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.52
-[2023-04-14 12:34:46,845] [INFO] [logging.py:96:log_dist] [Rank 0] step=6190, skipped=76, lr=[1.5552466550231917e-06, 1.5552466550231917e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:34:46,863] [INFO] [timer.py:199:stop] epoch=0/micro_step=6190/global_step=6190, RunningAvgSamplesPerSec=105.54164914219182, CurrSamplesPerSec=115.26355799601869, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:34:46,947] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
-[2023-04-14 12:34:46,947] [INFO] [logging.py:96:log_dist] [Rank 0] step=6190, skipped=103, lr=[8.25028774705112e-07, 8.25028774705112e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6189|ppo_ep: 1|act_loss: 0.0374755859375|cri_loss: 0.0200653076171875|unsuper_loss: 0.0
-average reward score: 4.6484375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.31%) |Training time=0.44s (20.52%) |Others=0.09 (4.16%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.52
-[2023-04-14 12:34:49,092] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
-epoch: 0|step: 6190|ppo_ep: 1|act_loss: 0.0126953125|cri_loss: 0.00658416748046875|unsuper_loss: 0.0
-average reward score: 5.40625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.20%) |Training time=0.44s (20.63%) |Others=0.09 (4.17%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6191|ppo_ep: 1|act_loss: 0.0193328857421875|cri_loss: 0.01024627685546875|unsuper_loss: 0.0
-average reward score: 5.68359375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.85%) |Training time=0.44s (20.51%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6192|ppo_ep: 1|act_loss: -0.034759521484375|cri_loss: -0.0170745849609375|unsuper_loss: 0.0
-average reward score: 5.2109375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.49%) |Training time=0.45s (20.85%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6193|ppo_ep: 1|act_loss: 0.015655517578125|cri_loss: 0.00811004638671875|unsuper_loss: 0.0
-average reward score: 4.49609375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.89%) |Training time=0.44s (20.47%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6194|ppo_ep: 1|act_loss: -0.0287933349609375|cri_loss: -0.0138702392578125|unsuper_loss: 0.0
-average reward score: 4.96484375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.39%) |Training time=0.43s (19.91%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6195|ppo_ep: 1|act_loss: 0.029052734375|cri_loss: 0.018585205078125|unsuper_loss: 0.0
-average reward score: 5.20703125
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.08%) |Training time=0.45s (20.21%) |Others=0.15 (6.72%)|CurSamplesPerSec=14.53 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6196|ppo_ep: 1|act_loss: -0.021270751953125|cri_loss: -0.0104522705078125|unsuper_loss: 0.0
-average reward score: 5.13671875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.92%) |Training time=0.44s (20.37%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6197|ppo_ep: 1|act_loss: 0.0007162094116210938|cri_loss: 0.0006031990051269531|unsuper_loss: 0.0
-average reward score: 5.33984375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.91%) |Training time=0.44s (20.40%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6198|ppo_ep: 1|act_loss: -0.003345489501953125|cri_loss: -0.0012874603271484375|unsuper_loss: 0.0
-average reward score: 5.40234375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.81%) |Training time=0.44s (20.51%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.52
-[2023-04-14 12:35:08,385] [INFO] [logging.py:96:log_dist] [Rank 0] step=6200, skipped=76, lr=[1.5416105925008481e-06, 1.5416105925008481e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:35:08,403] [INFO] [timer.py:199:stop] epoch=0/micro_step=6200/global_step=6200, RunningAvgSamplesPerSec=105.5565439468875, CurrSamplesPerSec=118.04580662921714, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:35:08,496] [INFO] [logging.py:96:log_dist] [Rank 0] step=6200, skipped=104, lr=[8.186080645980449e-07, 8.186080645980449e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6199|ppo_ep: 1|act_loss: 0.0003528594970703125|cri_loss: 0.0007967948913574219|unsuper_loss: 0.0
-average reward score: 5.265625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.09%) |Training time=0.43s (20.24%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6200|ppo_ep: 1|act_loss: -0.003871917724609375|cri_loss: -0.00183868408203125|unsuper_loss: 0.0
-average reward score: 5.421875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.73%) |Training time=0.44s (20.62%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6201|ppo_ep: 1|act_loss: 0.004852294921875|cri_loss: 0.0026836395263671875|unsuper_loss: 0.0
-average reward score: 5.0859375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.91%) |Training time=0.44s (20.42%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6202|ppo_ep: 1|act_loss: -0.002704620361328125|cri_loss: -0.00090789794921875|unsuper_loss: 0.0
-average reward score: 5.484375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.78%) |Training time=0.44s (20.55%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6203|ppo_ep: 1|act_loss: -9.34600830078125e-05|cri_loss: 5.0902366638183594e-05|unsuper_loss: 0.0
-average reward score: 5.6015625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.49%) |Training time=0.45s (20.88%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6204|ppo_ep: 1|act_loss: -0.00811004638671875|cri_loss: -0.003986358642578125|unsuper_loss: 0.0
-average reward score: 6.2578125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.71%) |Training time=0.44s (20.62%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6205|ppo_ep: 1|act_loss: -0.014251708984375|cri_loss: -0.006908416748046875|unsuper_loss: 0.0
-average reward score: 5.078125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.14%) |Training time=0.45s (20.65%) |Others=0.11 (5.21%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6206|ppo_ep: 1|act_loss: -0.0167236328125|cri_loss: -0.00799560546875|unsuper_loss: 0.0
-average reward score: 5.50390625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.64%) |Training time=0.44s (20.64%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6207|ppo_ep: 1|act_loss: 0.020660400390625|cri_loss: 0.01129913330078125|unsuper_loss: 0.0
-average reward score: 5.0234375
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.60%) |Training time=0.47s (21.28%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.52 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6208|ppo_ep: 1|act_loss: -0.0110626220703125|cri_loss: -0.005298614501953125|unsuper_loss: 0.0
-average reward score: 4.8359375
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.67s (76.20%) |Training time=0.42s (19.07%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.57 |AvgSamplesPerSec=14.52
-[2023-04-14 12:35:30,005] [INFO] [logging.py:96:log_dist] [Rank 0] step=6210, skipped=76, lr=[1.5280231977302697e-06, 1.5280231977302697e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:35:30,023] [INFO] [timer.py:199:stop] epoch=0/micro_step=6210/global_step=6210, RunningAvgSamplesPerSec=105.569428626769, CurrSamplesPerSec=115.34894721463071, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:35:30,115] [INFO] [logging.py:96:log_dist] [Rank 0] step=6210, skipped=104, lr=[8.114976217732126e-07, 8.114976217732126e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6209|ppo_ep: 1|act_loss: -0.007843017578125|cri_loss: -0.0037384033203125|unsuper_loss: 0.0
-average reward score: 4.86328125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.97%) |Training time=0.44s (20.37%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6210|ppo_ep: 1|act_loss: 0.001140594482421875|cri_loss: 0.0009160041809082031|unsuper_loss: 0.0
-average reward score: 5.4375
--------------------------------------------------------------------------------------
-|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.79s (75.64%) |Training time=0.48s (20.07%) |Others=0.10 (4.28%)|CurSamplesPerSec=13.50 |AvgSamplesPerSec=14.52
-[2023-04-14 12:35:34,546] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
-epoch: 0|step: 6211|ppo_ep: 1|act_loss: 0.00507354736328125|cri_loss: 0.0026645660400390625|unsuper_loss: 0.0
-average reward score: 5.3515625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.66s (77.26%) |Training time=0.39s (18.05%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.52
-[2023-04-14 12:35:36,778] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
-epoch: 0|step: 6212|ppo_ep: 1|act_loss: 0.0276031494140625|cri_loss: 0.0142974853515625|unsuper_loss: 0.0
-average reward score: 5.4375
--------------------------------------------------------------------------------------
-|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.75s (78.19%) |Training time=0.39s (17.28%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.34 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6213|ppo_ep: 1|act_loss: -0.02130126953125|cri_loss: -0.01049041748046875|unsuper_loss: 0.0
-average reward score: 5.25390625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.42%) |Training time=0.45s (20.84%) |Others=0.10 (4.74%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6214|ppo_ep: 1|act_loss: -0.0183563232421875|cri_loss: -0.00872039794921875|unsuper_loss: 0.0
-average reward score: 5.140625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.28%) |Training time=0.43s (19.99%) |Others=0.10 (4.73%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6215|ppo_ep: 1|act_loss: 0.0140380859375|cri_loss: 0.007381439208984375|unsuper_loss: 0.0
-average reward score: 5.203125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.41%) |Training time=0.45s (20.87%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6216|ppo_ep: 1|act_loss: 0.002777099609375|cri_loss: 0.0017156600952148438|unsuper_loss: 0.0
-average reward score: 5.0703125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.76%) |Training time=0.44s (20.52%) |Others=0.10 (4.73%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6217|ppo_ep: 1|act_loss: 0.0430908203125|cri_loss: 0.0218505859375|unsuper_loss: 0.0
-average reward score: 5.90625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.66%) |Training time=0.42s (19.60%) |Others=0.10 (4.74%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.52
-epoch: 0|step: 6218|ppo_ep: 1|act_loss: -0.029815673828125|cri_loss: -0.01462554931640625|unsuper_loss: 0.0
-average reward score: 5.37109375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.43%) |Training time=0.42s (19.85%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.52
-[2023-04-14 12:35:51,756] [INFO] [logging.py:96:log_dist] [Rank 0] step=6220, skipped=78, lr=[1.517188458058827e-06, 1.517188458058827e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:35:51,774] [INFO] [timer.py:199:stop] epoch=0/micro_step=6220/global_step=6220, RunningAvgSamplesPerSec=105.5899408595616, CurrSamplesPerSec=112.21889660419636, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:35:51,867] [INFO] [logging.py:96:log_dist] [Rank 0] step=6220, skipped=104, lr=[8.04412206626915e-07, 8.04412206626915e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6219|ppo_ep: 1|act_loss: 0.0148468017578125|cri_loss: 0.007663726806640625|unsuper_loss: 0.0
-average reward score: 5.0625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.39%) |Training time=0.45s (20.92%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6220|ppo_ep: 1|act_loss: -0.00608062744140625|cri_loss: -0.0027713775634765625|unsuper_loss: 0.0
-average reward score: 5.4140625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.70%) |Training time=0.44s (20.59%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6221|ppo_ep: 1|act_loss: -0.0115509033203125|cri_loss: -0.0054931640625|unsuper_loss: 0.0
-average reward score: 5.71875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.80%) |Training time=0.44s (20.50%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6222|ppo_ep: 1|act_loss: -0.00766754150390625|cri_loss: -0.003414154052734375|unsuper_loss: 0.0
-average reward score: 5.7421875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.42%) |Training time=0.45s (20.86%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6223|ppo_ep: 1|act_loss: -0.0185546875|cri_loss: -0.00916290283203125|unsuper_loss: 0.0
-average reward score: 5.73046875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.17%) |Training time=0.46s (21.15%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6224|ppo_ep: 1|act_loss: 0.0041961669921875|cri_loss: 0.0024700164794921875|unsuper_loss: 0.0
-average reward score: 5.71875
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.25%) |Training time=0.51s (23.20%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.56 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6225|ppo_ep: 1|act_loss: -0.0008344650268554688|cri_loss: 7.200241088867188e-05|unsuper_loss: 0.0
-average reward score: 5.640625
--------------------------------------------------------------------------------------
-|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.59s (69.27%) |Training time=0.60s (26.34%) |Others=0.10 (4.39%)|CurSamplesPerSec=13.96 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6226|ppo_ep: 1|act_loss: 0.0102691650390625|cri_loss: 0.00655364990234375|unsuper_loss: 0.0
-average reward score: 5.6328125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.72%) |Training time=0.47s (21.62%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6227|ppo_ep: 1|act_loss: 0.0199127197265625|cri_loss: 0.0105438232421875|unsuper_loss: 0.0
-average reward score: 5.640625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.44%) |Training time=0.47s (21.90%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6228|ppo_ep: 1|act_loss: -0.008087158203125|cri_loss: -0.003635406494140625|unsuper_loss: 0.0
-average reward score: 5.31640625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.69%) |Training time=0.47s (21.51%) |Others=0.10 (4.80%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53
-[2023-04-14 12:36:13,489] [INFO] [logging.py:96:log_dist] [Rank 0] step=6230, skipped=78, lr=[1.5036891722776364e-06, 1.5036891722776364e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:36:13,507] [INFO] [timer.py:199:stop] epoch=0/micro_step=6230/global_step=6230, RunningAvgSamplesPerSec=105.58477727357665, CurrSamplesPerSec=102.88509972863999, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:36:13,600] [INFO] [logging.py:96:log_dist] [Rank 0] step=6230, skipped=104, lr=[7.973519241820982e-07, 7.973519241820982e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6229|ppo_ep: 1|act_loss: -0.00170135498046875|cri_loss: -0.0007963180541992188|unsuper_loss: 0.0
-average reward score: 5.5
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.50%) |Training time=0.47s (21.88%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6230|ppo_ep: 1|act_loss: 0.00777435302734375|cri_loss: 0.004039764404296875|unsuper_loss: 0.0
-average reward score: 5.296875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.75%) |Training time=0.47s (21.57%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6231|ppo_ep: 1|act_loss: -0.0211181640625|cri_loss: -0.0089569091796875|unsuper_loss: 0.0
-average reward score: 5.6328125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.47%) |Training time=0.47s (21.86%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6232|ppo_ep: 1|act_loss: -0.026275634765625|cri_loss: -0.01280975341796875|unsuper_loss: 0.0
-average reward score: 5.24609375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.24%) |Training time=0.48s (22.12%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6233|ppo_ep: 1|act_loss: -0.01097869873046875|cri_loss: -0.005405426025390625|unsuper_loss: 0.0
-average reward score: 5.98828125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.24%) |Training time=0.48s (22.13%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6234|ppo_ep: 1|act_loss: 0.0022563934326171875|cri_loss: 0.0018453598022460938|unsuper_loss: 0.0
-average reward score: 5.6875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.23%) |Training time=0.48s (22.13%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6235|ppo_ep: 1|act_loss: -0.00632476806640625|cri_loss: -0.002765655517578125|unsuper_loss: 0.0
-average reward score: 4.84765625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.40%) |Training time=0.47s (21.96%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6236|ppo_ep: 1|act_loss: -0.025146484375|cri_loss: -0.0122833251953125|unsuper_loss: 0.0
-average reward score: 5.30078125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.32%) |Training time=0.48s (21.98%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6237|ppo_ep: 1|act_loss: -0.003505706787109375|cri_loss: -0.0015869140625|unsuper_loss: 0.0
-average reward score: 4.921875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.25%) |Training time=0.48s (22.11%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6238|ppo_ep: 1|act_loss: -0.01464080810546875|cri_loss: -0.00714874267578125|unsuper_loss: 0.0
-average reward score: 5.1171875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.57%) |Training time=0.47s (21.78%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.53
-[2023-04-14 12:36:35,111] [INFO] [logging.py:96:log_dist] [Rank 0] step=6240, skipped=78, lr=[1.4902391163351402e-06, 1.4902391163351402e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:36:35,130] [INFO] [timer.py:199:stop] epoch=0/micro_step=6240/global_step=6240, RunningAvgSamplesPerSec=105.58013996430111, CurrSamplesPerSec=104.96740574710576, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:36:35,222] [INFO] [logging.py:96:log_dist] [Rank 0] step=6240, skipped=104, lr=[7.903168790891797e-07, 7.903168790891797e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6239|ppo_ep: 1|act_loss: 0.0259552001953125|cri_loss: 0.0141448974609375|unsuper_loss: 0.0
-average reward score: 4.9921875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.81%) |Training time=0.47s (21.58%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6240|ppo_ep: 1|act_loss: 0.01824951171875|cri_loss: 0.0101776123046875|unsuper_loss: 0.0
-average reward score: 5.96875
--------------------------------------------------------------------------------------
-|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.60s (70.23%) |Training time=0.58s (25.37%) |Others=0.10 (4.40%)|CurSamplesPerSec=14.05 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6241|ppo_ep: 1|act_loss: 0.02484130859375|cri_loss: 0.012664794921875|unsuper_loss: 0.0
-average reward score: 5.375
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.25%) |Training time=0.48s (21.74%) |Others=0.11 (5.01%)|CurSamplesPerSec=14.47 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6242|ppo_ep: 1|act_loss: 0.01416015625|cri_loss: 0.007274627685546875|unsuper_loss: 0.0
-average reward score: 5.67578125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.17%) |Training time=0.46s (21.18%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6243|ppo_ep: 1|act_loss: 0.028594970703125|cri_loss: 0.01468658447265625|unsuper_loss: 0.0
-average reward score: 5.66015625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.65%) |Training time=0.44s (20.67%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6244|ppo_ep: 1|act_loss: 0.004192352294921875|cri_loss: 0.002834320068359375|unsuper_loss: 0.0
-average reward score: 5.1171875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.38%) |Training time=0.45s (20.84%) |Others=0.10 (4.77%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6245|ppo_ep: 1|act_loss: -0.0104217529296875|cri_loss: -0.004413604736328125|unsuper_loss: 0.0
-average reward score: 5.36328125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.79%) |Training time=0.44s (20.56%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6246|ppo_ep: 1|act_loss: -0.004180908203125|cri_loss: -0.0017414093017578125|unsuper_loss: 0.0
-average reward score: 5.09375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.40%) |Training time=0.45s (20.97%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6247|ppo_ep: 1|act_loss: -0.00423431396484375|cri_loss: -0.001888275146484375|unsuper_loss: 0.0
-average reward score: 5.04296875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.69%) |Training time=0.45s (20.67%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6248|ppo_ep: 1|act_loss: -0.008544921875|cri_loss: -0.004192352294921875|unsuper_loss: 0.0
-average reward score: 5.0703125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.72%) |Training time=0.45s (20.64%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.53
-[2023-04-14 12:36:56,876] [INFO] [logging.py:96:log_dist] [Rank 0] step=6250, skipped=78, lr=[1.4768384895936108e-06, 1.4768384895936108e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:36:56,894] [INFO] [timer.py:199:stop] epoch=0/micro_step=6250/global_step=6250, RunningAvgSamplesPerSec=105.58190779052778, CurrSamplesPerSec=111.68467475926893, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:36:56,986] [INFO] [logging.py:96:log_dist] [Rank 0] step=6250, skipped=104, lr=[7.833071756244998e-07, 7.833071756244998e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6249|ppo_ep: 1|act_loss: 0.006694793701171875|cri_loss: 0.0036468505859375|unsuper_loss: 0.0
-average reward score: 5.09375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.61%) |Training time=0.45s (20.78%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6250|ppo_ep: 1|act_loss: -0.013153076171875|cri_loss: -0.006481170654296875|unsuper_loss: 0.0
-average reward score: 5.21484375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.69%) |Training time=0.45s (20.66%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6251|ppo_ep: 1|act_loss: -0.00647735595703125|cri_loss: -0.0031337738037109375|unsuper_loss: 0.0
-average reward score: 5.203125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.30%) |Training time=0.46s (21.08%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6252|ppo_ep: 1|act_loss: 0.02947998046875|cri_loss: 0.01508331298828125|unsuper_loss: 0.0
-average reward score: 5.20703125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.86%) |Training time=0.45s (20.54%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6253|ppo_ep: 1|act_loss: 0.004730224609375|cri_loss: 0.002590179443359375|unsuper_loss: 0.0
-average reward score: 5.40625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.08%) |Training time=0.44s (20.25%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6254|ppo_ep: 1|act_loss: 0.01178741455078125|cri_loss: 0.006591796875|unsuper_loss: 0.0
-average reward score: 5.28515625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.62%) |Training time=0.45s (20.75%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6255|ppo_ep: 1|act_loss: 0.02813720703125|cri_loss: 0.01495361328125|unsuper_loss: 0.0
-average reward score: 5.734375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.92%) |Training time=0.44s (20.41%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6256|ppo_ep: 1|act_loss: 0.0218048095703125|cri_loss: 0.01131439208984375|unsuper_loss: 0.0
-average reward score: 6.625
--------------------------------------------------------------------------------------
-|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.77s (76.11%) |Training time=0.46s (19.58%) |Others=0.10 (4.31%)|CurSamplesPerSec=13.75 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6257|ppo_ep: 1|act_loss: 0.02520751953125|cri_loss: 0.01290130615234375|unsuper_loss: 0.0
-average reward score: 5.40234375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.64%) |Training time=0.45s (20.71%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6258|ppo_ep: 1|act_loss: 0.001255035400390625|cri_loss: 0.000720977783203125|unsuper_loss: 0.0
-average reward score: 5.44140625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.91%) |Training time=0.44s (20.41%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.53
-[2023-04-14 12:37:18,637] [INFO] [logging.py:96:log_dist] [Rank 0] step=6260, skipped=78, lr=[1.4634874906826658e-06, 1.4634874906826658e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:37:18,654] [INFO] [timer.py:199:stop] epoch=0/micro_step=6260/global_step=6260, RunningAvgSamplesPerSec=105.59232439178967, CurrSamplesPerSec=109.13119463360097, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:37:18,747] [INFO] [logging.py:96:log_dist] [Rank 0] step=6260, skipped=104, lr=[7.763229176887724e-07, 7.763229176887724e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6259|ppo_ep: 1|act_loss: 0.00228118896484375|cri_loss: 0.0017452239990234375|unsuper_loss: 0.0
-average reward score: 5.4453125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.32%) |Training time=0.46s (21.07%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6260|ppo_ep: 1|act_loss: 0.00464630126953125|cri_loss: 0.0027523040771484375|unsuper_loss: 0.0
-average reward score: 5.72265625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.58%) |Training time=0.45s (20.78%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6261|ppo_ep: 1|act_loss: -0.013946533203125|cri_loss: -0.0068206787109375|unsuper_loss: 0.0
-average reward score: 5.02734375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.50%) |Training time=0.45s (20.86%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6262|ppo_ep: 1|act_loss: 0.0018863677978515625|cri_loss: 0.001041412353515625|unsuper_loss: 0.0
-average reward score: 5.125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.55%) |Training time=0.45s (20.79%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6263|ppo_ep: 1|act_loss: -0.0026092529296875|cri_loss: -0.000988006591796875|unsuper_loss: 0.0
-average reward score: 5.7265625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.44%) |Training time=0.45s (20.92%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6264|ppo_ep: 1|act_loss: 0.00653076171875|cri_loss: 0.003620147705078125|unsuper_loss: 0.0
-average reward score: 5.6796875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.71%) |Training time=0.44s (20.62%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6265|ppo_ep: 1|act_loss: -0.00452423095703125|cri_loss: -0.0022068023681640625|unsuper_loss: 0.0
-average reward score: 6.4296875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.61%) |Training time=0.45s (20.75%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6266|ppo_ep: 1|act_loss: -0.0116119384765625|cri_loss: -0.005718231201171875|unsuper_loss: 0.0
-average reward score: 5.76953125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.49%) |Training time=0.45s (20.87%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6267|ppo_ep: 1|act_loss: -0.0115966796875|cri_loss: -0.005588531494140625|unsuper_loss: 0.0
-average reward score: 5.59375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.69%) |Training time=0.44s (20.67%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6268|ppo_ep: 1|act_loss: -0.00019741058349609375|cri_loss: 0.00042819976806640625|unsuper_loss: 0.0
-average reward score: 6.16015625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.41%) |Training time=0.45s (20.83%) |Others=0.10 (4.75%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53
-[2023-04-14 12:37:40,204] [INFO] [logging.py:96:log_dist] [Rank 0] step=6270, skipped=78, lr=[1.4501863174963161e-06, 1.4501863174963161e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:37:40,222] [INFO] [timer.py:199:stop] epoch=0/micro_step=6270/global_step=6270, RunningAvgSamplesPerSec=105.6018430806941, CurrSamplesPerSec=111.38955300678872, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:37:40,326] [INFO] [logging.py:96:log_dist] [Rank 0] step=6270, skipped=104, lr=[7.693642088055492e-07, 7.693642088055492e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6269|ppo_ep: 1|act_loss: 0.0145263671875|cri_loss: 0.007442474365234375|unsuper_loss: 0.0
-average reward score: 5.6796875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.27%) |Training time=0.45s (20.72%) |Others=0.11 (5.01%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6270|ppo_ep: 1|act_loss: -0.026397705078125|cri_loss: -0.01293182373046875|unsuper_loss: 0.0
-average reward score: 5.27734375
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.63s (73.56%) |Training time=0.48s (21.89%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.47 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6271|ppo_ep: 1|act_loss: 0.002685546875|cri_loss: 0.0016155242919921875|unsuper_loss: 0.0
-average reward score: 5.8125
--------------------------------------------------------------------------------------
-|E2E latency=2.45s |Gather latency=0.00s (0.00%) |Generate time=1.87s (76.54%) |Training time=0.47s (19.35%) |Others=0.10 (4.11%)|CurSamplesPerSec=13.08 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6272|ppo_ep: 1|act_loss: -0.01654052734375|cri_loss: -0.0077056884765625|unsuper_loss: 0.0
-average reward score: 5.28125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.10%) |Training time=0.46s (21.28%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6273|ppo_ep: 1|act_loss: -0.0284576416015625|cri_loss: -0.01349639892578125|unsuper_loss: 0.0
-average reward score: 5.84375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.22%) |Training time=0.46s (21.17%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6274|ppo_ep: 1|act_loss: -0.025360107421875|cri_loss: -0.01253509521484375|unsuper_loss: 0.0
-average reward score: 6.06640625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.41%) |Training time=0.45s (20.97%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6275|ppo_ep: 1|act_loss: 0.0177001953125|cri_loss: 0.009307861328125|unsuper_loss: 0.0
-average reward score: 5.69921875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.19%) |Training time=0.46s (21.19%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6276|ppo_ep: 1|act_loss: 0.0011444091796875|cri_loss: 0.0008845329284667969|unsuper_loss: 0.0
-average reward score: 5.859375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.13%) |Training time=0.46s (21.26%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6277|ppo_ep: 1|act_loss: 0.042236328125|cri_loss: 0.021392822265625|unsuper_loss: 0.0
-average reward score: 5.07421875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.88%) |Training time=0.47s (21.51%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6278|ppo_ep: 1|act_loss: -0.0009446144104003906|cri_loss: -0.00019884109497070312|unsuper_loss: 0.0
-average reward score: 5.7421875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.17%) |Training time=0.46s (21.22%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53
-[2023-04-14 12:38:02,230] [INFO] [logging.py:96:log_dist] [Rank 0] step=6280, skipped=78, lr=[1.4369351671900373e-06, 1.4369351671900373e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:38:02,249] [INFO] [timer.py:199:stop] epoch=0/micro_step=6280/global_step=6280, RunningAvgSamplesPerSec=105.6027195287297, CurrSamplesPerSec=109.6824271427334, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:38:02,342] [INFO] [logging.py:96:log_dist] [Rank 0] step=6280, skipped=104, lr=[7.624311521196831e-07, 7.624311521196831e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6279|ppo_ep: 1|act_loss: 0.0102691650390625|cri_loss: 0.00617218017578125|unsuper_loss: 0.0
-average reward score: 5.51171875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.30%) |Training time=0.45s (21.06%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6280|ppo_ep: 1|act_loss: 0.00545501708984375|cri_loss: 0.003635406494140625|unsuper_loss: 0.0
-average reward score: 6.10546875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.88%) |Training time=0.45s (20.48%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6281|ppo_ep: 1|act_loss: -0.0016050338745117188|cri_loss: -0.00028705596923828125|unsuper_loss: 0.0
-average reward score: 6.0390625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.49%) |Training time=0.45s (20.88%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6282|ppo_ep: 1|act_loss: 0.011962890625|cri_loss: 0.006378173828125|unsuper_loss: 0.0
-average reward score: 5.6328125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.45%) |Training time=0.45s (20.91%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6283|ppo_ep: 1|act_loss: -0.0033092498779296875|cri_loss: -0.0013580322265625|unsuper_loss: 0.0
-average reward score: 5.05078125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.20%) |Training time=0.46s (21.16%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6284|ppo_ep: 1|act_loss: 0.0046234130859375|cri_loss: 0.002899169921875|unsuper_loss: 0.0
-average reward score: 5.61328125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.01%) |Training time=0.46s (21.26%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6285|ppo_ep: 1|act_loss: -0.03997802734375|cri_loss: -0.0195159912109375|unsuper_loss: 0.0
-average reward score: 4.953125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.50%) |Training time=0.45s (20.85%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6286|ppo_ep: 1|act_loss: 0.00927734375|cri_loss: 0.004932403564453125|unsuper_loss: 0.0
-average reward score: 5.30859375
--------------------------------------------------------------------------------------
-|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.74s (74.91%) |Training time=0.47s (20.44%) |Others=0.11 (4.64%)|CurSamplesPerSec=13.78 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6287|ppo_ep: 1|act_loss: -0.0032196044921875|cri_loss: -0.0010728836059570312|unsuper_loss: 0.0
-average reward score: 5.1796875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.68%) |Training time=0.45s (20.68%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6288|ppo_ep: 1|act_loss: -0.00197601318359375|cri_loss: -0.0006690025329589844|unsuper_loss: 0.0
-average reward score: 5.46875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.30%) |Training time=0.46s (21.07%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.53
-[2023-04-14 12:38:24,029] [INFO] [logging.py:96:log_dist] [Rank 0] step=6290, skipped=78, lr=[1.4237342361778406e-06, 1.4237342361778406e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:38:24,048] [INFO] [timer.py:199:stop] epoch=0/micro_step=6290/global_step=6290, RunningAvgSamplesPerSec=105.6091977124146, CurrSamplesPerSec=111.14943981290935, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:38:24,140] [INFO] [logging.py:96:log_dist] [Rank 0] step=6290, skipped=104, lr=[7.555238503958001e-07, 7.555238503958001e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6289|ppo_ep: 1|act_loss: 0.033782958984375|cri_loss: 0.01788330078125|unsuper_loss: 0.0
-average reward score: 6.03125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.53%) |Training time=0.45s (20.86%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6290|ppo_ep: 1|act_loss: -0.0182647705078125|cri_loss: -0.00897979736328125|unsuper_loss: 0.0
-average reward score: 6.25
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.26%) |Training time=0.46s (21.12%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53
-[2023-04-14 12:38:28,458] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
-epoch: 0|step: 6291|ppo_ep: 1|act_loss: 0.013763427734375|cri_loss: 0.007251739501953125|unsuper_loss: 0.0
-average reward score: 6.2109375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.90%) |Training time=0.45s (20.95%) |Others=0.09 (4.14%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.53
-[2023-04-14 12:38:30,610] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
-epoch: 0|step: 6292|ppo_ep: 1|act_loss: -0.0005426406860351562|cri_loss: -1.621246337890625e-05|unsuper_loss: 0.0
-average reward score: 5.33203125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.67%) |Training time=0.46s (21.17%) |Others=0.09 (4.16%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6293|ppo_ep: 1|act_loss: 0.0010776519775390625|cri_loss: 0.0010042190551757812|unsuper_loss: 0.0
-average reward score: 5.796875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.28%) |Training time=0.46s (21.10%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6294|ppo_ep: 1|act_loss: 0.04022216796875|cri_loss: 0.02130126953125|unsuper_loss: 0.0
-average reward score: 4.84375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.41%) |Training time=0.45s (20.97%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6295|ppo_ep: 1|act_loss: -0.014068603515625|cri_loss: -0.006320953369140625|unsuper_loss: 0.0
-average reward score: 5.421875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.44%) |Training time=0.45s (20.93%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6296|ppo_ep: 1|act_loss: 0.002857208251953125|cri_loss: 0.001953125|unsuper_loss: 0.0
-average reward score: 5.296875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.60%) |Training time=0.45s (20.76%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6297|ppo_ep: 1|act_loss: -0.0020904541015625|cri_loss: -0.0003910064697265625|unsuper_loss: 0.0
-average reward score: 5.08984375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.35%) |Training time=0.46s (21.03%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6298|ppo_ep: 1|act_loss: -0.003864288330078125|cri_loss: 0.001556396484375|unsuper_loss: 0.0
-average reward score: 5.2890625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.35%) |Training time=0.45s (20.94%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.53
-[2023-04-14 12:38:45,675] [INFO] [logging.py:96:log_dist] [Rank 0] step=6300, skipped=78, lr=[1.4105837201293704e-06, 1.4105837201293704e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:38:45,694] [INFO] [timer.py:199:stop] epoch=0/micro_step=6300/global_step=6300, RunningAvgSamplesPerSec=105.61516707150545, CurrSamplesPerSec=107.56875275999228, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:38:45,786] [INFO] [logging.py:96:log_dist] [Rank 0] step=6300, skipped=106, lr=[7.500166214034776e-07, 7.500166214034776e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6299|ppo_ep: 1|act_loss: 0.009979248046875|cri_loss: 0.00518798828125|unsuper_loss: 0.0
-average reward score: 5.9375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.32%) |Training time=0.46s (21.09%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6300|ppo_ep: 1|act_loss: 0.019439697265625|cri_loss: 0.010284423828125|unsuper_loss: 0.0
-average reward score: 4.953125
--------------------------------------------------------------------------------------
-|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.65s (73.93%) |Training time=0.45s (20.32%) |Others=0.13 (5.75%)|CurSamplesPerSec=14.35 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6301|ppo_ep: 1|act_loss: -0.00232696533203125|cri_loss: -0.00095367431640625|unsuper_loss: 0.0
-average reward score: 5.69921875
--------------------------------------------------------------------------------------
-|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.66s (73.72%) |Training time=0.49s (21.79%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.22 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6302|ppo_ep: 1|act_loss: 0.0011081695556640625|cri_loss: 0.0012903213500976562|unsuper_loss: 0.0
-average reward score: 5.2890625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.54%) |Training time=0.45s (20.82%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6303|ppo_ep: 1|act_loss: -0.022674560546875|cri_loss: -0.0111541748046875|unsuper_loss: 0.0
-average reward score: 5.45703125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.61%) |Training time=0.45s (20.74%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6304|ppo_ep: 1|act_loss: -0.00984954833984375|cri_loss: -0.004364013671875|unsuper_loss: 0.0
-average reward score: 6.0546875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.52%) |Training time=0.45s (20.85%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6305|ppo_ep: 1|act_loss: -0.0477294921875|cri_loss: -0.02325439453125|unsuper_loss: 0.0
-average reward score: 5.34765625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.43%) |Training time=0.45s (20.94%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6306|ppo_ep: 1|act_loss: -0.0142974853515625|cri_loss: -0.007049560546875|unsuper_loss: 0.0
-average reward score: 5.67578125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.67%) |Training time=0.45s (20.69%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6307|ppo_ep: 1|act_loss: 0.00475311279296875|cri_loss: 0.002620697021484375|unsuper_loss: 0.0
-average reward score: 5.78515625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.35%) |Training time=0.46s (21.01%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6308|ppo_ep: 1|act_loss: 0.0255126953125|cri_loss: 0.01300811767578125|unsuper_loss: 0.0
-average reward score: 5.3046875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.33%) |Training time=0.45s (20.99%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53
-[2023-04-14 12:39:07,456] [INFO] [logging.py:96:log_dist] [Rank 0] step=6310, skipped=78, lr=[1.3974838139670003e-06, 1.3974838139670003e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:39:07,475] [INFO] [timer.py:199:stop] epoch=0/micro_step=6310/global_step=6310, RunningAvgSamplesPerSec=105.62331246396326, CurrSamplesPerSec=111.24064007134373, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:39:07,567] [INFO] [logging.py:96:log_dist] [Rank 0] step=6310, skipped=106, lr=[7.431559363585e-07, 7.431559363585e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6309|ppo_ep: 1|act_loss: 0.0016880035400390625|cri_loss: 0.0015850067138671875|unsuper_loss: 0.0
-average reward score: 5.26953125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.52%) |Training time=0.45s (20.83%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6310|ppo_ep: 1|act_loss: 0.017578125|cri_loss: 0.0090484619140625|unsuper_loss: 0.0
-average reward score: 5.4765625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.51%) |Training time=0.45s (20.83%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6311|ppo_ep: 1|act_loss: 0.087646484375|cri_loss: 0.044891357421875|unsuper_loss: 0.0
-average reward score: 5.66796875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.30%) |Training time=0.45s (21.00%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6312|ppo_ep: 1|act_loss: 0.0124053955078125|cri_loss: 0.006786346435546875|unsuper_loss: 0.0
-average reward score: 5.42578125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.37%) |Training time=0.45s (20.98%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53
-[2023-04-14 12:39:16,100] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
-epoch: 0|step: 6313|ppo_ep: 1|act_loss: 0.0188751220703125|cri_loss: 0.00971221923828125|unsuper_loss: 0.0
-average reward score: 5.96875
--------------------------------------------------------------------------------------
-|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.45%) |Training time=0.42s (19.87%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.53
-[2023-04-14 12:39:18,241] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
-epoch: 0|step: 6314|ppo_ep: 1|act_loss: 0.03717041015625|cri_loss: 0.019134521484375|unsuper_loss: 0.0
-average reward score: 6.0390625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.14%) |Training time=0.43s (19.97%) |Others=0.10 (4.89%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6315|ppo_ep: 1|act_loss: 0.008331298828125|cri_loss: 0.004657745361328125|unsuper_loss: 0.0
-average reward score: 6.171875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.86%) |Training time=0.44s (20.43%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.53
-[2023-04-14 12:39:22,730] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, reducing to 16384
-epoch: 0|step: 6316|ppo_ep: 1|act_loss: 0.00103759765625|cri_loss: 0.0007176399230957031|unsuper_loss: 0.0
-average reward score: 5.828125
--------------------------------------------------------------------------------------
-|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.84s (79.41%) |Training time=0.37s (16.09%) |Others=0.10 (4.50%)|CurSamplesPerSec=13.83 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6317|ppo_ep: 1|act_loss: 0.035888671875|cri_loss: 0.01885986328125|unsuper_loss: 0.0
-average reward score: 5.78515625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.67s (76.86%) |Training time=0.40s (18.42%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6318|ppo_ep: 1|act_loss: 0.0038299560546875|cri_loss: 0.0024509429931640625|unsuper_loss: 0.0
-average reward score: 4.94140625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.94%) |Training time=0.46s (21.41%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.53
-[2023-04-14 12:39:29,229] [INFO] [logging.py:96:log_dist] [Rank 0] step=6320, skipped=81, lr=[1.3883440965450009e-06, 1.3883440965450009e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:39:29,247] [INFO] [timer.py:199:stop] epoch=0/micro_step=6320/global_step=6320, RunningAvgSamplesPerSec=105.64005873369146, CurrSamplesPerSec=108.1665674061908, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:39:29,341] [INFO] [logging.py:96:log_dist] [Rank 0] step=6320, skipped=106, lr=[7.363212919807011e-07, 7.363212919807011e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6319|ppo_ep: 1|act_loss: -0.0096893310546875|cri_loss: -0.004680633544921875|unsuper_loss: 0.0
-average reward score: 5.80078125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.25%) |Training time=0.46s (21.10%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6320|ppo_ep: 1|act_loss: -0.06494140625|cri_loss: -0.031707763671875|unsuper_loss: 0.0
-average reward score: 5.1640625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.05%) |Training time=0.46s (21.31%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6321|ppo_ep: 1|act_loss: -0.02008056640625|cri_loss: -0.0095672607421875|unsuper_loss: 0.0
-average reward score: 4.859375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.56%) |Training time=0.45s (20.77%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6322|ppo_ep: 1|act_loss: -0.02349853515625|cri_loss: -0.0114288330078125|unsuper_loss: 0.0
-average reward score: 5.22265625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.68%) |Training time=0.44s (20.51%) |Others=0.10 (4.80%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6323|ppo_ep: 1|act_loss: -0.0170745849609375|cri_loss: -0.00827789306640625|unsuper_loss: 0.0
-average reward score: 5.53125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.03%) |Training time=0.46s (21.33%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6324|ppo_ep: 1|act_loss: -0.0194549560546875|cri_loss: -0.00948333740234375|unsuper_loss: 0.0
-average reward score: 5.41796875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.99%) |Training time=0.47s (21.37%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6325|ppo_ep: 1|act_loss: -0.0080718994140625|cri_loss: -0.00390625|unsuper_loss: 0.0
-average reward score: 5.4140625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.06%) |Training time=0.46s (21.29%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6326|ppo_ep: 1|act_loss: 0.024169921875|cri_loss: 0.0124053955078125|unsuper_loss: 0.0
-average reward score: 5.6640625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.81%) |Training time=0.47s (21.53%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6327|ppo_ep: 1|act_loss: 0.0140838623046875|cri_loss: 0.007419586181640625|unsuper_loss: 0.0
-average reward score: 5.3828125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.98%) |Training time=0.46s (21.36%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6328|ppo_ep: 1|act_loss: 0.037841796875|cri_loss: 0.0194091796875|unsuper_loss: 0.0
-average reward score: 5.9453125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.82%) |Training time=0.47s (21.52%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53
-[2023-04-14 12:39:50,932] [INFO] [logging.py:96:log_dist] [Rank 0] step=6330, skipped=81, lr=[1.3753306724110857e-06, 1.3753306724110857e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:39:50,951] [INFO] [timer.py:199:stop] epoch=0/micro_step=6330/global_step=6330, RunningAvgSamplesPerSec=105.64268490460414, CurrSamplesPerSec=102.43962470224307, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:39:51,059] [INFO] [logging.py:96:log_dist] [Rank 0] step=6330, skipped=106, lr=[7.29512789575999e-07, 7.29512789575999e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6329|ppo_ep: 1|act_loss: 0.0162200927734375|cri_loss: 0.00836944580078125|unsuper_loss: 0.0
-average reward score: 5.578125
--------------------------------------------------------------------------------------
-|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.63s (73.32%) |Training time=0.48s (21.44%) |Others=0.12 (5.24%)|CurSamplesPerSec=14.43 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6330|ppo_ep: 1|act_loss: 0.0054931640625|cri_loss: 0.0031185150146484375|unsuper_loss: 0.0
-average reward score: 5.328125
--------------------------------------------------------------------------------------
-|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.71s (75.42%) |Training time=0.45s (20.12%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.15 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6331|ppo_ep: 1|act_loss: -0.003711700439453125|cri_loss: -0.0015411376953125|unsuper_loss: 0.0
-average reward score: 5.39453125
--------------------------------------------------------------------------------------
-|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.78s (76.31%) |Training time=0.45s (19.34%) |Others=0.10 (4.35%)|CurSamplesPerSec=13.76 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6332|ppo_ep: 1|act_loss: 0.01558685302734375|cri_loss: 0.00820159912109375|unsuper_loss: 0.0
-average reward score: 5.1015625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.20%) |Training time=0.46s (21.13%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6333|ppo_ep: 1|act_loss: 0.0157623291015625|cri_loss: 0.00823974609375|unsuper_loss: 0.0
-average reward score: 5.14453125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.05%) |Training time=0.46s (21.31%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6334|ppo_ep: 1|act_loss: -0.005840301513671875|cri_loss: -0.002635955810546875|unsuper_loss: 0.0
-average reward score: 5.44921875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.60%) |Training time=0.47s (21.77%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6335|ppo_ep: 1|act_loss: -0.0248260498046875|cri_loss: -0.01216888427734375|unsuper_loss: 0.0
-average reward score: 5.03125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.16%) |Training time=0.46s (21.21%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6336|ppo_ep: 1|act_loss: -0.03643798828125|cri_loss: -0.0167236328125|unsuper_loss: 0.0
-average reward score: 5.16015625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.65%) |Training time=0.47s (21.70%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6337|ppo_ep: 1|act_loss: -0.00493621826171875|cri_loss: -0.0005397796630859375|unsuper_loss: 0.0
-average reward score: 5.70703125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.68%) |Training time=0.47s (21.67%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53
-[2023-04-14 12:40:10,666] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 16384, reducing to 8192
-epoch: 0|step: 6338|ppo_ep: 1|act_loss: 0.005001068115234375|cri_loss: 0.0028247833251953125|unsuper_loss: 0.0
-average reward score: 5.953125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.45%) |Training time=0.45s (20.89%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.53
-[2023-04-14 12:40:12,806] [INFO] [logging.py:96:log_dist] [Rank 0] step=6340, skipped=82, lr=[1.3636623034290526e-06, 1.3636623034290526e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:40:12,824] [INFO] [timer.py:199:stop] epoch=0/micro_step=6340/global_step=6340, RunningAvgSamplesPerSec=105.64557854690106, CurrSamplesPerSec=105.26971448381123, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:40:12,917] [INFO] [logging.py:96:log_dist] [Rank 0] step=6340, skipped=106, lr=[7.227305300628223e-07, 7.227305300628223e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6339|ppo_ep: 1|act_loss: -0.0091400146484375|cri_loss: -0.004283905029296875|unsuper_loss: 0.0
-average reward score: 5.25390625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.75%) |Training time=0.47s (21.63%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6340|ppo_ep: 1|act_loss: -0.0142822265625|cri_loss: -0.007076263427734375|unsuper_loss: 0.0
-average reward score: 5.7734375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.48%) |Training time=0.47s (21.87%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6341|ppo_ep: 1|act_loss: 0.003917694091796875|cri_loss: 0.00240325927734375|unsuper_loss: 0.0
-average reward score: 4.93359375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.24%) |Training time=0.48s (22.12%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6342|ppo_ep: 1|act_loss: -0.0396728515625|cri_loss: -0.0185089111328125|unsuper_loss: 0.0
-average reward score: 5.23828125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.24%) |Training time=0.48s (22.14%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6343|ppo_ep: 1|act_loss: -0.007843017578125|cri_loss: -0.003810882568359375|unsuper_loss: 0.0
-average reward score: 5.45703125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.18%) |Training time=0.48s (22.19%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6344|ppo_ep: 1|act_loss: 0.00955963134765625|cri_loss: 0.005218505859375|unsuper_loss: 0.0
-average reward score: 5.94921875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.80%) |Training time=0.47s (21.58%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6345|ppo_ep: 1|act_loss: 0.01165771484375|cri_loss: 0.006011962890625|unsuper_loss: 0.0
-average reward score: 5.3046875
--------------------------------------------------------------------------------------
-|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.59s (71.23%) |Training time=0.48s (21.42%) |Others=0.16 (7.35%)|CurSamplesPerSec=14.34 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6346|ppo_ep: 1|act_loss: 0.035186767578125|cri_loss: 0.01885986328125|unsuper_loss: 0.0
-average reward score: 5.18359375
--------------------------------------------------------------------------------------
-|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.67s (74.30%) |Training time=0.48s (21.24%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.23 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6347|ppo_ep: 1|act_loss: -0.0005655288696289062|cri_loss: -0.00018525123596191406|unsuper_loss: 0.0
-average reward score: 4.78125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.61%) |Training time=0.47s (21.74%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6348|ppo_ep: 1|act_loss: 0.0498046875|cri_loss: 0.0255126953125|unsuper_loss: 0.0
-average reward score: 5.49609375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.38%) |Training time=0.48s (21.98%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.53
-[2023-04-14 12:40:34,627] [INFO] [logging.py:96:log_dist] [Rank 0] step=6350, skipped=82, lr=[1.3507461951905125e-06, 1.3507461951905125e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:40:34,645] [INFO] [timer.py:199:stop] epoch=0/micro_step=6350/global_step=6350, RunningAvgSamplesPerSec=105.63979259857506, CurrSamplesPerSec=102.75269498806098, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:40:34,738] [INFO] [logging.py:96:log_dist] [Rank 0] step=6350, skipped=106, lr=[7.159746139706194e-07, 7.159746139706194e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6349|ppo_ep: 1|act_loss: -0.00492095947265625|cri_loss: -0.0022411346435546875|unsuper_loss: 0.0
-average reward score: 5.87890625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.48%) |Training time=0.47s (21.91%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6350|ppo_ep: 1|act_loss: -0.01776123046875|cri_loss: -0.0073699951171875|unsuper_loss: 0.0
-average reward score: 4.90234375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.53%) |Training time=0.47s (21.83%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6351|ppo_ep: 1|act_loss: 0.020477294921875|cri_loss: 0.01078033447265625|unsuper_loss: 0.0
-average reward score: 4.96484375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.56%) |Training time=0.47s (21.82%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6352|ppo_ep: 1|act_loss: 0.053955078125|cri_loss: 0.0275115966796875|unsuper_loss: 0.0
-average reward score: 4.65625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.60%) |Training time=0.47s (21.76%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6353|ppo_ep: 1|act_loss: 0.04095458984375|cri_loss: 0.021240234375|unsuper_loss: 0.0
-average reward score: 5.26953125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.49%) |Training time=0.47s (21.86%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6354|ppo_ep: 1|act_loss: -0.0173492431640625|cri_loss: -0.00838470458984375|unsuper_loss: 0.0
-average reward score: 5.28125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.52%) |Training time=0.47s (21.82%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6355|ppo_ep: 1|act_loss: -0.022491455078125|cri_loss: -0.01107025146484375|unsuper_loss: 0.0
-average reward score: 5.1953125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.54%) |Training time=0.47s (21.82%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6356|ppo_ep: 1|act_loss: 0.01422882080078125|cri_loss: 0.00811767578125|unsuper_loss: 0.0
-average reward score: 5.2421875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.31%) |Training time=0.48s (22.08%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6357|ppo_ep: 1|act_loss: 0.045135498046875|cri_loss: 0.0232696533203125|unsuper_loss: 0.0
-average reward score: 5.5703125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.49%) |Training time=0.47s (21.89%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6358|ppo_ep: 1|act_loss: -0.01306915283203125|cri_loss: -0.00640106201171875|unsuper_loss: 0.0
-average reward score: 5.328125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.42%) |Training time=0.48s (21.99%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.53
-[2023-04-14 12:40:56,326] [INFO] [logging.py:96:log_dist] [Rank 0] step=6360, skipped=82, lr=[1.3378815837745404e-06, 1.3378815837745404e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:40:56,344] [INFO] [timer.py:199:stop] epoch=0/micro_step=6360/global_step=6360, RunningAvgSamplesPerSec=105.63503569608532, CurrSamplesPerSec=101.74754325784137, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:40:56,437] [INFO] [logging.py:96:log_dist] [Rank 0] step=6360, skipped=106, lr=[7.092451414383644e-07, 7.092451414383644e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6359|ppo_ep: 1|act_loss: 0.031951904296875|cri_loss: 0.0164794921875|unsuper_loss: 0.0
-average reward score: 5.8359375
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.63s (73.80%) |Training time=0.48s (21.65%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.52 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6360|ppo_ep: 1|act_loss: 0.0017070770263671875|cri_loss: 0.00101470947265625|unsuper_loss: 0.0
-average reward score: 5.47265625
--------------------------------------------------------------------------------------
-|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.71s (73.05%) |Training time=0.53s (22.65%) |Others=0.10 (4.30%)|CurSamplesPerSec=13.67 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6361|ppo_ep: 1|act_loss: 0.020721435546875|cri_loss: 0.01094818115234375|unsuper_loss: 0.0
-average reward score: 5.921875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.39%) |Training time=0.47s (21.96%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6362|ppo_ep: 1|act_loss: 0.03717041015625|cri_loss: 0.0189056396484375|unsuper_loss: 0.0
-average reward score: 6.07421875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.27%) |Training time=0.48s (22.10%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6363|ppo_ep: 1|act_loss: 0.008636474609375|cri_loss: 0.00461578369140625|unsuper_loss: 0.0
-average reward score: 5.41015625
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.33%) |Training time=0.48s (22.10%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6364|ppo_ep: 1|act_loss: 0.0145111083984375|cri_loss: 0.00763702392578125|unsuper_loss: 0.0
-average reward score: 4.7109375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.29%) |Training time=0.48s (22.09%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6365|ppo_ep: 1|act_loss: -0.01392364501953125|cri_loss: -0.006816864013671875|unsuper_loss: 0.0
-average reward score: 5.21484375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.42%) |Training time=0.48s (21.97%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6366|ppo_ep: 1|act_loss: -0.023895263671875|cri_loss: -0.0116729736328125|unsuper_loss: 0.0
-average reward score: 5.3515625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.37%) |Training time=0.48s (22.01%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6367|ppo_ep: 1|act_loss: 0.002269744873046875|cri_loss: 0.001384735107421875|unsuper_loss: 0.0
-average reward score: 5.75
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.38%) |Training time=0.48s (22.01%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6368|ppo_ep: 1|act_loss: 0.04046630859375|cri_loss: 0.022247314453125|unsuper_loss: 0.0
-average reward score: 5.83203125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.22%) |Training time=0.48s (22.17%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.53
-[2023-04-14 12:41:18,188] [INFO] [logging.py:96:log_dist] [Rank 0] step=6370, skipped=82, lr=[1.3250686598657134e-06, 1.3250686598657134e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:41:18,206] [INFO] [timer.py:199:stop] epoch=0/micro_step=6370/global_step=6370, RunningAvgSamplesPerSec=105.62690440321303, CurrSamplesPerSec=102.08720838251938, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:41:18,299] [INFO] [logging.py:96:log_dist] [Rank 0] step=6370, skipped=106, lr=[7.025422122130748e-07, 7.025422122130748e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6369|ppo_ep: 1|act_loss: -0.00763702392578125|cri_loss: -0.003749847412109375|unsuper_loss: 0.0
-average reward score: 5.56640625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.43%) |Training time=0.48s (21.99%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6370|ppo_ep: 1|act_loss: 0.00490570068359375|cri_loss: 0.003025054931640625|unsuper_loss: 0.0
-average reward score: 5.6796875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.28%) |Training time=0.48s (22.10%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6371|ppo_ep: 1|act_loss: 0.0430908203125|cri_loss: 0.023895263671875|unsuper_loss: 0.0
-average reward score: 5.3984375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.58%) |Training time=0.47s (21.79%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6372|ppo_ep: 1|act_loss: 0.001590728759765625|cri_loss: 0.001018524169921875|unsuper_loss: 0.0
-average reward score: 5.15234375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.34%) |Training time=0.48s (22.05%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6373|ppo_ep: 1|act_loss: -0.00396728515625|cri_loss: -0.0017547607421875|unsuper_loss: 0.0
-average reward score: 5.66015625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.54%) |Training time=0.47s (21.84%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6374|ppo_ep: 1|act_loss: 0.0047454833984375|cri_loss: 0.002521514892578125|unsuper_loss: 0.0
-average reward score: 5.64453125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.99%) |Training time=0.46s (21.35%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6375|ppo_ep: 1|act_loss: -0.0148773193359375|cri_loss: -0.007122039794921875|unsuper_loss: 0.0
-average reward score: 4.546875
--------------------------------------------------------------------------------------
-|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.77s (75.57%) |Training time=0.47s (20.13%) |Others=0.10 (4.29%)|CurSamplesPerSec=13.63 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6376|ppo_ep: 1|act_loss: -0.0074920654296875|cri_loss: -0.0035533905029296875|unsuper_loss: 0.0
-average reward score: 5.8671875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.43%) |Training time=0.47s (21.95%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6377|ppo_ep: 1|act_loss: -0.0041961669921875|cri_loss: -0.0018596649169921875|unsuper_loss: 0.0
-average reward score: 5.05078125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.29%) |Training time=0.48s (22.09%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6378|ppo_ep: 1|act_loss: -0.0028533935546875|cri_loss: -0.001171112060546875|unsuper_loss: 0.0
-average reward score: 5.5703125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.50%) |Training time=0.47s (21.85%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.53
-[2023-04-14 12:41:40,024] [INFO] [logging.py:96:log_dist] [Rank 0] step=6380, skipped=82, lr=[1.3123076133824706e-06, 1.3123076133824706e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:41:40,042] [INFO] [timer.py:199:stop] epoch=0/micro_step=6380/global_step=6380, RunningAvgSamplesPerSec=105.62234845103798, CurrSamplesPerSec=103.69900664528576, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:41:40,135] [INFO] [logging.py:96:log_dist] [Rank 0] step=6380, skipped=106, lr=[6.958659256483305e-07, 6.958659256483305e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6379|ppo_ep: 1|act_loss: -0.0093841552734375|cri_loss: -0.004528045654296875|unsuper_loss: 0.0
-average reward score: 5.49609375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.58%) |Training time=0.47s (21.82%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6380|ppo_ep: 1|act_loss: -0.0149078369140625|cri_loss: -0.00719451904296875|unsuper_loss: 0.0
-average reward score: 4.71484375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.56%) |Training time=0.47s (21.80%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6381|ppo_ep: 1|act_loss: -0.0292510986328125|cri_loss: -0.01401519775390625|unsuper_loss: 0.0
-average reward score: 5.12109375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.52%) |Training time=0.47s (21.80%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6382|ppo_ep: 1|act_loss: -0.0028247833251953125|cri_loss: -0.0012359619140625|unsuper_loss: 0.0
-average reward score: 5.76953125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.50%) |Training time=0.47s (21.87%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6383|ppo_ep: 1|act_loss: -0.003734588623046875|cri_loss: -0.0015010833740234375|unsuper_loss: 0.0
-average reward score: 5.01953125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.57%) |Training time=0.47s (21.78%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6384|ppo_ep: 1|act_loss: -0.0234222412109375|cri_loss: -0.01132965087890625|unsuper_loss: 0.0
-average reward score: 5.2578125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.24%) |Training time=0.48s (22.13%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6385|ppo_ep: 1|act_loss: -0.020721435546875|cri_loss: -0.010101318359375|unsuper_loss: 0.0
-average reward score: 4.78125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.53%) |Training time=0.47s (21.84%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6386|ppo_ep: 1|act_loss: 0.003948211669921875|cri_loss: 0.0021457672119140625|unsuper_loss: 0.0
-average reward score: 6.00390625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.36%) |Training time=0.47s (21.99%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6387|ppo_ep: 1|act_loss: 0.0122222900390625|cri_loss: 0.00632476806640625|unsuper_loss: 0.0
-average reward score: 5.828125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.39%) |Training time=0.48s (21.97%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6388|ppo_ep: 1|act_loss: 0.021087646484375|cri_loss: 0.01094818115234375|unsuper_loss: 0.0
-average reward score: 5.23828125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.52%) |Training time=0.47s (21.84%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53
-[2023-04-14 12:42:01,739] [INFO] [logging.py:96:log_dist] [Rank 0] step=6390, skipped=82, lr=[1.299598633474306e-06, 1.299598633474306e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:42:01,758] [INFO] [timer.py:199:stop] epoch=0/micro_step=6390/global_step=6390, RunningAvgSamplesPerSec=105.61863925776754, CurrSamplesPerSec=104.40157717345564, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:42:01,851] [INFO] [logging.py:96:log_dist] [Rank 0] step=6390, skipped=106, lr=[6.892163807028043e-07, 6.892163807028043e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6389|ppo_ep: 1|act_loss: -0.0061798095703125|cri_loss: -0.00295257568359375|unsuper_loss: 0.0
-average reward score: 5.78125
--------------------------------------------------------------------------------------
-|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.70s (74.94%) |Training time=0.47s (20.66%) |Others=0.10 (4.40%)|CurSamplesPerSec=14.07 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6390|ppo_ep: 1|act_loss: 0.0626220703125|cri_loss: 0.03173828125|unsuper_loss: 0.0
-average reward score: 5.11328125
--------------------------------------------------------------------------------------
-|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.78s (75.54%) |Training time=0.48s (20.19%) |Others=0.10 (4.27%)|CurSamplesPerSec=13.57 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6391|ppo_ep: 1|act_loss: 0.016357421875|cri_loss: 0.0083770751953125|unsuper_loss: 0.0
-average reward score: 5.328125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.39%) |Training time=0.47s (21.95%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6392|ppo_ep: 1|act_loss: 0.0127716064453125|cri_loss: 0.0069580078125|unsuper_loss: 0.0
-average reward score: 5.30078125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.11%) |Training time=0.48s (22.27%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53
-[2023-04-14 12:42:10,680] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
-epoch: 0|step: 6393|ppo_ep: 1|act_loss: 0.0075225830078125|cri_loss: 0.004016876220703125|unsuper_loss: 0.0
-average reward score: 5.94921875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.60%) |Training time=0.48s (22.23%) |Others=0.09 (4.17%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.53
-[2023-04-14 12:42:12,838] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
-epoch: 0|step: 6394|ppo_ep: 1|act_loss: -0.0056915283203125|cri_loss: -0.0023746490478515625|unsuper_loss: 0.0
-average reward score: 5.25390625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.32%) |Training time=0.49s (22.54%) |Others=0.09 (4.14%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6395|ppo_ep: 1|act_loss: -0.0128631591796875|cri_loss: -0.005878448486328125|unsuper_loss: 0.0
-average reward score: 5.80859375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.04%) |Training time=0.48s (22.35%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6396|ppo_ep: 1|act_loss: 0.03961181640625|cri_loss: 0.0205078125|unsuper_loss: 0.0
-average reward score: 5.4921875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.26%) |Training time=0.48s (22.10%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6397|ppo_ep: 1|act_loss: -0.0031223297119140625|cri_loss: -0.0006008148193359375|unsuper_loss: 0.0
-average reward score: 5.31640625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.86%) |Training time=0.49s (22.51%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6398|ppo_ep: 1|act_loss: -0.008331298828125|cri_loss: -0.0038623809814453125|unsuper_loss: 0.0
-average reward score: 5.1015625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.29%) |Training time=0.48s (22.08%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.53
-[2023-04-14 12:42:23,529] [INFO] [logging.py:96:log_dist] [Rank 0] step=6400, skipped=82, lr=[1.286941908518962e-06, 1.286941908518962e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:42:23,547] [INFO] [timer.py:199:stop] epoch=0/micro_step=6400/global_step=6400, RunningAvgSamplesPerSec=105.61073946158585, CurrSamplesPerSec=101.53287430195913, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:42:23,640] [INFO] [logging.py:96:log_dist] [Rank 0] step=6400, skipped=108, lr=[6.839160649594401e-07, 6.839160649594401e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6399|ppo_ep: 1|act_loss: -0.0230865478515625|cri_loss: -0.01132965087890625|unsuper_loss: 0.0
-average reward score: 4.8046875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.26%) |Training time=0.48s (22.13%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6400|ppo_ep: 1|act_loss: -0.00251007080078125|cri_loss: -0.0011920928955078125|unsuper_loss: 0.0
-average reward score: 5.83984375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.25%) |Training time=0.48s (22.10%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6401|ppo_ep: 1|act_loss: 0.01190185546875|cri_loss: 0.006153106689453125|unsuper_loss: 0.0
-average reward score: 4.70703125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.88%) |Training time=0.49s (22.51%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6402|ppo_ep: 1|act_loss: -0.0122222900390625|cri_loss: -0.00583648681640625|unsuper_loss: 0.0
-average reward score: 5.12109375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.25%) |Training time=0.48s (22.10%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6403|ppo_ep: 1|act_loss: -0.01561737060546875|cri_loss: -0.00749969482421875|unsuper_loss: 0.0
-average reward score: 5.87890625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.38%) |Training time=0.47s (21.96%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6404|ppo_ep: 1|act_loss: 0.018951416015625|cri_loss: 0.009796142578125|unsuper_loss: 0.0
-average reward score: 5.26171875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.94%) |Training time=0.49s (22.42%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6405|ppo_ep: 1|act_loss: -0.0061798095703125|cri_loss: -0.0029392242431640625|unsuper_loss: 0.0
-average reward score: 5.84765625
--------------------------------------------------------------------------------------
-|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.59s (71.40%) |Training time=0.53s (23.90%) |Others=0.11 (4.70%)|CurSamplesPerSec=14.33 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6406|ppo_ep: 1|act_loss: 0.00415802001953125|cri_loss: 0.0023365020751953125|unsuper_loss: 0.0
-average reward score: 4.7890625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.13%) |Training time=0.48s (22.25%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6407|ppo_ep: 1|act_loss: -0.00318145751953125|cri_loss: -0.0015420913696289062|unsuper_loss: 0.0
-average reward score: 6.234375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.35%) |Training time=0.48s (22.04%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6408|ppo_ep: 1|act_loss: 0.0098114013671875|cri_loss: 0.005237579345703125|unsuper_loss: 0.0
-average reward score: 5.45703125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.14%) |Training time=0.48s (22.22%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53
-[2023-04-14 12:42:45,229] [INFO] [logging.py:96:log_dist] [Rank 0] step=6410, skipped=82, lr=[1.2743376261196346e-06, 1.2743376261196346e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:42:45,248] [INFO] [timer.py:199:stop] epoch=0/micro_step=6410/global_step=6410, RunningAvgSamplesPerSec=105.60036418493905, CurrSamplesPerSec=102.12643591575606, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:42:45,340] [INFO] [logging.py:96:log_dist] [Rank 0] step=6410, skipped=108, lr=[6.77314903038199e-07, 6.77314903038199e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6409|ppo_ep: 1|act_loss: -0.0075836181640625|cri_loss: -0.0036602020263671875|unsuper_loss: 0.0
-average reward score: 5.0625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.36%) |Training time=0.48s (22.02%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6410|ppo_ep: 1|act_loss: 0.05438232421875|cri_loss: 0.031829833984375|unsuper_loss: 0.0
-average reward score: 5.4453125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.09%) |Training time=0.48s (22.30%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6411|ppo_ep: 1|act_loss: -0.015777587890625|cri_loss: -0.007659912109375|unsuper_loss: 0.0
-average reward score: 5.47265625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.98%) |Training time=0.49s (22.39%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6412|ppo_ep: 1|act_loss: 0.059295654296875|cri_loss: 0.0310211181640625|unsuper_loss: 0.0
-average reward score: 5.64453125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.43%) |Training time=0.47s (21.93%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6413|ppo_ep: 1|act_loss: -0.0099945068359375|cri_loss: -0.004230499267578125|unsuper_loss: 0.0
-average reward score: 5.359375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.80%) |Training time=0.49s (22.61%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6414|ppo_ep: 1|act_loss: -0.0134124755859375|cri_loss: -0.006267547607421875|unsuper_loss: 0.0
-average reward score: 5.87109375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.16%) |Training time=0.48s (22.20%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6415|ppo_ep: 1|act_loss: 0.029022216796875|cri_loss: 0.01480865478515625|unsuper_loss: 0.0
-average reward score: 5.43359375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.17%) |Training time=0.48s (22.19%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6416|ppo_ep: 1|act_loss: -0.0063018798828125|cri_loss: -0.003021240234375|unsuper_loss: 0.0
-average reward score: 5.53515625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.15%) |Training time=0.48s (22.24%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6417|ppo_ep: 1|act_loss: 0.0084686279296875|cri_loss: 0.004512786865234375|unsuper_loss: 0.0
-average reward score: 5.0703125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.23%) |Training time=0.48s (22.15%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6418|ppo_ep: 1|act_loss: 0.00885009765625|cri_loss: 0.005138397216796875|unsuper_loss: 0.0
-average reward score: 4.6484375
--------------------------------------------------------------------------------------
-|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.64s (73.63%) |Training time=0.48s (21.50%) |Others=0.11 (4.88%)|CurSamplesPerSec=14.38 |AvgSamplesPerSec=14.53
-[2023-04-14 12:43:07,049] [INFO] [logging.py:96:log_dist] [Rank 0] step=6420, skipped=82, lr=[1.2617859731021937e-06, 1.2617859731021937e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:43:07,067] [INFO] [timer.py:199:stop] epoch=0/micro_step=6420/global_step=6420, RunningAvgSamplesPerSec=105.59150460729504, CurrSamplesPerSec=99.78976137635176, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:43:07,160] [INFO] [logging.py:96:log_dist] [Rank 0] step=6420, skipped=108, lr=[6.707407577070841e-07, 6.707407577070841e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6419|ppo_ep: 1|act_loss: 0.057830810546875|cri_loss: 0.030792236328125|unsuper_loss: 0.0
-average reward score: 5.1328125
--------------------------------------------------------------------------------------
-|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.31%) |Training time=0.48s (21.31%) |Others=0.10 (4.38%)|CurSamplesPerSec=14.11 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6420|ppo_ep: 1|act_loss: 0.03582763671875|cri_loss: 0.019805908203125|unsuper_loss: 0.0
-average reward score: 4.6484375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.81%) |Training time=0.49s (22.59%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6421|ppo_ep: 1|act_loss: -0.001499176025390625|cri_loss: -0.0002970695495605469|unsuper_loss: 0.0
-average reward score: 4.9140625
--------------------------------------------------------------------------------------
-|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.47%) |Training time=0.47s (20.25%) |Others=0.10 (4.28%)|CurSamplesPerSec=13.71 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6422|ppo_ep: 1|act_loss: 0.024444580078125|cri_loss: 0.013214111328125|unsuper_loss: 0.0
-average reward score: 5.203125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.45%) |Training time=0.47s (21.93%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6423|ppo_ep: 1|act_loss: 0.00295257568359375|cri_loss: 0.0017480850219726562|unsuper_loss: 0.0
-average reward score: 5.3671875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.61%) |Training time=0.47s (21.79%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6424|ppo_ep: 1|act_loss: -0.0242156982421875|cri_loss: -0.0118255615234375|unsuper_loss: 0.0
-average reward score: 5.88671875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.10%) |Training time=0.48s (22.27%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6425|ppo_ep: 1|act_loss: 0.0140228271484375|cri_loss: 0.007411956787109375|unsuper_loss: 0.0
-average reward score: 4.94140625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.03%) |Training time=0.48s (22.22%) |Others=0.10 (4.75%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6426|ppo_ep: 1|act_loss: -0.0147705078125|cri_loss: -0.007007598876953125|unsuper_loss: 0.0
-average reward score: 4.24609375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.45%) |Training time=0.47s (21.92%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6427|ppo_ep: 1|act_loss: -0.00894927978515625|cri_loss: -0.0044097900390625|unsuper_loss: 0.0
-average reward score: 5.5390625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.15%) |Training time=0.48s (22.22%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6428|ppo_ep: 1|act_loss: -0.016326904296875|cri_loss: -0.0078277587890625|unsuper_loss: 0.0
-average reward score: 5.37890625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.04%) |Training time=0.49s (22.35%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.53
-[2023-04-14 12:43:28,885] [INFO] [logging.py:96:log_dist] [Rank 0] step=6430, skipped=82, lr=[1.2492871355124154e-06, 1.2492871355124154e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:43:28,903] [INFO] [timer.py:199:stop] epoch=0/micro_step=6430/global_step=6430, RunningAvgSamplesPerSec=105.58394898947627, CurrSamplesPerSec=101.49256638680498, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:43:28,996] [INFO] [logging.py:96:log_dist] [Rank 0] step=6430, skipped=108, lr=[6.641937264107868e-07, 6.641937264107868e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6429|ppo_ep: 1|act_loss: -0.0140533447265625|cri_loss: -0.00669097900390625|unsuper_loss: 0.0
-average reward score: 4.6875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.31%) |Training time=0.48s (22.09%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6430|ppo_ep: 1|act_loss: -0.015594482421875|cri_loss: -0.007366180419921875|unsuper_loss: 0.0
-average reward score: 5.703125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.26%) |Training time=0.48s (22.10%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6431|ppo_ep: 1|act_loss: 0.004291534423828125|cri_loss: 0.0024585723876953125|unsuper_loss: 0.0
-average reward score: 5.0
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.49%) |Training time=0.47s (21.88%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6432|ppo_ep: 1|act_loss: 0.0127105712890625|cri_loss: 0.007175445556640625|unsuper_loss: 0.0
-average reward score: 5.15625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.30%) |Training time=0.48s (22.09%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6433|ppo_ep: 1|act_loss: -0.01412200927734375|cri_loss: -0.00665283203125|unsuper_loss: 0.0
-average reward score: 5.9140625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.46%) |Training time=0.47s (21.93%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6434|ppo_ep: 1|act_loss: 0.0120086669921875|cri_loss: 0.00629425048828125|unsuper_loss: 0.0
-average reward score: 4.984375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.56%) |Training time=0.47s (21.82%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6435|ppo_ep: 1|act_loss: -0.0128173828125|cri_loss: -0.005954742431640625|unsuper_loss: 0.0
-average reward score: 4.84375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.83%) |Training time=0.47s (21.55%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6436|ppo_ep: 1|act_loss: -0.0032863616943359375|cri_loss: -0.0015459060668945312|unsuper_loss: 0.0
-average reward score: 5.62890625
--------------------------------------------------------------------------------------
-|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.60s (70.26%) |Training time=0.58s (25.32%) |Others=0.10 (4.42%)|CurSamplesPerSec=14.06 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6437|ppo_ep: 1|act_loss: 0.00627899169921875|cri_loss: 0.0032482147216796875|unsuper_loss: 0.0
-average reward score: 4.609375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.98%) |Training time=0.46s (21.39%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6438|ppo_ep: 1|act_loss: 0.007965087890625|cri_loss: 0.005157470703125|unsuper_loss: 0.0
-average reward score: 5.69921875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.35%) |Training time=0.48s (22.04%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.53
-[2023-04-14 12:43:50,625] [INFO] [logging.py:96:log_dist] [Rank 0] step=6440, skipped=82, lr=[1.2368412986132292e-06, 1.2368412986132292e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:43:50,643] [INFO] [timer.py:199:stop] epoch=0/micro_step=6440/global_step=6440, RunningAvgSamplesPerSec=105.5777836336627, CurrSamplesPerSec=102.76473200476545, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:43:50,736] [INFO] [logging.py:96:log_dist] [Rank 0] step=6440, skipped=108, lr=[6.576739061921056e-07, 6.576739061921056e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6439|ppo_ep: 1|act_loss: 0.02630615234375|cri_loss: 0.01354217529296875|unsuper_loss: 0.0
-average reward score: 5.30859375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.44%) |Training time=0.47s (21.96%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6440|ppo_ep: 1|act_loss: -0.0191192626953125|cri_loss: -0.0086517333984375|unsuper_loss: 0.0
-average reward score: 5.04296875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.59%) |Training time=0.47s (21.75%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6441|ppo_ep: 1|act_loss: 0.00342559814453125|cri_loss: 0.0019407272338867188|unsuper_loss: 0.0
-average reward score: 5.44140625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.55%) |Training time=0.47s (21.84%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6442|ppo_ep: 1|act_loss: -0.000881195068359375|cri_loss: -0.0002651214599609375|unsuper_loss: 0.0
-average reward score: 5.34375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.30%) |Training time=0.48s (22.06%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6443|ppo_ep: 1|act_loss: -0.0157928466796875|cri_loss: -0.007617950439453125|unsuper_loss: 0.0
-average reward score: 4.7421875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.54%) |Training time=0.47s (21.81%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6444|ppo_ep: 1|act_loss: -0.0029392242431640625|cri_loss: -0.0014276504516601562|unsuper_loss: 0.0
-average reward score: 5.765625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.47%) |Training time=0.47s (21.90%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6445|ppo_ep: 1|act_loss: -0.0023975372314453125|cri_loss: -0.0009541511535644531|unsuper_loss: 0.0
-average reward score: 4.9609375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.37%) |Training time=0.48s (22.01%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6446|ppo_ep: 1|act_loss: 0.03143310546875|cri_loss: 0.01885986328125|unsuper_loss: 0.0
-average reward score: 4.34375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.79%) |Training time=0.47s (21.60%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6447|ppo_ep: 1|act_loss: -0.0092315673828125|cri_loss: -0.0044708251953125|unsuper_loss: 0.0
-average reward score: 5.15625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.68%) |Training time=0.47s (21.68%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6448|ppo_ep: 1|act_loss: -0.002841949462890625|cri_loss: 0.0002040863037109375|unsuper_loss: 0.0
-average reward score: 4.7734375
--------------------------------------------------------------------------------------
-|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.70s (74.78%) |Training time=0.47s (20.81%) |Others=0.10 (4.42%)|CurSamplesPerSec=14.08 |AvgSamplesPerSec=14.53
-[2023-04-14 12:44:12,338] [INFO] [logging.py:96:log_dist] [Rank 0] step=6450, skipped=82, lr=[1.224448646881958e-06, 1.224448646881958e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:44:12,356] [INFO] [timer.py:199:stop] epoch=0/micro_step=6450/global_step=6450, RunningAvgSamplesPerSec=105.5742490838407, CurrSamplesPerSec=102.64236918076665, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:44:12,448] [INFO] [logging.py:96:log_dist] [Rank 0] step=6450, skipped=108, lr=[6.511813936905043e-07, 6.511813936905043e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6449|ppo_ep: 1|act_loss: 0.00443267822265625|cri_loss: 0.002407073974609375|unsuper_loss: 0.0
-average reward score: 5.19921875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.36%) |Training time=0.47s (22.01%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6450|ppo_ep: 1|act_loss: 0.01702880859375|cri_loss: 0.00946807861328125|unsuper_loss: 0.0
-average reward score: 5.30078125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.69%) |Training time=0.47s (21.65%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6451|ppo_ep: 1|act_loss: -0.0035552978515625|cri_loss: -0.00168609619140625|unsuper_loss: 0.0
-average reward score: 5.3515625
--------------------------------------------------------------------------------------
-|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.77s (75.55%) |Training time=0.47s (20.17%) |Others=0.10 (4.28%)|CurSamplesPerSec=13.64 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6452|ppo_ep: 1|act_loss: 0.0164794921875|cri_loss: 0.0085906982421875|unsuper_loss: 0.0
-average reward score: 5.63671875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.96%) |Training time=0.49s (22.45%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6453|ppo_ep: 1|act_loss: 0.007053375244140625|cri_loss: 0.0038814544677734375|unsuper_loss: 0.0
-average reward score: 4.7890625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.23%) |Training time=0.48s (22.16%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6454|ppo_ep: 1|act_loss: -0.013641357421875|cri_loss: -0.0064697265625|unsuper_loss: 0.0
-average reward score: 4.8125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.23%) |Training time=0.48s (22.16%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6455|ppo_ep: 1|act_loss: 0.036041259765625|cri_loss: 0.0184478759765625|unsuper_loss: 0.0
-average reward score: 5.5078125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.13%) |Training time=0.48s (22.28%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6456|ppo_ep: 1|act_loss: 0.0010776519775390625|cri_loss: 0.0009908676147460938|unsuper_loss: 0.0
-average reward score: 6.23828125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.38%) |Training time=0.48s (22.00%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6457|ppo_ep: 1|act_loss: 0.039031982421875|cri_loss: 0.02099609375|unsuper_loss: 0.0
-average reward score: 5.26953125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.04%) |Training time=0.49s (22.37%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6458|ppo_ep: 1|act_loss: -0.01117706298828125|cri_loss: -0.005397796630859375|unsuper_loss: 0.0
-average reward score: 4.8671875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.74%) |Training time=0.47s (21.63%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53
-[2023-04-14 12:44:34,188] [INFO] [logging.py:96:log_dist] [Rank 0] step=6460, skipped=82, lr=[1.2121093640075971e-06, 1.2121093640075971e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:44:34,207] [INFO] [timer.py:199:stop] epoch=0/micro_step=6460/global_step=6460, RunningAvgSamplesPerSec=105.56811658804929, CurrSamplesPerSec=105.1897695692178, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:44:34,300] [INFO] [logging.py:96:log_dist] [Rank 0] step=6460, skipped=108, lr=[6.447162851406805e-07, 6.447162851406805e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6459|ppo_ep: 1|act_loss: -0.0026264190673828125|cri_loss: -0.0008635520935058594|unsuper_loss: 0.0
-average reward score: 5.09765625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.87%) |Training time=0.47s (21.53%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6460|ppo_ep: 1|act_loss: -0.0277099609375|cri_loss: -0.013702392578125|unsuper_loss: 0.0
-average reward score: 5.5
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.27%) |Training time=0.46s (21.11%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6461|ppo_ep: 1|act_loss: 0.051910400390625|cri_loss: 0.0283203125|unsuper_loss: 0.0
-average reward score: 5.41015625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.21%) |Training time=0.46s (21.14%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6462|ppo_ep: 1|act_loss: -0.0036907196044921875|cri_loss: -0.001331329345703125|unsuper_loss: 0.0
-average reward score: 5.80078125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.40%) |Training time=0.45s (20.97%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6463|ppo_ep: 1|act_loss: 0.0004858970642089844|cri_loss: 0.00047969818115234375|unsuper_loss: 0.0
-average reward score: 5.33984375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.41%) |Training time=0.45s (20.95%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6464|ppo_ep: 1|act_loss: -0.0036411285400390625|cri_loss: -0.001377105712890625|unsuper_loss: 0.0
-average reward score: 5.4765625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.13%) |Training time=0.46s (21.26%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6465|ppo_ep: 1|act_loss: 0.0038700103759765625|cri_loss: 0.002208709716796875|unsuper_loss: 0.0
-average reward score: 6.25
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.20%) |Training time=0.46s (21.16%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6466|ppo_ep: 1|act_loss: 0.00542449951171875|cri_loss: 0.00310516357421875|unsuper_loss: 0.0
-average reward score: 4.60546875
--------------------------------------------------------------------------------------
-|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.78s (76.30%) |Training time=0.45s (19.34%) |Others=0.10 (4.36%)|CurSamplesPerSec=13.68 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6467|ppo_ep: 1|act_loss: 0.01116943359375|cri_loss: 0.006710052490234375|unsuper_loss: 0.0
-average reward score: 5.359375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.46%) |Training time=0.45s (20.90%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6468|ppo_ep: 1|act_loss: -0.049163818359375|cri_loss: -0.0239410400390625|unsuper_loss: 0.0
-average reward score: 4.875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.53%) |Training time=0.45s (20.82%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.53
-[2023-04-14 12:44:56,036] [INFO] [logging.py:96:log_dist] [Rank 0] step=6470, skipped=82, lr=[1.1998236328880862e-06, 1.1998236328880862e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:44:56,055] [INFO] [timer.py:199:stop] epoch=0/micro_step=6470/global_step=6470, RunningAvgSamplesPerSec=105.57323794039799, CurrSamplesPerSec=107.27137351562263, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:44:56,147] [INFO] [logging.py:96:log_dist] [Rank 0] step=6470, skipped=108, lr=[6.382786763711393e-07, 6.382786763711393e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6469|ppo_ep: 1|act_loss: -0.0016469955444335938|cri_loss: -0.0007343292236328125|unsuper_loss: 0.0
-average reward score: 5.234375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.17%) |Training time=0.46s (21.23%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6470|ppo_ep: 1|act_loss: -0.0221710205078125|cri_loss: -0.0108489990234375|unsuper_loss: 0.0
-average reward score: 5.48828125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.34%) |Training time=0.45s (21.04%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6471|ppo_ep: 1|act_loss: 0.0247344970703125|cri_loss: 0.01265716552734375|unsuper_loss: 0.0
-average reward score: 4.68359375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.26%) |Training time=0.46s (21.11%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6472|ppo_ep: 1|act_loss: -0.020843505859375|cri_loss: -0.0093841552734375|unsuper_loss: 0.0
-average reward score: 5.80859375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.17%) |Training time=0.46s (21.20%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6473|ppo_ep: 1|act_loss: -0.0097808837890625|cri_loss: -0.00446319580078125|unsuper_loss: 0.0
-average reward score: 5.7734375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.55%) |Training time=0.45s (20.85%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6474|ppo_ep: 1|act_loss: 0.00592803955078125|cri_loss: 0.0030498504638671875|unsuper_loss: 0.0
-average reward score: 5.75
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.01%) |Training time=0.46s (21.39%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6475|ppo_ep: 1|act_loss: 0.03131103515625|cri_loss: 0.0160369873046875|unsuper_loss: 0.0
-average reward score: 4.97265625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.25%) |Training time=0.46s (21.12%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6476|ppo_ep: 1|act_loss: 0.0002980232238769531|cri_loss: 0.00029349327087402344|unsuper_loss: 0.0
-average reward score: 4.92578125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.49%) |Training time=0.45s (20.90%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6477|ppo_ep: 1|act_loss: 0.0050506591796875|cri_loss: 0.003326416015625|unsuper_loss: 0.0
-average reward score: 5.1640625
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.50%) |Training time=0.46s (20.98%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6478|ppo_ep: 1|act_loss: 0.0130462646484375|cri_loss: 0.006927490234375|unsuper_loss: 0.0
-average reward score: 5.08984375
--------------------------------------------------------------------------------------
-|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.71s (75.37%) |Training time=0.46s (20.20%) |Others=0.10 (4.43%)|CurSamplesPerSec=14.14 |AvgSamplesPerSec=14.53
-[2023-04-14 12:45:17,830] [INFO] [logging.py:96:log_dist] [Rank 0] step=6480, skipped=82, lr=[1.1875916356275982e-06, 1.1875916356275982e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:45:17,848] [INFO] [timer.py:199:stop] epoch=0/micro_step=6480/global_step=6480, RunningAvgSamplesPerSec=105.5773912936444, CurrSamplesPerSec=107.25242944209734, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:45:17,941] [INFO] [logging.py:96:log_dist] [Rank 0] step=6480, skipped=108, lr=[6.318686628027723e-07, 6.318686628027723e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6479|ppo_ep: 1|act_loss: -0.0038814544677734375|cri_loss: -0.0016021728515625|unsuper_loss: 0.0
-average reward score: 5.3828125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.10%) |Training time=0.46s (21.30%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6480|ppo_ep: 1|act_loss: -0.0041351318359375|cri_loss: -0.0018663406372070312|unsuper_loss: 0.0
-average reward score: 4.90234375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.03%) |Training time=0.46s (21.08%) |Others=0.11 (4.89%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6481|ppo_ep: 1|act_loss: -0.0183868408203125|cri_loss: -0.00872039794921875|unsuper_loss: 0.0
-average reward score: 5.5546875
--------------------------------------------------------------------------------------
-|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.79s (76.18%) |Training time=0.46s (19.51%) |Others=0.10 (4.31%)|CurSamplesPerSec=13.63 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6482|ppo_ep: 1|act_loss: 0.034393310546875|cri_loss: 0.01800537109375|unsuper_loss: 0.0
-average reward score: 6.03515625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.82%) |Training time=0.47s (21.58%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6483|ppo_ep: 1|act_loss: -0.027374267578125|cri_loss: -0.01319122314453125|unsuper_loss: 0.0
-average reward score: 5.34375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.57%) |Training time=0.47s (21.82%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6484|ppo_ep: 1|act_loss: -0.0150909423828125|cri_loss: -0.007389068603515625|unsuper_loss: 0.0
-average reward score: 4.9296875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.99%) |Training time=0.49s (22.42%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6485|ppo_ep: 1|act_loss: 0.019775390625|cri_loss: 0.01065826416015625|unsuper_loss: 0.0
-average reward score: 5.6640625
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.59%) |Training time=0.47s (21.68%) |Others=0.10 (4.73%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6486|ppo_ep: 1|act_loss: -0.003971099853515625|cri_loss: -0.00188446044921875|unsuper_loss: 0.0
-average reward score: 5.3125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.67s (76.46%) |Training time=0.41s (18.83%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6487|ppo_ep: 1|act_loss: -0.009490966796875|cri_loss: -0.004245758056640625|unsuper_loss: 0.0
-average reward score: 5.3046875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.31%) |Training time=0.46s (21.02%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6488|ppo_ep: 1|act_loss: 0.0171966552734375|cri_loss: 0.00887298583984375|unsuper_loss: 0.0
-average reward score: 5.30078125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.10%) |Training time=0.44s (20.20%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.53
-[2023-04-14 12:45:39,763] [INFO] [logging.py:96:log_dist] [Rank 0] step=6490, skipped=82, lr=[1.1754135535338393e-06, 1.1754135535338393e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:45:39,781] [INFO] [timer.py:199:stop] epoch=0/micro_step=6490/global_step=6490, RunningAvgSamplesPerSec=105.5822909862731, CurrSamplesPerSec=115.98079224638602, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:45:39,874] [INFO] [logging.py:96:log_dist] [Rank 0] step=6490, skipped=108, lr=[6.254863394474447e-07, 6.254863394474447e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6489|ppo_ep: 1|act_loss: 0.0064697265625|cri_loss: 0.003589630126953125|unsuper_loss: 0.0
-average reward score: 5.390625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.06%) |Training time=0.44s (20.23%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6490|ppo_ep: 1|act_loss: -0.0025119781494140625|cri_loss: -0.00113677978515625|unsuper_loss: 0.0
-average reward score: 5.2421875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.85%) |Training time=0.44s (20.43%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6491|ppo_ep: 1|act_loss: 0.027984619140625|cri_loss: 0.01507568359375|unsuper_loss: 0.0
-average reward score: 5.82421875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.66s (76.35%) |Training time=0.41s (18.90%) |Others=0.10 (4.75%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6492|ppo_ep: 1|act_loss: -0.004123687744140625|cri_loss: -0.0017004013061523438|unsuper_loss: 0.0
-average reward score: 4.8984375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.08%) |Training time=0.46s (21.23%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6493|ppo_ep: 1|act_loss: 0.004833221435546875|cri_loss: 0.002483367919921875|unsuper_loss: 0.0
-average reward score: 4.6484375
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.65%) |Training time=0.48s (21.73%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6494|ppo_ep: 1|act_loss: 0.03521728515625|cri_loss: 0.01904296875|unsuper_loss: 0.0
-average reward score: 5.23046875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.98%) |Training time=0.47s (21.39%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.53
-[2023-04-14 12:45:52,903] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
-epoch: 0|step: 6495|ppo_ep: 1|act_loss: 0.0124969482421875|cri_loss: 0.00667572021484375|unsuper_loss: 0.0
-average reward score: 5.359375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.02%) |Training time=0.45s (20.76%) |Others=0.09 (4.22%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.53
-[2023-04-14 12:45:55,239] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
-epoch: 0|step: 6496|ppo_ep: 1|act_loss: 0.0023250579833984375|cri_loss: 0.0012359619140625|unsuper_loss: 0.0
-average reward score: 5.99609375
--------------------------------------------------------------------------------------
-|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.80s (77.06%) |Training time=0.45s (19.08%) |Others=0.09 (3.86%)|CurSamplesPerSec=13.70 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6497|ppo_ep: 1|act_loss: -0.01434326171875|cri_loss: -0.006961822509765625|unsuper_loss: 0.0
-average reward score: 5.4296875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.00%) |Training time=0.47s (21.38%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6498|ppo_ep: 1|act_loss: -0.019866943359375|cri_loss: -0.0095367431640625|unsuper_loss: 0.0
-average reward score: 5.30078125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.36%) |Training time=0.45s (20.92%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.53
-[2023-04-14 12:46:01,651] [INFO] [logging.py:96:log_dist] [Rank 0] step=6500, skipped=82, lr=[1.1632895671153645e-06, 1.1632895671153645e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:46:01,668] [INFO] [timer.py:199:stop] epoch=0/micro_step=6500/global_step=6500, RunningAvgSamplesPerSec=105.59168797775624, CurrSamplesPerSec=124.59731974764485, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:46:01,761] [INFO] [logging.py:96:log_dist] [Rank 0] step=6500, skipped=110, lr=[6.204004813025569e-07, 6.204004813025569e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6499|ppo_ep: 1|act_loss: 0.0130615234375|cri_loss: 0.006610870361328125|unsuper_loss: 0.0
-average reward score: 5.859375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.78%) |Training time=0.42s (19.35%) |Others=0.11 (4.87%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6500|ppo_ep: 1|act_loss: 0.00244140625|cri_loss: 0.0013818740844726562|unsuper_loss: 0.0
-average reward score: 4.9296875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.67s (76.74%) |Training time=0.40s (18.52%) |Others=0.10 (4.74%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6501|ppo_ep: 1|act_loss: 0.01165771484375|cri_loss: 0.00713348388671875|unsuper_loss: 0.0
-average reward score: 5.703125
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.04%) |Training time=0.47s (21.31%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6502|ppo_ep: 1|act_loss: 0.0173797607421875|cri_loss: 0.0088348388671875|unsuper_loss: 0.0
-average reward score: 4.984375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.44%) |Training time=0.45s (20.87%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6503|ppo_ep: 1|act_loss: 0.029022216796875|cri_loss: 0.015045166015625|unsuper_loss: 0.0
-average reward score: 5.40234375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.43%) |Training time=0.45s (20.89%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6504|ppo_ep: 1|act_loss: -0.00212860107421875|cri_loss: -0.00066375732421875|unsuper_loss: 0.0
-average reward score: 4.8828125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.18%) |Training time=0.46s (21.14%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6505|ppo_ep: 1|act_loss: -0.002971649169921875|cri_loss: -0.0011959075927734375|unsuper_loss: 0.0
-average reward score: 5.40234375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.97%) |Training time=0.44s (20.31%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6506|ppo_ep: 1|act_loss: 0.01275634765625|cri_loss: 0.006744384765625|unsuper_loss: 0.0
-average reward score: 4.95703125
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.15%) |Training time=0.47s (21.24%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.57 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6507|ppo_ep: 1|act_loss: -0.032470703125|cri_loss: -0.01538848876953125|unsuper_loss: 0.0
-average reward score: 4.96484375
--------------------------------------------------------------------------------------
-|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.70s (74.30%) |Training time=0.49s (21.31%) |Others=0.10 (4.39%)|CurSamplesPerSec=13.95 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6508|ppo_ep: 1|act_loss: -0.0428466796875|cri_loss: -0.02069091796875|unsuper_loss: 0.0
-average reward score: 4.9609375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.68%) |Training time=0.47s (21.68%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.53
-[2023-04-14 12:46:23,525] [INFO] [logging.py:96:log_dist] [Rank 0] step=6510, skipped=82, lr=[1.151219856078901e-06, 1.151219856078901e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:46:23,543] [INFO] [timer.py:199:stop] epoch=0/micro_step=6510/global_step=6510, RunningAvgSamplesPerSec=105.59656922152335, CurrSamplesPerSec=103.94636394544389, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:46:23,636] [INFO] [logging.py:96:log_dist] [Rank 0] step=6510, skipped=110, lr=[6.140682384495902e-07, 6.140682384495902e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6509|ppo_ep: 1|act_loss: 0.0023174285888671875|cri_loss: 0.0017957687377929688|unsuper_loss: 0.0
-average reward score: 5.625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.73%) |Training time=0.47s (21.66%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6510|ppo_ep: 1|act_loss: 0.01549530029296875|cri_loss: 0.00812530517578125|unsuper_loss: 0.0
-average reward score: 5.8671875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.45%) |Training time=0.48s (21.92%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6511|ppo_ep: 1|act_loss: -0.018798828125|cri_loss: -0.00925445556640625|unsuper_loss: 0.0
-average reward score: 5.58984375
--------------------------------------------------------------------------------------
-|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.77s (74.68%) |Training time=0.50s (21.07%) |Others=0.10 (4.25%)|CurSamplesPerSec=13.49 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6512|ppo_ep: 1|act_loss: 0.015350341796875|cri_loss: 0.00786590576171875|unsuper_loss: 0.0
-average reward score: 5.78125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.44%) |Training time=0.45s (20.91%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6513|ppo_ep: 1|act_loss: -0.0010328292846679688|cri_loss: -0.00041866302490234375|unsuper_loss: 0.0
-average reward score: 5.33984375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.55%) |Training time=0.45s (20.64%) |Others=0.10 (4.82%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6514|ppo_ep: 1|act_loss: -0.0087890625|cri_loss: -0.003993988037109375|unsuper_loss: 0.0
-average reward score: 6.12890625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.89%) |Training time=0.44s (20.47%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6515|ppo_ep: 1|act_loss: 0.0076751708984375|cri_loss: 0.004138946533203125|unsuper_loss: 0.0
-average reward score: 5.64453125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.73%) |Training time=0.45s (20.63%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6516|ppo_ep: 1|act_loss: 0.0009355545043945312|cri_loss: 0.0005583763122558594|unsuper_loss: 0.0
-average reward score: 5.44921875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.38%) |Training time=0.45s (20.57%) |Others=0.11 (5.05%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6517|ppo_ep: 1|act_loss: 0.021087646484375|cri_loss: 0.011016845703125|unsuper_loss: 0.0
-average reward score: 5.484375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.12%) |Training time=0.46s (21.23%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6518|ppo_ep: 1|act_loss: 0.0050048828125|cri_loss: 0.0026264190673828125|unsuper_loss: 0.0
-average reward score: 5.44140625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.88%) |Training time=0.47s (21.50%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53
-[2023-04-14 12:46:45,374] [INFO] [logging.py:96:log_dist] [Rank 0] step=6520, skipped=82, lr=[1.1392045993266831e-06, 1.1392045993266831e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:46:45,392] [INFO] [timer.py:199:stop] epoch=0/micro_step=6520/global_step=6520, RunningAvgSamplesPerSec=105.6001638945027, CurrSamplesPerSec=108.51050440209877, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:46:45,485] [INFO] [logging.py:96:log_dist] [Rank 0] step=6520, skipped=110, lr=[6.077639496548926e-07, 6.077639496548926e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6519|ppo_ep: 1|act_loss: -0.0101165771484375|cri_loss: -0.004627227783203125|unsuper_loss: 0.0
-average reward score: 5.37109375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.19%) |Training time=0.46s (21.19%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6520|ppo_ep: 1|act_loss: -0.0202178955078125|cri_loss: -0.0098114013671875|unsuper_loss: 0.0
-average reward score: 5.40234375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.17%) |Training time=0.46s (21.21%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6521|ppo_ep: 1|act_loss: 0.021697998046875|cri_loss: 0.01116180419921875|unsuper_loss: 0.0
-average reward score: 5.3671875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.12%) |Training time=0.46s (21.25%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6522|ppo_ep: 1|act_loss: -0.0177001953125|cri_loss: -0.00839996337890625|unsuper_loss: 0.0
-average reward score: 5.65625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.40%) |Training time=0.45s (20.96%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6523|ppo_ep: 1|act_loss: -0.0059967041015625|cri_loss: -0.002777099609375|unsuper_loss: 0.0
-average reward score: 5.4375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.36%) |Training time=0.45s (20.99%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6524|ppo_ep: 1|act_loss: -0.02264404296875|cri_loss: -0.01093292236328125|unsuper_loss: 0.0
-average reward score: 5.390625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.21%) |Training time=0.46s (21.16%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6525|ppo_ep: 1|act_loss: -0.0240478515625|cri_loss: -0.01178741455078125|unsuper_loss: 0.0
-average reward score: 5.48046875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.18%) |Training time=0.46s (21.21%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6526|ppo_ep: 1|act_loss: 0.02386474609375|cri_loss: 0.0123138427734375|unsuper_loss: 0.0
-average reward score: 5.04296875
--------------------------------------------------------------------------------------
-|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.78s (75.47%) |Training time=0.48s (20.26%) |Others=0.10 (4.27%)|CurSamplesPerSec=13.57 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6527|ppo_ep: 1|act_loss: 0.022735595703125|cri_loss: 0.01175689697265625|unsuper_loss: 0.0
-average reward score: 5.4140625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.28%) |Training time=0.46s (21.10%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6528|ppo_ep: 1|act_loss: 0.0005331039428710938|cri_loss: 0.0003275871276855469|unsuper_loss: 0.0
-average reward score: 4.90234375
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.23%) |Training time=0.46s (21.20%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.53
-[2023-04-14 12:47:07,221] [INFO] [logging.py:96:log_dist] [Rank 0] step=6530, skipped=82, lr=[1.1272439749537998e-06, 1.1272439749537998e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:47:07,239] [INFO] [timer.py:199:stop] epoch=0/micro_step=6530/global_step=6530, RunningAvgSamplesPerSec=105.60338435686515, CurrSamplesPerSec=109.80625927543986, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:47:07,332] [INFO] [logging.py:96:log_dist] [Rank 0] step=6530, skipped=110, lr=[6.01487708363232e-07, 6.01487708363232e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6529|ppo_ep: 1|act_loss: 0.05078125|cri_loss: 0.026214599609375|unsuper_loss: 0.0
-average reward score: 5.1875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.39%) |Training time=0.45s (21.01%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6530|ppo_ep: 1|act_loss: 0.002910614013671875|cri_loss: 0.00180816650390625|unsuper_loss: 0.0
-average reward score: 4.74609375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.48%) |Training time=0.45s (20.88%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6531|ppo_ep: 1|act_loss: -0.0006227493286132812|cri_loss: 0.0004520416259765625|unsuper_loss: 0.0
-average reward score: 5.4140625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.62%) |Training time=0.45s (20.75%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6532|ppo_ep: 1|act_loss: -0.0004138946533203125|cri_loss: 0.00014734268188476562|unsuper_loss: 0.0
-average reward score: 5.3671875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.20%) |Training time=0.46s (21.16%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6533|ppo_ep: 1|act_loss: -0.022857666015625|cri_loss: -0.01116943359375|unsuper_loss: 0.0
-average reward score: 5.15625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.48%) |Training time=0.45s (20.77%) |Others=0.10 (4.75%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6534|ppo_ep: 1|act_loss: -0.0067138671875|cri_loss: -0.00313568115234375|unsuper_loss: 0.0
-average reward score: 5.72265625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.49%) |Training time=0.45s (20.86%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6535|ppo_ep: 1|act_loss: -0.0179595947265625|cri_loss: -0.0081939697265625|unsuper_loss: 0.0
-average reward score: 4.765625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.52%) |Training time=0.45s (20.88%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6536|ppo_ep: 1|act_loss: 0.00745391845703125|cri_loss: 0.003978729248046875|unsuper_loss: 0.0
-average reward score: 5.09375
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.30%) |Training time=0.46s (20.91%) |Others=0.11 (4.79%)|CurSamplesPerSec=14.46 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6537|ppo_ep: 1|act_loss: 0.0206451416015625|cri_loss: 0.01094818115234375|unsuper_loss: 0.0
-average reward score: 4.89453125
--------------------------------------------------------------------------------------
-|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.70s (74.95%) |Training time=0.47s (20.62%) |Others=0.10 (4.43%)|CurSamplesPerSec=14.11 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6538|ppo_ep: 1|act_loss: -0.032745361328125|cri_loss: -0.0157318115234375|unsuper_loss: 0.0
-average reward score: 5.44921875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.62%) |Training time=0.45s (20.73%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53
-[2023-04-14 12:47:29,021] [INFO] [logging.py:96:log_dist] [Rank 0] step=6540, skipped=82, lr=[1.1153381602455581e-06, 1.1153381602455581e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:47:29,039] [INFO] [timer.py:199:stop] epoch=0/micro_step=6540/global_step=6540, RunningAvgSamplesPerSec=105.60941207531546, CurrSamplesPerSec=111.99790386308354, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:47:29,132] [INFO] [logging.py:96:log_dist] [Rank 0] step=6540, skipped=110, lr=[5.952396076036457e-07, 5.952396076036457e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6539|ppo_ep: 1|act_loss: 0.002498626708984375|cri_loss: 0.001873016357421875|unsuper_loss: 0.0
-average reward score: 5.484375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.56%) |Training time=0.45s (20.82%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6540|ppo_ep: 1|act_loss: -0.006824493408203125|cri_loss: -0.0032329559326171875|unsuper_loss: 0.0
-average reward score: 5.83203125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.32%) |Training time=0.45s (21.04%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6541|ppo_ep: 1|act_loss: -0.019256591796875|cri_loss: -0.00939178466796875|unsuper_loss: 0.0
-average reward score: 5.77734375
--------------------------------------------------------------------------------------
-|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.78s (75.69%) |Training time=0.47s (20.02%) |Others=0.10 (4.29%)|CurSamplesPerSec=13.62 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6542|ppo_ep: 1|act_loss: 0.023956298828125|cri_loss: 0.01236724853515625|unsuper_loss: 0.0
-average reward score: 6.453125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.79%) |Training time=0.47s (21.61%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6543|ppo_ep: 1|act_loss: 0.014129638671875|cri_loss: 0.007312774658203125|unsuper_loss: 0.0
-average reward score: 4.609375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.83%) |Training time=0.49s (22.55%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6544|ppo_ep: 1|act_loss: -0.0164642333984375|cri_loss: -0.0080413818359375|unsuper_loss: 0.0
-average reward score: 4.69140625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.57s (72.61%) |Training time=0.49s (22.77%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6545|ppo_ep: 1|act_loss: -0.00740814208984375|cri_loss: -0.003650665283203125|unsuper_loss: 0.0
-average reward score: 4.8515625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.92%) |Training time=0.49s (22.47%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6546|ppo_ep: 1|act_loss: -0.0016851425170898438|cri_loss: -0.0007281303405761719|unsuper_loss: 0.0
-average reward score: 5.1953125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.01%) |Training time=0.48s (22.38%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6547|ppo_ep: 1|act_loss: -0.0064849853515625|cri_loss: -0.0031108856201171875|unsuper_loss: 0.0
-average reward score: 5.515625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.81%) |Training time=0.49s (22.57%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6548|ppo_ep: 1|act_loss: -0.0235748291015625|cri_loss: -0.01151275634765625|unsuper_loss: 0.0
-average reward score: 5.35546875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.96%) |Training time=0.49s (22.42%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53
-[2023-04-14 12:47:50,874] [INFO] [logging.py:96:log_dist] [Rank 0] step=6550, skipped=82, lr=[1.103487331674853e-06, 1.103487331674853e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:47:50,892] [INFO] [timer.py:199:stop] epoch=0/micro_step=6550/global_step=6550, RunningAvgSamplesPerSec=105.60097993930471, CurrSamplesPerSec=98.59352218869277, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:47:50,985] [INFO] [logging.py:96:log_dist] [Rank 0] step=6550, skipped=110, lr=[5.890197399880581e-07, 5.890197399880581e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6549|ppo_ep: 1|act_loss: -0.0108489990234375|cri_loss: -0.005306243896484375|unsuper_loss: 0.0
-average reward score: 6.22265625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.94%) |Training time=0.49s (22.49%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6550|ppo_ep: 1|act_loss: 0.0272064208984375|cri_loss: 0.01419830322265625|unsuper_loss: 0.0
-average reward score: 5.375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.67%) |Training time=0.49s (22.72%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6551|ppo_ep: 1|act_loss: 0.005901336669921875|cri_loss: 0.003063201904296875|unsuper_loss: 0.0
-average reward score: 5.421875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.66%) |Training time=0.50s (22.74%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6552|ppo_ep: 1|act_loss: -0.0030727386474609375|cri_loss: -0.000606536865234375|unsuper_loss: 0.0
-average reward score: 4.1875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.63%) |Training time=0.49s (22.68%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6553|ppo_ep: 1|act_loss: 0.006561279296875|cri_loss: 0.003398895263671875|unsuper_loss: 0.0
-average reward score: 5.53515625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.87%) |Training time=0.49s (22.52%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6554|ppo_ep: 1|act_loss: 0.011383056640625|cri_loss: 0.005832672119140625|unsuper_loss: 0.0
-average reward score: 5.84765625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.68%) |Training time=0.49s (22.72%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6555|ppo_ep: 1|act_loss: -0.00696563720703125|cri_loss: -0.00336456298828125|unsuper_loss: 0.0
-average reward score: 5.6328125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.11%) |Training time=0.48s (22.27%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6556|ppo_ep: 1|act_loss: 0.0443115234375|cri_loss: 0.0237274169921875|unsuper_loss: 0.0
-average reward score: 5.0546875
--------------------------------------------------------------------------------------
-|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.61s (70.90%) |Training time=0.56s (24.58%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.13 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6557|ppo_ep: 1|act_loss: 0.0042877197265625|cri_loss: 0.002197265625|unsuper_loss: 0.0
-average reward score: 5.73828125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.58%) |Training time=0.47s (21.79%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6558|ppo_ep: 1|act_loss: 0.016754150390625|cri_loss: 0.00850677490234375|unsuper_loss: 0.0
-average reward score: 5.75
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.17%) |Training time=0.48s (22.21%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53
-[2023-04-14 12:48:12,678] [INFO] [logging.py:96:log_dist] [Rank 0] step=6560, skipped=82, lr=[1.091691664899555e-06, 1.091691664899555e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:48:12,696] [INFO] [timer.py:199:stop] epoch=0/micro_step=6560/global_step=6560, RunningAvgSamplesPerSec=105.58608899616205, CurrSamplesPerSec=100.87136841519845, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:48:12,789] [INFO] [logging.py:96:log_dist] [Rank 0] step=6560, skipped=110, lr=[5.828281977099129e-07, 5.828281977099129e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6559|ppo_ep: 1|act_loss: -0.0164337158203125|cri_loss: -0.00799560546875|unsuper_loss: 0.0
-average reward score: 5.5234375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.23%) |Training time=0.48s (22.15%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6560|ppo_ep: 1|act_loss: -0.00439453125|cri_loss: -0.0017576217651367188|unsuper_loss: 0.0
-average reward score: 5.0078125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.11%) |Training time=0.48s (22.27%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6561|ppo_ep: 1|act_loss: 0.029449462890625|cri_loss: 0.0149383544921875|unsuper_loss: 0.0
-average reward score: 5.4609375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.14%) |Training time=0.48s (22.24%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6562|ppo_ep: 1|act_loss: 0.00925445556640625|cri_loss: 0.0048828125|unsuper_loss: 0.0
-average reward score: 5.6875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.23%) |Training time=0.48s (22.13%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6563|ppo_ep: 1|act_loss: -0.004062652587890625|cri_loss: -0.0015611648559570312|unsuper_loss: 0.0
-average reward score: 5.2890625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.22%) |Training time=0.48s (22.15%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6564|ppo_ep: 1|act_loss: -0.002902984619140625|cri_loss: -0.001293182373046875|unsuper_loss: 0.0
-average reward score: 5.14453125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.59%) |Training time=0.48s (22.24%) |Others=0.11 (5.17%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6565|ppo_ep: 1|act_loss: 0.026123046875|cri_loss: 0.01336669921875|unsuper_loss: 0.0
-average reward score: 5.4296875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.27%) |Training time=0.48s (22.13%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6566|ppo_ep: 1|act_loss: 0.0179443359375|cri_loss: 0.00943756103515625|unsuper_loss: 0.0
-average reward score: 5.12890625
--------------------------------------------------------------------------------------
-|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.69s (73.87%) |Training time=0.50s (21.73%) |Others=0.10 (4.40%)|CurSamplesPerSec=14.02 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6567|ppo_ep: 1|act_loss: 0.02716064453125|cri_loss: 0.0139312744140625|unsuper_loss: 0.0
-average reward score: 5.359375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.21%) |Training time=0.48s (22.16%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6568|ppo_ep: 1|act_loss: -0.009429931640625|cri_loss: -0.004634857177734375|unsuper_loss: 0.0
-average reward score: 5.96875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.01%) |Training time=0.48s (22.35%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53
-[2023-04-14 12:48:34,461] [INFO] [logging.py:96:log_dist] [Rank 0] step=6570, skipped=82, lr=[1.0799513347598973e-06, 1.0799513347598973e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:48:34,479] [INFO] [timer.py:199:stop] epoch=0/micro_step=6570/global_step=6570, RunningAvgSamplesPerSec=105.57698320907716, CurrSamplesPerSec=100.9552079838854, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:48:34,572] [INFO] [logging.py:96:log_dist] [Rank 0] step=6570, skipped=110, lr=[5.766650725428027e-07, 5.766650725428027e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6569|ppo_ep: 1|act_loss: -0.0192108154296875|cri_loss: -0.00888824462890625|unsuper_loss: 0.0
-average reward score: 5.83984375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.22%) |Training time=0.48s (22.16%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6570|ppo_ep: 1|act_loss: -0.0006604194641113281|cri_loss: -6.580352783203125e-05|unsuper_loss: 0.0
-average reward score: 4.609375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.98%) |Training time=0.48s (22.39%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6571|ppo_ep: 1|act_loss: -0.014739990234375|cri_loss: -0.00199127197265625|unsuper_loss: 0.0
-average reward score: 5.109375
--------------------------------------------------------------------------------------
-|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.74s (74.34%) |Training time=0.50s (21.37%) |Others=0.10 (4.29%)|CurSamplesPerSec=13.63 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6572|ppo_ep: 1|act_loss: 0.0235137939453125|cri_loss: 0.01197052001953125|unsuper_loss: 0.0
-average reward score: 5.5546875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.71%) |Training time=0.50s (22.68%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6573|ppo_ep: 1|act_loss: -0.00745391845703125|cri_loss: -0.003570556640625|unsuper_loss: 0.0
-average reward score: 5.0
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.25%) |Training time=0.48s (22.12%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6574|ppo_ep: 1|act_loss: -0.0093536376953125|cri_loss: -0.004589080810546875|unsuper_loss: 0.0
-average reward score: 5.453125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.95%) |Training time=0.49s (22.44%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6575|ppo_ep: 1|act_loss: -0.0058135986328125|cri_loss: -0.0026302337646484375|unsuper_loss: 0.0
-average reward score: 4.78125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.23%) |Training time=0.48s (22.13%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6576|ppo_ep: 1|act_loss: 0.005096435546875|cri_loss: 0.0028324127197265625|unsuper_loss: 0.0
-average reward score: 4.578125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.17%) |Training time=0.48s (22.19%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6577|ppo_ep: 1|act_loss: 0.022705078125|cri_loss: 0.01177978515625|unsuper_loss: 0.0
-average reward score: 5.515625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.02%) |Training time=0.48s (22.35%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6578|ppo_ep: 1|act_loss: -0.009124755859375|cri_loss: -0.004364013671875|unsuper_loss: 0.0
-average reward score: 5.16796875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.98%) |Training time=0.48s (22.19%) |Others=0.11 (4.84%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.53
-[2023-04-14 12:48:56,317] [INFO] [logging.py:96:log_dist] [Rank 0] step=6580, skipped=82, lr=[1.0682665152758964e-06, 1.0682665152758964e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:48:56,336] [INFO] [timer.py:199:stop] epoch=0/micro_step=6580/global_step=6580, RunningAvgSamplesPerSec=105.56653477849474, CurrSamplesPerSec=100.78146226450514, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:48:56,428] [INFO] [logging.py:96:log_dist] [Rank 0] step=6580, skipped=110, lr=[5.705304558391109e-07, 5.705304558391109e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6579|ppo_ep: 1|act_loss: -0.004825592041015625|cri_loss: -0.00231170654296875|unsuper_loss: 0.0
-average reward score: 5.6875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.23%) |Training time=0.48s (22.15%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6580|ppo_ep: 1|act_loss: -0.003093719482421875|cri_loss: -0.0014362335205078125|unsuper_loss: 0.0
-average reward score: 5.28125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.20%) |Training time=0.48s (22.18%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6581|ppo_ep: 1|act_loss: 0.0014276504516601562|cri_loss: 0.001483917236328125|unsuper_loss: 0.0
-average reward score: 5.5078125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.14%) |Training time=0.48s (22.22%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6582|ppo_ep: 1|act_loss: 0.00543975830078125|cri_loss: 0.0029315948486328125|unsuper_loss: 0.0
-average reward score: 5.90625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.18%) |Training time=0.48s (22.19%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6583|ppo_ep: 1|act_loss: 0.003963470458984375|cri_loss: 0.00213623046875|unsuper_loss: 0.0
-average reward score: 5.2109375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.26%) |Training time=0.48s (22.09%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6584|ppo_ep: 1|act_loss: -0.0157318115234375|cri_loss: -0.007671356201171875|unsuper_loss: 0.0
-average reward score: 5.26953125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.98%) |Training time=0.49s (22.41%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6585|ppo_ep: 1|act_loss: 0.019989013671875|cri_loss: 0.01015472412109375|unsuper_loss: 0.0
-average reward score: 4.8203125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.49%) |Training time=0.47s (21.86%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6586|ppo_ep: 1|act_loss: 0.0076751708984375|cri_loss: 0.004123687744140625|unsuper_loss: 0.0
-average reward score: 5.22265625
--------------------------------------------------------------------------------------
-|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.74s (74.99%) |Training time=0.48s (20.55%) |Others=0.10 (4.45%)|CurSamplesPerSec=13.76 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6587|ppo_ep: 1|act_loss: -0.020965576171875|cri_loss: -0.00983428955078125|unsuper_loss: 0.0
-average reward score: 5.578125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.19%) |Training time=0.48s (22.14%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6588|ppo_ep: 1|act_loss: -0.0211181640625|cri_loss: -0.01003265380859375|unsuper_loss: 0.0
-average reward score: 5.23828125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.27%) |Training time=0.48s (22.10%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.53
-[2023-04-14 12:49:18,127] [INFO] [logging.py:96:log_dist] [Rank 0] step=6590, skipped=82, lr=[1.0566373796447659e-06, 1.0566373796447659e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:49:18,145] [INFO] [timer.py:199:stop] epoch=0/micro_step=6590/global_step=6590, RunningAvgSamplesPerSec=105.55925555019671, CurrSamplesPerSec=101.66607937390451, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:49:18,238] [INFO] [logging.py:96:log_dist] [Rank 0] step=6590, skipped=110, lr=[5.644244385286548e-07, 5.644244385286548e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6589|ppo_ep: 1|act_loss: -0.019561767578125|cri_loss: -0.009368896484375|unsuper_loss: 0.0
-average reward score: 5.1875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.31%) |Training time=0.48s (22.06%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6590|ppo_ep: 1|act_loss: -0.0198974609375|cri_loss: -0.00963592529296875|unsuper_loss: 0.0
-average reward score: 5.9140625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.24%) |Training time=0.48s (22.06%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6591|ppo_ep: 1|act_loss: -0.0243988037109375|cri_loss: -0.011871337890625|unsuper_loss: 0.0
-average reward score: 5.78515625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.03%) |Training time=0.48s (22.34%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6592|ppo_ep: 1|act_loss: -0.00626373291015625|cri_loss: -0.002712249755859375|unsuper_loss: 0.0
-average reward score: 5.6015625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.19%) |Training time=0.48s (22.16%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6593|ppo_ep: 1|act_loss: 0.025970458984375|cri_loss: 0.01407623291015625|unsuper_loss: 0.0
-average reward score: 5.1953125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.14%) |Training time=0.48s (22.23%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6594|ppo_ep: 1|act_loss: 0.0013799667358398438|cri_loss: 0.0013532638549804688|unsuper_loss: 0.0
-average reward score: 5.94921875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.11%) |Training time=0.48s (22.27%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6595|ppo_ep: 1|act_loss: 0.02069091796875|cri_loss: 0.01064300537109375|unsuper_loss: 0.0
-average reward score: 5.4921875
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.96%) |Training time=0.47s (21.42%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6596|ppo_ep: 1|act_loss: -0.0013408660888671875|cri_loss: -0.00049591064453125|unsuper_loss: 0.0
-average reward score: 5.109375
--------------------------------------------------------------------------------------
-|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.27%) |Training time=0.48s (21.31%) |Others=0.10 (4.42%)|CurSamplesPerSec=14.09 |AvgSamplesPerSec=14.53
-[2023-04-14 12:49:35,683] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
-epoch: 0|step: 6597|ppo_ep: 1|act_loss: -0.003520965576171875|cri_loss: -0.0013427734375|unsuper_loss: 0.0
-average reward score: 5.5625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.63%) |Training time=0.48s (22.20%) |Others=0.09 (4.17%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.53
-[2023-04-14 12:49:37,840] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
-epoch: 0|step: 6598|ppo_ep: 1|act_loss: 0.0361328125|cri_loss: 0.0187530517578125|unsuper_loss: 0.0
-average reward score: 5.328125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.45%) |Training time=0.48s (22.24%) |Others=0.09 (4.31%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.53
-[2023-04-14 12:49:39,893] [INFO] [logging.py:96:log_dist] [Rank 0] step=6600, skipped=82, lr=[1.0450641002383495e-06, 1.0450641002383495e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:49:39,911] [INFO] [timer.py:199:stop] epoch=0/micro_step=6600/global_step=6600, RunningAvgSamplesPerSec=105.55175453772948, CurrSamplesPerSec=100.11944716301052, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:49:40,006] [INFO] [logging.py:96:log_dist] [Rank 0] step=6600, skipped=112, lr=[5.59560277077667e-07, 5.59560277077667e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6599|ppo_ep: 1|act_loss: 0.0292510986328125|cri_loss: 0.0148773193359375|unsuper_loss: 0.0
-average reward score: 6.1796875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.09%) |Training time=0.48s (22.25%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6600|ppo_ep: 1|act_loss: 0.002239227294921875|cri_loss: 0.00122833251953125|unsuper_loss: 0.0
-average reward score: 5.59765625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.15%) |Training time=0.48s (22.21%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6601|ppo_ep: 1|act_loss: -0.0012359619140625|cri_loss: -0.0005125999450683594|unsuper_loss: 0.0
-average reward score: 5.6328125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.27%) |Training time=0.48s (22.10%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6602|ppo_ep: 1|act_loss: -0.00958251953125|cri_loss: -0.00464630126953125|unsuper_loss: 0.0
-average reward score: 5.26953125
--------------------------------------------------------------------------------------
-|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.65s (71.46%) |Training time=0.56s (24.18%) |Others=0.10 (4.36%)|CurSamplesPerSec=13.88 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6603|ppo_ep: 1|act_loss: 0.0058135986328125|cri_loss: 0.0030841827392578125|unsuper_loss: 0.0
-average reward score: 5.0859375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.48%) |Training time=0.47s (21.90%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6604|ppo_ep: 1|act_loss: -0.0102691650390625|cri_loss: -0.00485992431640625|unsuper_loss: 0.0
-average reward score: 5.0546875
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.43%) |Training time=0.50s (22.98%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6605|ppo_ep: 1|act_loss: -0.0034236907958984375|cri_loss: -0.0015735626220703125|unsuper_loss: 0.0
-average reward score: 5.28125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.93%) |Training time=0.49s (22.46%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6606|ppo_ep: 1|act_loss: -0.02056884765625|cri_loss: -0.01004791259765625|unsuper_loss: 0.0
-average reward score: 5.08984375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.88%) |Training time=0.49s (22.50%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6607|ppo_ep: 1|act_loss: -0.0178070068359375|cri_loss: -0.0086517333984375|unsuper_loss: 0.0
-average reward score: 5.2890625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.88%) |Training time=0.47s (21.49%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6608|ppo_ep: 1|act_loss: 0.016571044921875|cri_loss: 0.00848388671875|unsuper_loss: 0.0
-average reward score: 5.13671875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.04%) |Training time=0.46s (21.34%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53
-[2023-04-14 12:50:01,722] [INFO] [logging.py:96:log_dist] [Rank 0] step=6610, skipped=82, lr=[1.0335468486005652e-06, 1.0335468486005652e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:50:01,740] [INFO] [timer.py:199:stop] epoch=0/micro_step=6610/global_step=6610, RunningAvgSamplesPerSec=105.54412564005361, CurrSamplesPerSec=105.0409292468261, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:50:01,836] [INFO] [logging.py:96:log_dist] [Rank 0] step=6610, skipped=112, lr=[5.53505966464195e-07, 5.53505966464195e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6609|ppo_ep: 1|act_loss: -0.007537841796875|cri_loss: -0.0036907196044921875|unsuper_loss: 0.0
-average reward score: 5.42578125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.75%) |Training time=0.47s (21.53%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6610|ppo_ep: 1|act_loss: -0.001495361328125|cri_loss: -0.0006341934204101562|unsuper_loss: 0.0
-average reward score: 6.0
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.78%) |Training time=0.47s (21.60%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6611|ppo_ep: 1|act_loss: -0.0090789794921875|cri_loss: -0.004428863525390625|unsuper_loss: 0.0
-average reward score: 4.9296875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.08%) |Training time=0.47s (21.35%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6612|ppo_ep: 1|act_loss: -0.01470184326171875|cri_loss: -0.0068206787109375|unsuper_loss: 0.0
-average reward score: 5.4140625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.75%) |Training time=0.47s (21.64%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6613|ppo_ep: 1|act_loss: 0.0101318359375|cri_loss: 0.00521087646484375|unsuper_loss: 0.0
-average reward score: 5.36328125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.63%) |Training time=0.47s (21.69%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6614|ppo_ep: 1|act_loss: -0.000881195068359375|cri_loss: 7.82012939453125e-05|unsuper_loss: 0.0
-average reward score: 4.9921875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.77%) |Training time=0.47s (21.62%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6615|ppo_ep: 1|act_loss: -0.00189971923828125|cri_loss: -0.0008196830749511719|unsuper_loss: 0.0
-average reward score: 5.88671875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.01%) |Training time=0.46s (21.36%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6616|ppo_ep: 1|act_loss: 0.0089111328125|cri_loss: 0.004726409912109375|unsuper_loss: 0.0
-average reward score: 5.53125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.91%) |Training time=0.47s (21.47%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6617|ppo_ep: 1|act_loss: -0.017181396484375|cri_loss: -0.0083770751953125|unsuper_loss: 0.0
-average reward score: 5.984375
--------------------------------------------------------------------------------------
-|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.61s (72.20%) |Training time=0.48s (21.70%) |Others=0.14 (6.10%)|CurSamplesPerSec=14.37 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6618|ppo_ep: 1|act_loss: -0.006389617919921875|cri_loss: -0.0030574798583984375|unsuper_loss: 0.0
-average reward score: 5.171875
--------------------------------------------------------------------------------------
-|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.68s (74.84%) |Training time=0.47s (20.68%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.23 |AvgSamplesPerSec=14.53
-[2023-04-14 12:50:23,554] [INFO] [logging.py:96:log_dist] [Rank 0] step=6620, skipped=82, lr=[1.0220857954448672e-06, 1.0220857954448672e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:50:23,572] [INFO] [timer.py:199:stop] epoch=0/micro_step=6620/global_step=6620, RunningAvgSamplesPerSec=105.54226757249026, CurrSamplesPerSec=101.52726766602092, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:50:23,665] [INFO] [logging.py:96:log_dist] [Rank 0] step=6620, skipped=112, lr=[5.474805075879616e-07, 5.474805075879616e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6619|ppo_ep: 1|act_loss: -0.0173492431640625|cri_loss: -0.00852203369140625|unsuper_loss: 0.0
-average reward score: 5.140625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.51%) |Training time=0.48s (21.91%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6620|ppo_ep: 1|act_loss: -0.02215576171875|cri_loss: -0.01073455810546875|unsuper_loss: 0.0
-average reward score: 5.5
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.35%) |Training time=0.48s (22.02%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6621|ppo_ep: 1|act_loss: 0.03692626953125|cri_loss: 0.0188446044921875|unsuper_loss: 0.0
-average reward score: 5.125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.36%) |Training time=0.48s (22.01%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6622|ppo_ep: 1|act_loss: -0.00305938720703125|cri_loss: -0.001110076904296875|unsuper_loss: 0.0
-average reward score: 4.94921875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.12%) |Training time=0.48s (22.24%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6623|ppo_ep: 1|act_loss: 0.00664520263671875|cri_loss: 0.0036258697509765625|unsuper_loss: 0.0
-average reward score: 5.5703125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.08%) |Training time=0.48s (22.30%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6624|ppo_ep: 1|act_loss: 0.002655029296875|cri_loss: 0.0015382766723632812|unsuper_loss: 0.0
-average reward score: 5.0
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.28%) |Training time=0.48s (22.14%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6625|ppo_ep: 1|act_loss: -0.017791748046875|cri_loss: -0.008697509765625|unsuper_loss: 0.0
-average reward score: 5.609375
--------------------------------------------------------------------------------------
-|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.48%) |Training time=0.48s (21.08%) |Others=0.10 (4.44%)|CurSamplesPerSec=14.13 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6626|ppo_ep: 1|act_loss: -0.005893707275390625|cri_loss: -0.0027294158935546875|unsuper_loss: 0.0
-average reward score: 5.640625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.30%) |Training time=0.48s (22.06%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6627|ppo_ep: 1|act_loss: -0.0082244873046875|cri_loss: -0.003978729248046875|unsuper_loss: 0.0
-average reward score: 5.5390625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.05%) |Training time=0.48s (22.11%) |Others=0.10 (4.84%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6628|ppo_ep: 1|act_loss: -0.011962890625|cri_loss: -0.0058441162109375|unsuper_loss: 0.0
-average reward score: 5.21875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.04%) |Training time=0.48s (22.33%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53
-[2023-04-14 12:50:45,332] [INFO] [logging.py:96:log_dist] [Rank 0] step=6630, skipped=82, lr=[1.0106811106517118e-06, 1.0106811106517118e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:50:45,350] [INFO] [timer.py:199:stop] epoch=0/micro_step=6630/global_step=6630, RunningAvgSamplesPerSec=105.53529721787794, CurrSamplesPerSec=101.13496418923755, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:50:45,442] [INFO] [logging.py:96:log_dist] [Rank 0] step=6630, skipped=112, lr=[5.41483989760803e-07, 5.41483989760803e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6629|ppo_ep: 1|act_loss: 0.043121337890625|cri_loss: 0.0227813720703125|unsuper_loss: 0.0
-average reward score: 5.6953125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.23%) |Training time=0.48s (22.16%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6630|ppo_ep: 1|act_loss: -0.00467681884765625|cri_loss: -0.001865386962890625|unsuper_loss: 0.0
-average reward score: 5.37890625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.90%) |Training time=0.49s (22.49%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6631|ppo_ep: 1|act_loss: -0.00705718994140625|cri_loss: -0.003284454345703125|unsuper_loss: 0.0
-average reward score: 5.27734375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.38%) |Training time=0.47s (21.97%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6632|ppo_ep: 1|act_loss: -0.001850128173828125|cri_loss: -0.0005612373352050781|unsuper_loss: 0.0
-average reward score: 5.32421875
--------------------------------------------------------------------------------------
-|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.35%) |Training time=0.48s (20.35%) |Others=0.10 (4.31%)|CurSamplesPerSec=13.69 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6633|ppo_ep: 1|act_loss: 0.007160186767578125|cri_loss: 0.0037708282470703125|unsuper_loss: 0.0
-average reward score: 5.4453125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.21%) |Training time=0.48s (22.15%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6634|ppo_ep: 1|act_loss: 0.026824951171875|cri_loss: 0.0143890380859375|unsuper_loss: 0.0
-average reward score: 5.37109375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.15%) |Training time=0.48s (22.21%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6635|ppo_ep: 1|act_loss: -0.00769805908203125|cri_loss: -0.0037708282470703125|unsuper_loss: 0.0
-average reward score: 5.69921875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.20%) |Training time=0.48s (22.17%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6636|ppo_ep: 1|act_loss: -0.016510009765625|cri_loss: -0.00434112548828125|unsuper_loss: 0.0
-average reward score: 5.8203125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.21%) |Training time=0.48s (22.17%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6637|ppo_ep: 1|act_loss: 0.0094146728515625|cri_loss: 0.004886627197265625|unsuper_loss: 0.0
-average reward score: 4.875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.13%) |Training time=0.48s (22.25%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6638|ppo_ep: 1|act_loss: -0.014373779296875|cri_loss: -0.006885528564453125|unsuper_loss: 0.0
-average reward score: 4.91015625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.11%) |Training time=0.48s (22.26%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53
-[2023-04-14 12:51:07,159] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
-[2023-04-14 12:51:07,159] [INFO] [logging.py:96:log_dist] [Rank 0] step=6640, skipped=83, lr=[1.0004652290207957e-06, 1.0004652290207957e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:51:07,160] [INFO] [timer.py:199:stop] epoch=0/micro_step=6640/global_step=6640, RunningAvgSamplesPerSec=105.52910092880747, CurrSamplesPerSec=109.88258198147307, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:51:07,252] [INFO] [logging.py:96:log_dist] [Rank 0] step=6640, skipped=112, lr=[5.355165018655778e-07, 5.355165018655778e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6639|ppo_ep: 1|act_loss: -0.00891876220703125|cri_loss: -0.004344940185546875|unsuper_loss: 0.0
-average reward score: 5.36328125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.34%) |Training time=0.45s (21.05%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.53
-[2023-04-14 12:51:09,301] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
-epoch: 0|step: 6640|ppo_ep: 1|act_loss: -0.0029621124267578125|cri_loss: -0.0013227462768554688|unsuper_loss: 0.0
-average reward score: 5.09375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.80%) |Training time=0.46s (21.55%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.53
-epoch: 0|step: 6641|ppo_ep: 1|act_loss: -0.01140594482421875|cri_loss: -0.00524139404296875|unsuper_loss: 0.0
-average reward score: 5.4453125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.12%) |Training time=0.48s (22.26%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6642|ppo_ep: 1|act_loss: 0.020294189453125|cri_loss: 0.010894775390625|unsuper_loss: 0.0
-average reward score: 5.3046875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.99%) |Training time=0.49s (22.40%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6643|ppo_ep: 1|act_loss: 0.0182037353515625|cri_loss: 0.00933837890625|unsuper_loss: 0.0
-average reward score: 4.203125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.02%) |Training time=0.48s (22.35%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6644|ppo_ep: 1|act_loss: -0.00525665283203125|cri_loss: -0.002307891845703125|unsuper_loss: 0.0
-average reward score: 5.11328125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.30%) |Training time=0.48s (22.07%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6645|ppo_ep: 1|act_loss: 0.0092010498046875|cri_loss: 0.005702972412109375|unsuper_loss: 0.0
-average reward score: 6.05859375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.15%) |Training time=0.48s (22.23%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6646|ppo_ep: 1|act_loss: -0.037841796875|cri_loss: -0.01470947265625|unsuper_loss: 0.0
-average reward score: 5.76171875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.15%) |Training time=0.48s (22.22%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6647|ppo_ep: 1|act_loss: 0.005802154541015625|cri_loss: 0.0029850006103515625|unsuper_loss: 0.0
-average reward score: 5.6953125
--------------------------------------------------------------------------------------
-|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.75s (74.88%) |Training time=0.49s (20.83%) |Others=0.10 (4.28%)|CurSamplesPerSec=13.67 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6648|ppo_ep: 1|act_loss: 0.0277862548828125|cri_loss: 0.01418304443359375|unsuper_loss: 0.0
-average reward score: 4.96875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.81%) |Training time=0.47s (21.53%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.54
-[2023-04-14 12:51:28,951] [INFO] [logging.py:96:log_dist] [Rank 0] step=6650, skipped=84, lr=[9.90295265354164e-07, 9.90295265354164e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:51:28,969] [INFO] [timer.py:199:stop] epoch=0/micro_step=6650/global_step=6650, RunningAvgSamplesPerSec=105.52190506169607, CurrSamplesPerSec=98.76568618000208, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:51:29,062] [INFO] [logging.py:96:log_dist] [Rank 0] step=6650, skipped=112, lr=[5.295781323548535e-07, 5.295781323548535e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6649|ppo_ep: 1|act_loss: -0.032806396484375|cri_loss: -0.01139068603515625|unsuper_loss: 0.0
-average reward score: 5.41015625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.01%) |Training time=0.49s (22.40%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6650|ppo_ep: 1|act_loss: -0.01148223876953125|cri_loss: -0.005680084228515625|unsuper_loss: 0.0
-average reward score: 5.47265625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.16%) |Training time=0.48s (22.22%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6651|ppo_ep: 1|act_loss: -0.0098876953125|cri_loss: -0.004856109619140625|unsuper_loss: 0.0
-average reward score: 5.765625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.45%) |Training time=0.47s (21.90%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6652|ppo_ep: 1|act_loss: 0.0227203369140625|cri_loss: 0.0144195556640625|unsuper_loss: 0.0
-average reward score: 4.796875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.03%) |Training time=0.48s (22.36%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6653|ppo_ep: 1|act_loss: 0.0352783203125|cri_loss: 0.0180816650390625|unsuper_loss: 0.0
-average reward score: 5.27734375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.35%) |Training time=0.48s (22.01%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6654|ppo_ep: 1|act_loss: -0.01111602783203125|cri_loss: -0.004726409912109375|unsuper_loss: 0.0
-average reward score: 5.01171875
--------------------------------------------------------------------------------------
-|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.62s (72.03%) |Training time=0.49s (21.78%) |Others=0.14 (6.19%)|CurSamplesPerSec=14.22 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6655|ppo_ep: 1|act_loss: 0.003253936767578125|cri_loss: 0.002178192138671875|unsuper_loss: 0.0
-average reward score: 5.5546875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.06%) |Training time=0.48s (22.33%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6656|ppo_ep: 1|act_loss: 0.01036834716796875|cri_loss: 0.00536346435546875|unsuper_loss: 0.0
-average reward score: 4.9453125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.32%) |Training time=0.48s (22.04%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6657|ppo_ep: 1|act_loss: -0.001857757568359375|cri_loss: -0.0005521774291992188|unsuper_loss: 0.0
-average reward score: 5.34765625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.06%) |Training time=0.48s (22.32%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6658|ppo_ep: 1|act_loss: -0.00725555419921875|cri_loss: -0.003559112548828125|unsuper_loss: 0.0
-average reward score: 5.63671875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.16%) |Training time=0.48s (22.20%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.54
-[2023-04-14 12:51:50,678] [INFO] [logging.py:96:log_dist] [Rank 0] step=6660, skipped=84, lr=[9.790493086186587e-07, 9.790493086186587e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:51:50,697] [INFO] [timer.py:199:stop] epoch=0/micro_step=6660/global_step=6660, RunningAvgSamplesPerSec=105.51379557442812, CurrSamplesPerSec=99.45649093894681, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:51:50,790] [INFO] [logging.py:96:log_dist] [Rank 0] step=6660, skipped=112, lr=[5.236689692495916e-07, 5.236689692495916e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6659|ppo_ep: 1|act_loss: -0.0159912109375|cri_loss: -0.00777435302734375|unsuper_loss: 0.0
-average reward score: 5.0390625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.01%) |Training time=0.48s (22.38%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6660|ppo_ep: 1|act_loss: -0.00875091552734375|cri_loss: -0.00411224365234375|unsuper_loss: 0.0
-average reward score: 5.234375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.33%) |Training time=0.47s (22.01%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6661|ppo_ep: 1|act_loss: -0.009613037109375|cri_loss: -0.0046539306640625|unsuper_loss: 0.0
-average reward score: 5.36328125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.29%) |Training time=0.48s (22.08%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6662|ppo_ep: 1|act_loss: -0.020050048828125|cri_loss: -0.00936126708984375|unsuper_loss: 0.0
-average reward score: 5.8046875
--------------------------------------------------------------------------------------
-|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.58s (71.27%) |Training time=0.54s (24.21%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.42 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6663|ppo_ep: 1|act_loss: -0.0089111328125|cri_loss: -0.00431060791015625|unsuper_loss: 0.0
-average reward score: 4.75
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.58%) |Training time=0.47s (21.76%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6664|ppo_ep: 1|act_loss: -0.006011962890625|cri_loss: -0.00070953369140625|unsuper_loss: 0.0
-average reward score: 5.22265625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.94%) |Training time=0.46s (21.38%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6665|ppo_ep: 1|act_loss: -0.01068115234375|cri_loss: -0.005191802978515625|unsuper_loss: 0.0
-average reward score: 5.3203125
--------------------------------------------------------------------------------------
-|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.04%) |Training time=0.45s (21.27%) |Others=0.10 (4.69%)|CurSamplesPerSec=15.00 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6666|ppo_ep: 1|act_loss: -0.01541900634765625|cri_loss: -0.0076141357421875|unsuper_loss: 0.0
-average reward score: 5.34375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.34%) |Training time=0.47s (22.00%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.54
-[2023-04-14 12:52:07,930] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, reducing to 16384
-epoch: 0|step: 6667|ppo_ep: 1|act_loss: 0.03076171875|cri_loss: 0.0161285400390625|unsuper_loss: 0.0
-average reward score: 5.4765625
--------------------------------------------------------------------------------------
-|E2E latency=2.11s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.71%) |Training time=0.43s (20.58%) |Others=0.10 (4.71%)|CurSamplesPerSec=15.15 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6668|ppo_ep: 1|act_loss: 0.005889892578125|cri_loss: 0.003192901611328125|unsuper_loss: 0.0
-average reward score: 4.875
--------------------------------------------------------------------------------------
-|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.02%) |Training time=0.45s (21.30%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.54
-[2023-04-14 12:52:12,186] [INFO] [logging.py:96:log_dist] [Rank 0] step=6670, skipped=85, lr=[9.689766831817931e-07, 9.689766831817931e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:52:12,205] [INFO] [timer.py:199:stop] epoch=0/micro_step=6670/global_step=6670, RunningAvgSamplesPerSec=105.51352462606164, CurrSamplesPerSec=108.40804827478699, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:52:12,297] [INFO] [logging.py:96:log_dist] [Rank 0] step=6670, skipped=112, lr=[5.177891001378454e-07, 5.177891001378454e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6669|ppo_ep: 1|act_loss: -0.0132293701171875|cri_loss: -0.006320953369140625|unsuper_loss: 0.0
-average reward score: 5.38671875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.93%) |Training time=0.46s (21.40%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6670|ppo_ep: 1|act_loss: -0.0029659271240234375|cri_loss: -0.0012969970703125|unsuper_loss: 0.0
-average reward score: 5.30078125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.05%) |Training time=0.45s (21.24%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6671|ppo_ep: 1|act_loss: -0.00601959228515625|cri_loss: -0.00296783447265625|unsuper_loss: 0.0
-average reward score: 5.78125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.85%) |Training time=0.46s (21.49%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6672|ppo_ep: 1|act_loss: -0.0157928466796875|cri_loss: -0.007663726806640625|unsuper_loss: 0.0
-average reward score: 4.9453125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.19%) |Training time=0.45s (21.10%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6673|ppo_ep: 1|act_loss: 0.002552032470703125|cri_loss: 0.001552581787109375|unsuper_loss: 0.0
-average reward score: 5.1484375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.21%) |Training time=0.45s (21.11%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6674|ppo_ep: 1|act_loss: -0.0108184814453125|cri_loss: -0.005218505859375|unsuper_loss: 0.0
-average reward score: 5.38671875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.12%) |Training time=0.45s (21.19%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6675|ppo_ep: 1|act_loss: -0.0014591217041015625|cri_loss: -0.0005102157592773438|unsuper_loss: 0.0
-average reward score: 5.95703125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.32%) |Training time=0.45s (20.86%) |Others=0.10 (4.81%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6676|ppo_ep: 1|act_loss: 0.00400543212890625|cri_loss: 0.00254058837890625|unsuper_loss: 0.0
-average reward score: 5.6875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.12%) |Training time=0.45s (21.22%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6677|ppo_ep: 1|act_loss: 0.00536346435546875|cri_loss: 0.0029087066650390625|unsuper_loss: 0.0
-average reward score: 5.4375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.21%) |Training time=0.45s (21.09%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6678|ppo_ep: 1|act_loss: 0.0085906982421875|cri_loss: 0.004543304443359375|unsuper_loss: 0.0
-average reward score: 6.25
--------------------------------------------------------------------------------------
-|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.34%) |Training time=0.48s (20.36%) |Others=0.10 (4.30%)|CurSamplesPerSec=13.71 |AvgSamplesPerSec=14.54
-[2023-04-14 12:52:33,771] [INFO] [logging.py:96:log_dist] [Rank 0] step=6680, skipped=85, lr=[9.578391801772933e-07, 9.578391801772933e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:52:33,790] [INFO] [timer.py:199:stop] epoch=0/micro_step=6680/global_step=6680, RunningAvgSamplesPerSec=105.51922652617614, CurrSamplesPerSec=109.9554075841187, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:52:33,882] [INFO] [logging.py:96:log_dist] [Rank 0] step=6680, skipped=112, lr=[5.119386121734576e-07, 5.119386121734576e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6679|ppo_ep: 1|act_loss: -0.0096435546875|cri_loss: -0.0047149658203125|unsuper_loss: 0.0
-average reward score: 6.37109375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.19%) |Training time=0.45s (21.15%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6680|ppo_ep: 1|act_loss: -0.0172576904296875|cri_loss: -0.00783538818359375|unsuper_loss: 0.0
-average reward score: 5.3125
--------------------------------------------------------------------------------------
-|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.22%) |Training time=0.45s (21.08%) |Others=0.10 (4.70%)|CurSamplesPerSec=15.01 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6681|ppo_ep: 1|act_loss: -0.0127716064453125|cri_loss: -0.006191253662109375|unsuper_loss: 0.0
-average reward score: 5.703125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.88%) |Training time=0.46s (21.44%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6682|ppo_ep: 1|act_loss: -0.00936126708984375|cri_loss: -0.0045166015625|unsuper_loss: 0.0
-average reward score: 4.59375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.16%) |Training time=0.45s (21.14%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6683|ppo_ep: 1|act_loss: -0.0095062255859375|cri_loss: -0.003978729248046875|unsuper_loss: 0.0
-average reward score: 5.171875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.82%) |Training time=0.46s (21.29%) |Others=0.11 (4.89%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6684|ppo_ep: 1|act_loss: 0.025115966796875|cri_loss: 0.012786865234375|unsuper_loss: 0.0
-average reward score: 5.875
--------------------------------------------------------------------------------------
-|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.62s (71.17%) |Training time=0.56s (24.41%) |Others=0.10 (4.42%)|CurSamplesPerSec=14.07 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6685|ppo_ep: 1|act_loss: -0.01519775390625|cri_loss: -0.00749969482421875|unsuper_loss: 0.0
-average reward score: 5.41015625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.73%) |Training time=0.45s (21.13%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6686|ppo_ep: 1|act_loss: -0.0013332366943359375|cri_loss: -0.0005702972412109375|unsuper_loss: 0.0
-average reward score: 5.21875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.80%) |Training time=0.46s (21.52%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6687|ppo_ep: 1|act_loss: -0.019500732421875|cri_loss: -0.00909423828125|unsuper_loss: 0.0
-average reward score: 5.1484375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.89%) |Training time=0.46s (21.41%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6688|ppo_ep: 1|act_loss: -0.01041412353515625|cri_loss: -0.005092620849609375|unsuper_loss: 0.0
-average reward score: 5.38671875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.06%) |Training time=0.45s (21.25%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.54
-[2023-04-14 12:52:55,313] [INFO] [logging.py:96:log_dist] [Rank 0] step=6690, skipped=85, lr=[9.467589978251612e-07, 9.467589978251612e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:52:55,331] [INFO] [timer.py:199:stop] epoch=0/micro_step=6690/global_step=6690, RunningAvgSamplesPerSec=105.5206408271309, CurrSamplesPerSec=109.22053367771504, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:52:55,424] [INFO] [logging.py:96:log_dist] [Rank 0] step=6690, skipped=112, lr=[5.061175920747744e-07, 5.061175920747744e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6689|ppo_ep: 1|act_loss: -0.0106201171875|cri_loss: -0.00505828857421875|unsuper_loss: 0.0
-average reward score: 5.125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.97%) |Training time=0.46s (21.36%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6690|ppo_ep: 1|act_loss: -0.00559234619140625|cri_loss: -0.00270843505859375|unsuper_loss: 0.0
-average reward score: 6.0390625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.88%) |Training time=0.46s (21.42%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6691|ppo_ep: 1|act_loss: -0.0083770751953125|cri_loss: -0.004062652587890625|unsuper_loss: 0.0
-average reward score: 5.73828125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.86%) |Training time=0.46s (21.45%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6692|ppo_ep: 1|act_loss: -0.00913238525390625|cri_loss: -0.0028133392333984375|unsuper_loss: 0.0
-average reward score: 5.828125
--------------------------------------------------------------------------------------
-|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.00%) |Training time=0.45s (21.32%) |Others=0.10 (4.68%)|CurSamplesPerSec=15.00 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6693|ppo_ep: 1|act_loss: -0.0193328857421875|cri_loss: -0.0092010498046875|unsuper_loss: 0.0
-average reward score: 5.26171875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.62%) |Training time=0.46s (21.62%) |Others=0.10 (4.77%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6694|ppo_ep: 1|act_loss: -0.01210784912109375|cri_loss: -0.00600433349609375|unsuper_loss: 0.0
-average reward score: 6.19140625
--------------------------------------------------------------------------------------
-|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.75s (74.59%) |Training time=0.50s (21.05%) |Others=0.10 (4.35%)|CurSamplesPerSec=13.60 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6695|ppo_ep: 1|act_loss: 0.0144500732421875|cri_loss: 0.007442474365234375|unsuper_loss: 0.0
-average reward score: 4.94140625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.01%) |Training time=0.46s (21.31%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6696|ppo_ep: 1|act_loss: 0.005863189697265625|cri_loss: 0.003330230712890625|unsuper_loss: 0.0
-average reward score: 5.046875
--------------------------------------------------------------------------------------
-|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.15%) |Training time=0.45s (21.15%) |Others=0.10 (4.70%)|CurSamplesPerSec=15.01 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6697|ppo_ep: 1|act_loss: -0.009918212890625|cri_loss: -0.00487518310546875|unsuper_loss: 0.0
-average reward score: 5.25390625
--------------------------------------------------------------------------------------
-|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.06%) |Training time=0.45s (21.25%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6698|ppo_ep: 1|act_loss: 0.02923583984375|cri_loss: 0.01513671875|unsuper_loss: 0.0
-average reward score: 5.6796875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.80%) |Training time=0.46s (21.55%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.54
-[2023-04-14 12:53:16,902] [INFO] [logging.py:96:log_dist] [Rank 0] step=6700, skipped=85, lr=[9.357363003604284e-07, 9.357363003604284e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:53:16,920] [INFO] [timer.py:199:stop] epoch=0/micro_step=6700/global_step=6700, RunningAvgSamplesPerSec=105.52344625762034, CurrSamplesPerSec=109.05564597185241, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:53:17,004] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
-[2023-04-14 12:53:17,005] [INFO] [logging.py:96:log_dist] [Rank 0] step=6700, skipped=113, lr=[5.009039403318924e-07, 5.009039403318924e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6699|ppo_ep: 1|act_loss: -0.01666259765625|cri_loss: -0.00447845458984375|unsuper_loss: 0.0
-average reward score: 4.7890625
--------------------------------------------------------------------------------------
-|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.36%) |Training time=0.46s (21.45%) |Others=0.09 (4.19%)|CurSamplesPerSec=15.06 |AvgSamplesPerSec=14.54
-[2023-04-14 12:53:19,135] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
-epoch: 0|step: 6700|ppo_ep: 1|act_loss: 0.007534027099609375|cri_loss: 0.003940582275390625|unsuper_loss: 0.0
-average reward score: 5.96875
--------------------------------------------------------------------------------------
-|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.21%) |Training time=0.46s (21.58%) |Others=0.09 (4.20%)|CurSamplesPerSec=15.01 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6701|ppo_ep: 1|act_loss: -0.0025157928466796875|cri_loss: -0.0008792877197265625|unsuper_loss: 0.0
-average reward score: 5.5234375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.04%) |Training time=0.46s (21.30%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6702|ppo_ep: 1|act_loss: 0.00467681884765625|cri_loss: 0.002513885498046875|unsuper_loss: 0.0
-average reward score: 5.2734375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.84%) |Training time=0.46s (21.48%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6703|ppo_ep: 1|act_loss: 0.0185394287109375|cri_loss: 0.0096435546875|unsuper_loss: 0.0
-average reward score: 5.203125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.01%) |Training time=0.46s (21.30%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6704|ppo_ep: 1|act_loss: 0.00250244140625|cri_loss: 0.0015621185302734375|unsuper_loss: 0.0
-average reward score: 5.515625
--------------------------------------------------------------------------------------
-|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.34%) |Training time=0.45s (20.96%) |Others=0.10 (4.69%)|CurSamplesPerSec=15.01 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6705|ppo_ep: 1|act_loss: -0.0096435546875|cri_loss: -0.0043487548828125|unsuper_loss: 0.0
-average reward score: 4.9296875
--------------------------------------------------------------------------------------
-|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.16%) |Training time=0.45s (21.15%) |Others=0.10 (4.68%)|CurSamplesPerSec=15.00 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6706|ppo_ep: 1|act_loss: 0.00850677490234375|cri_loss: 0.0045928955078125|unsuper_loss: 0.0
-average reward score: 5.43359375
--------------------------------------------------------------------------------------
-|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.45%) |Training time=0.44s (20.85%) |Others=0.10 (4.70%)|CurSamplesPerSec=15.03 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6707|ppo_ep: 1|act_loss: -0.016876220703125|cri_loss: -0.008056640625|unsuper_loss: 0.0
-average reward score: 5.56640625
--------------------------------------------------------------------------------------
-|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.94%) |Training time=0.46s (21.39%) |Others=0.10 (4.67%)|CurSamplesPerSec=15.00 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6708|ppo_ep: 1|act_loss: -0.020416259765625|cri_loss: -0.0099334716796875|unsuper_loss: 0.0
-average reward score: 5.32421875
--------------------------------------------------------------------------------------
-|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.58s (71.13%) |Training time=0.52s (23.65%) |Others=0.12 (5.22%)|CurSamplesPerSec=14.41 |AvgSamplesPerSec=14.54
-[2023-04-14 12:53:38,530] [INFO] [logging.py:96:log_dist] [Rank 0] step=6710, skipped=85, lr=[9.247712511660617e-07, 9.247712511660617e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:53:38,547] [INFO] [timer.py:199:stop] epoch=0/micro_step=6710/global_step=6710, RunningAvgSamplesPerSec=105.52877706520603, CurrSamplesPerSec=108.05545040370528, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:53:38,640] [INFO] [logging.py:96:log_dist] [Rank 0] step=6710, skipped=114, lr=[4.957142900498335e-07, 4.957142900498335e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6709|ppo_ep: 1|act_loss: -0.00634765625|cri_loss: -0.0029582977294921875|unsuper_loss: 0.0
-average reward score: 5.5625
--------------------------------------------------------------------------------------
-|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.78s (76.11%) |Training time=0.46s (19.61%) |Others=0.10 (4.28%)|CurSamplesPerSec=13.67 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6710|ppo_ep: 1|act_loss: -0.012725830078125|cri_loss: -0.0059661865234375|unsuper_loss: 0.0
-average reward score: 5.39453125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.81%) |Training time=0.44s (20.52%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6711|ppo_ep: 1|act_loss: 0.0023860931396484375|cri_loss: 0.0013742446899414062|unsuper_loss: 0.0
-average reward score: 5.5234375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.72%) |Training time=0.44s (20.61%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6712|ppo_ep: 1|act_loss: 0.0177459716796875|cri_loss: 0.00939178466796875|unsuper_loss: 0.0
-average reward score: 5.734375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.38%) |Training time=0.45s (20.88%) |Others=0.10 (4.74%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6713|ppo_ep: 1|act_loss: 0.0116729736328125|cri_loss: 0.006237030029296875|unsuper_loss: 0.0
-average reward score: 5.03515625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.73%) |Training time=0.44s (20.54%) |Others=0.10 (4.73%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6714|ppo_ep: 1|act_loss: 0.0052642822265625|cri_loss: 0.0027484893798828125|unsuper_loss: 0.0
-average reward score: 5.2421875
--------------------------------------------------------------------------------------
-|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.70s (75.03%) |Training time=0.46s (20.56%) |Others=0.10 (4.41%)|CurSamplesPerSec=14.15 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6715|ppo_ep: 1|act_loss: -0.00588226318359375|cri_loss: -0.002285003662109375|unsuper_loss: 0.0
-average reward score: 5.81640625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.57%) |Training time=0.44s (20.76%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6716|ppo_ep: 1|act_loss: 0.007537841796875|cri_loss: 0.00385284423828125|unsuper_loss: 0.0
-average reward score: 6.08984375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.76%) |Training time=0.44s (20.56%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6717|ppo_ep: 1|act_loss: 0.00479888916015625|cri_loss: 0.00278472900390625|unsuper_loss: 0.0
-average reward score: 5.68359375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.42%) |Training time=0.45s (20.93%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6718|ppo_ep: 1|act_loss: 0.0034942626953125|cri_loss: 0.0019989013671875|unsuper_loss: 0.0
-average reward score: 5.24609375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.10%) |Training time=0.45s (21.22%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.54
-[2023-04-14 12:54:00,080] [INFO] [logging.py:96:log_dist] [Rank 0] step=6720, skipped=85, lr=[9.138640127705436e-07, 9.138640127705436e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:54:00,098] [INFO] [timer.py:199:stop] epoch=0/micro_step=6720/global_step=6720, RunningAvgSamplesPerSec=105.53805823334427, CurrSamplesPerSec=109.25004944906014, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:54:00,190] [INFO] [logging.py:96:log_dist] [Rank 0] step=6720, skipped=114, lr=[4.899762375939118e-07, 4.899762375939118e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6719|ppo_ep: 1|act_loss: -0.02252197265625|cri_loss: -0.01092529296875|unsuper_loss: 0.0
-average reward score: 5.78515625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.03%) |Training time=0.46s (21.31%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6720|ppo_ep: 1|act_loss: 0.0220947265625|cri_loss: 0.01131439208984375|unsuper_loss: 0.0
-average reward score: 6.6484375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.86%) |Training time=0.46s (21.47%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6721|ppo_ep: 1|act_loss: -0.0132293701171875|cri_loss: -0.00577545166015625|unsuper_loss: 0.0
-average reward score: 5.41796875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.69%) |Training time=0.46s (21.65%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6722|ppo_ep: 1|act_loss: -0.018951416015625|cri_loss: -0.00917816162109375|unsuper_loss: 0.0
-average reward score: 4.51171875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.01%) |Training time=0.46s (21.37%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6723|ppo_ep: 1|act_loss: -0.013641357421875|cri_loss: -0.006664276123046875|unsuper_loss: 0.0
-average reward score: 5.578125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.92%) |Training time=0.46s (21.42%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6724|ppo_ep: 1|act_loss: -0.0198211669921875|cri_loss: -0.009765625|unsuper_loss: 0.0
-average reward score: 5.37890625
--------------------------------------------------------------------------------------
-|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.04%) |Training time=0.48s (20.65%) |Others=0.10 (4.31%)|CurSamplesPerSec=13.72 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6725|ppo_ep: 1|act_loss: 0.004795074462890625|cri_loss: 0.0027618408203125|unsuper_loss: 0.0
-average reward score: 5.4609375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.95%) |Training time=0.46s (21.40%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6726|ppo_ep: 1|act_loss: 0.00786590576171875|cri_loss: 0.004146575927734375|unsuper_loss: 0.0
-average reward score: 4.609375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.02%) |Training time=0.46s (21.30%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6727|ppo_ep: 1|act_loss: -0.0087127685546875|cri_loss: -0.00384521484375|unsuper_loss: 0.0
-average reward score: 5.765625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.69%) |Training time=0.46s (21.66%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6728|ppo_ep: 1|act_loss: 0.0158233642578125|cri_loss: 0.00830841064453125|unsuper_loss: 0.0
-average reward score: 5.22265625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.98%) |Training time=0.46s (21.35%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.54
-[2023-04-14 12:54:21,710] [INFO] [logging.py:96:log_dist] [Rank 0] step=6730, skipped=85, lr=[9.03014746845457e-07, 9.03014746845457e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:54:21,729] [INFO] [timer.py:199:stop] epoch=0/micro_step=6730/global_step=6730, RunningAvgSamplesPerSec=105.53983507346179, CurrSamplesPerSec=108.58169774427452, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:54:21,821] [INFO] [logging.py:96:log_dist] [Rank 0] step=6730, skipped=114, lr=[4.842679785390672e-07, 4.842679785390672e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6729|ppo_ep: 1|act_loss: 0.0019283294677734375|cri_loss: 0.0010194778442382812|unsuper_loss: 0.0
-average reward score: 5.609375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.93%) |Training time=0.46s (21.40%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6730|ppo_ep: 1|act_loss: -0.023345947265625|cri_loss: -0.01141357421875|unsuper_loss: 0.0
-average reward score: 5.7265625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.99%) |Training time=0.46s (21.32%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6731|ppo_ep: 1|act_loss: 0.00859832763671875|cri_loss: 0.00452423095703125|unsuper_loss: 0.0
-average reward score: 5.2265625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.80%) |Training time=0.46s (21.52%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6732|ppo_ep: 1|act_loss: -0.002971649169921875|cri_loss: -0.0011320114135742188|unsuper_loss: 0.0
-average reward score: 5.890625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.14%) |Training time=0.45s (21.19%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6733|ppo_ep: 1|act_loss: -0.011474609375|cri_loss: -0.00536346435546875|unsuper_loss: 0.0
-average reward score: 5.203125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.81%) |Training time=0.46s (21.52%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6734|ppo_ep: 1|act_loss: -0.00032138824462890625|cri_loss: -0.00010442733764648438|unsuper_loss: 0.0
-average reward score: 5.109375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.62%) |Training time=0.47s (21.72%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6735|ppo_ep: 1|act_loss: -0.010223388671875|cri_loss: -0.004852294921875|unsuper_loss: 0.0
-average reward score: 4.62890625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.12%) |Training time=0.45s (21.21%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6736|ppo_ep: 1|act_loss: 0.0158538818359375|cri_loss: 0.0081787109375|unsuper_loss: 0.0
-average reward score: 4.828125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.90%) |Training time=0.46s (21.44%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6737|ppo_ep: 1|act_loss: -0.01104736328125|cri_loss: -0.005374908447265625|unsuper_loss: 0.0
-average reward score: 5.5078125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.89%) |Training time=0.46s (21.44%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6738|ppo_ep: 1|act_loss: 0.015716552734375|cri_loss: 0.008941650390625|unsuper_loss: 0.0
-average reward score: 5.22265625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.89%) |Training time=0.46s (21.44%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.54
-[2023-04-14 12:54:43,139] [INFO] [logging.py:96:log_dist] [Rank 0] step=6740, skipped=85, lr=[8.922236142030963e-07, 8.922236142030963e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:54:43,165] [INFO] [timer.py:199:stop] epoch=0/micro_step=6740/global_step=6740, RunningAvgSamplesPerSec=105.54288850018418, CurrSamplesPerSec=105.9612526723656, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:54:43,311] [INFO] [logging.py:96:log_dist] [Rank 0] step=6740, skipped=114, lr=[4.785895974954696e-07, 4.785895974954696e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6739|ppo_ep: 1|act_loss: 0.042236328125|cri_loss: 0.021759033203125|unsuper_loss: 0.0
-average reward score: 5.140625
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.58s (71.74%) |Training time=0.47s (21.34%) |Others=0.15 (6.92%)|CurSamplesPerSec=14.52 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6740|ppo_ep: 1|act_loss: 0.003856658935546875|cri_loss: 0.0022125244140625|unsuper_loss: 0.0
-average reward score: 4.85546875
--------------------------------------------------------------------------------------
-|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.71s (75.02%) |Training time=0.47s (20.50%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.08 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6741|ppo_ep: 1|act_loss: -0.006992340087890625|cri_loss: -0.0033550262451171875|unsuper_loss: 0.0
-average reward score: 5.44140625
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.82%) |Training time=0.47s (21.49%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6742|ppo_ep: 1|act_loss: 0.002826690673828125|cri_loss: 0.0014934539794921875|unsuper_loss: 0.0
-average reward score: 5.0078125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.33%) |Training time=0.43s (19.96%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6743|ppo_ep: 1|act_loss: 0.0209503173828125|cri_loss: 0.01145172119140625|unsuper_loss: 0.0
-average reward score: 5.35546875
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.67s (75.40%) |Training time=0.44s (19.95%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.46 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6744|ppo_ep: 1|act_loss: -0.01523590087890625|cri_loss: -0.00753021240234375|unsuper_loss: 0.0
-average reward score: 6.4765625
--------------------------------------------------------------------------------------
-|E2E latency=2.30s |Gather latency=0.00s (0.00%) |Generate time=1.75s (76.03%) |Training time=0.45s (19.51%) |Others=0.10 (4.46%)|CurSamplesPerSec=13.92 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6745|ppo_ep: 1|act_loss: 0.063232421875|cri_loss: 0.03399658203125|unsuper_loss: 0.0
-average reward score: 5.53125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.24%) |Training time=0.44s (20.04%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6746|ppo_ep: 1|act_loss: 0.0103607177734375|cri_loss: 0.0072021484375|unsuper_loss: 0.0
-average reward score: 5.68359375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.31%) |Training time=0.44s (19.99%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6747|ppo_ep: 1|act_loss: -0.0131072998046875|cri_loss: -0.006072998046875|unsuper_loss: 0.0
-average reward score: 5.5078125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.35%) |Training time=0.48s (21.81%) |Others=0.11 (4.83%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6748|ppo_ep: 1|act_loss: 0.0301055908203125|cri_loss: 0.01544952392578125|unsuper_loss: 0.0
-average reward score: 5.671875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.14%) |Training time=0.48s (22.24%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.54
-[2023-04-14 12:55:05,236] [INFO] [logging.py:96:log_dist] [Rank 0] step=6750, skipped=85, lr=[8.81490774794079e-07, 8.81490774794079e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:55:05,255] [INFO] [timer.py:199:stop] epoch=0/micro_step=6750/global_step=6750, RunningAvgSamplesPerSec=105.54850591402027, CurrSamplesPerSec=105.97112470885476, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:55:05,348] [INFO] [logging.py:96:log_dist] [Rank 0] step=6750, skipped=114, lr=[4.729411786304247e-07, 4.729411786304247e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6749|ppo_ep: 1|act_loss: -0.0013751983642578125|cri_loss: -0.00048041343688964844|unsuper_loss: 0.0
-average reward score: 5.23828125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.91%) |Training time=0.47s (21.48%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6750|ppo_ep: 1|act_loss: 0.0038166046142578125|cri_loss: 0.002071380615234375|unsuper_loss: 0.0
-average reward score: 5.60546875
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.95%) |Training time=0.47s (21.46%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6751|ppo_ep: 1|act_loss: -0.01177978515625|cri_loss: -0.005767822265625|unsuper_loss: 0.0
-average reward score: 5.171875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.35%) |Training time=0.46s (21.04%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6752|ppo_ep: 1|act_loss: 0.0181884765625|cri_loss: 0.009521484375|unsuper_loss: 0.0
-average reward score: 5.59765625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.21%) |Training time=0.46s (21.15%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6753|ppo_ep: 1|act_loss: -0.011199951171875|cri_loss: -0.005107879638671875|unsuper_loss: 0.0
-average reward score: 5.2578125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.06%) |Training time=0.47s (21.32%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6754|ppo_ep: 1|act_loss: -0.00250244140625|cri_loss: -0.0010585784912109375|unsuper_loss: 0.0
-average reward score: 5.14453125
--------------------------------------------------------------------------------------
-|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.78s (76.30%) |Training time=0.45s (19.38%) |Others=0.10 (4.32%)|CurSamplesPerSec=13.68 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6755|ppo_ep: 1|act_loss: -0.004077911376953125|cri_loss: -0.0019178390502929688|unsuper_loss: 0.0
-average reward score: 5.171875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.38%) |Training time=0.43s (19.83%) |Others=0.10 (4.79%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6756|ppo_ep: 1|act_loss: -0.00409698486328125|cri_loss: -0.0018758773803710938|unsuper_loss: 0.0
-average reward score: 4.796875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.28%) |Training time=0.46s (21.02%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6757|ppo_ep: 1|act_loss: 0.0287628173828125|cri_loss: 0.0147857666015625|unsuper_loss: 0.0
-average reward score: 4.98046875
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.43%) |Training time=0.46s (21.01%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6758|ppo_ep: 1|act_loss: 0.03680419921875|cri_loss: 0.018829345703125|unsuper_loss: 0.0
-average reward score: 5.3515625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.02%) |Training time=0.47s (21.39%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.54
-[2023-04-14 12:55:27,208] [INFO] [logging.py:96:log_dist] [Rank 0] step=6760, skipped=85, lr=[8.708163877049794e-07, 8.708163877049794e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:55:27,226] [INFO] [timer.py:199:stop] epoch=0/micro_step=6760/global_step=6760, RunningAvgSamplesPerSec=105.55200119918464, CurrSamplesPerSec=105.85219716823623, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:55:27,322] [INFO] [logging.py:96:log_dist] [Rank 0] step=6760, skipped=114, lr=[4.673228056671253e-07, 4.673228056671253e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6759|ppo_ep: 1|act_loss: 0.0150146484375|cri_loss: 0.00763702392578125|unsuper_loss: 0.0
-average reward score: 5.51171875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.88%) |Training time=0.47s (21.37%) |Others=0.10 (4.74%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6760|ppo_ep: 1|act_loss: -0.008392333984375|cri_loss: -0.00408172607421875|unsuper_loss: 0.0
-average reward score: 5.5625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.84%) |Training time=0.47s (21.49%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6761|ppo_ep: 1|act_loss: 0.03912353515625|cri_loss: 0.020965576171875|unsuper_loss: 0.0
-average reward score: 5.9609375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.82%) |Training time=0.47s (21.58%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6762|ppo_ep: 1|act_loss: -0.00550079345703125|cri_loss: -0.0025272369384765625|unsuper_loss: 0.0
-average reward score: 5.44921875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.95%) |Training time=0.47s (21.45%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6763|ppo_ep: 1|act_loss: -0.0089874267578125|cri_loss: -0.00429534912109375|unsuper_loss: 0.0
-average reward score: 4.921875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.84%) |Training time=0.47s (21.58%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6764|ppo_ep: 1|act_loss: 0.06707763671875|cri_loss: 0.035980224609375|unsuper_loss: 0.0
-average reward score: 5.2890625
--------------------------------------------------------------------------------------
-|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.72s (74.51%) |Training time=0.48s (20.82%) |Others=0.11 (4.67%)|CurSamplesPerSec=13.86 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6765|ppo_ep: 1|act_loss: 0.0087127685546875|cri_loss: 0.004486083984375|unsuper_loss: 0.0
-average reward score: 5.5625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.60%) |Training time=0.48s (21.82%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6766|ppo_ep: 1|act_loss: 0.017425537109375|cri_loss: 0.00891876220703125|unsuper_loss: 0.0
-average reward score: 5.7421875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.62%) |Training time=0.48s (21.79%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6767|ppo_ep: 1|act_loss: -0.010345458984375|cri_loss: -0.004726409912109375|unsuper_loss: 0.0
-average reward score: 5.23046875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.66%) |Training time=0.47s (21.75%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6768|ppo_ep: 1|act_loss: -0.04034423828125|cri_loss: -0.01947021484375|unsuper_loss: 0.0
-average reward score: 4.66015625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.60%) |Training time=0.48s (21.80%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.54
-[2023-04-14 12:55:49,293] [INFO] [logging.py:96:log_dist] [Rank 0] step=6770, skipped=85, lr=[8.602006111559606e-07, 8.602006111559606e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:55:49,311] [INFO] [timer.py:199:stop] epoch=0/micro_step=6770/global_step=6770, RunningAvgSamplesPerSec=105.54869605550402, CurrSamplesPerSec=104.80069649682399, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:55:49,407] [INFO] [logging.py:96:log_dist] [Rank 0] step=6770, skipped=114, lr=[4.6173456188341234e-07, 4.6173456188341234e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6769|ppo_ep: 1|act_loss: -0.00492095947265625|cri_loss: -0.002368927001953125|unsuper_loss: 0.0
-average reward score: 5.3515625
--------------------------------------------------------------------------------------
-|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.77s (75.57%) |Training time=0.47s (20.02%) |Others=0.10 (4.42%)|CurSamplesPerSec=13.69 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6770|ppo_ep: 1|act_loss: -0.00428009033203125|cri_loss: -0.0020275115966796875|unsuper_loss: 0.0
-average reward score: 5.9296875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.16%) |Training time=0.46s (21.19%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6771|ppo_ep: 1|act_loss: -0.02099609375|cri_loss: -0.010284423828125|unsuper_loss: 0.0
-average reward score: 5.4765625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.69%) |Training time=0.47s (21.69%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6772|ppo_ep: 1|act_loss: 0.03363037109375|cri_loss: 0.0172271728515625|unsuper_loss: 0.0
-average reward score: 5.56640625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.39%) |Training time=0.46s (20.97%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6773|ppo_ep: 1|act_loss: -0.00351715087890625|cri_loss: -0.0007381439208984375|unsuper_loss: 0.0
-average reward score: 5.84375
--------------------------------------------------------------------------------------
-|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.71s (75.19%) |Training time=0.46s (20.40%) |Others=0.10 (4.41%)|CurSamplesPerSec=14.07 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6774|ppo_ep: 1|act_loss: -0.0094451904296875|cri_loss: -0.00452423095703125|unsuper_loss: 0.0
-average reward score: 5.49609375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.07%) |Training time=0.46s (21.24%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6775|ppo_ep: 1|act_loss: 0.01076507568359375|cri_loss: 0.00554656982421875|unsuper_loss: 0.0
-average reward score: 5.859375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.18%) |Training time=0.46s (21.18%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6776|ppo_ep: 1|act_loss: -0.003173828125|cri_loss: -0.001331329345703125|unsuper_loss: 0.0
-average reward score: 5.125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.03%) |Training time=0.46s (21.34%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6777|ppo_ep: 1|act_loss: -0.00521087646484375|cri_loss: -0.0022106170654296875|unsuper_loss: 0.0
-average reward score: 5.23046875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.36%) |Training time=0.48s (22.05%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6778|ppo_ep: 1|act_loss: 0.01116943359375|cri_loss: 0.005718231201171875|unsuper_loss: 0.0
-average reward score: 5.45703125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.12%) |Training time=0.46s (21.25%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.54
-[2023-04-14 12:56:11,135] [INFO] [logging.py:96:log_dist] [Rank 0] step=6780, skipped=85, lr=[8.496436024984428e-07, 8.496436024984428e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:56:11,153] [INFO] [timer.py:199:stop] epoch=0/micro_step=6780/global_step=6780, RunningAvgSamplesPerSec=105.54960072512817, CurrSamplesPerSec=107.5528061003715, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:56:11,246] [INFO] [logging.py:96:log_dist] [Rank 0] step=6780, skipped=114, lr=[4.5617653011053977e-07, 4.5617653011053977e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6779|ppo_ep: 1|act_loss: 0.0100555419921875|cri_loss: 0.00524139404296875|unsuper_loss: 0.0
-average reward score: 5.98828125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.12%) |Training time=0.46s (21.28%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6780|ppo_ep: 1|act_loss: -0.01525115966796875|cri_loss: -0.007476806640625|unsuper_loss: 0.0
-average reward score: 5.8046875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.89%) |Training time=0.47s (21.48%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6781|ppo_ep: 1|act_loss: 0.020843505859375|cri_loss: 0.01080322265625|unsuper_loss: 0.0
-average reward score: 4.92578125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.01%) |Training time=0.46s (21.36%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6782|ppo_ep: 1|act_loss: -0.04522705078125|cri_loss: -0.022186279296875|unsuper_loss: 0.0
-average reward score: 5.19921875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.03%) |Training time=0.46s (21.36%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6783|ppo_ep: 1|act_loss: -0.006313323974609375|cri_loss: -0.0029087066650390625|unsuper_loss: 0.0
-average reward score: 5.66015625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.80%) |Training time=0.47s (21.59%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6784|ppo_ep: 1|act_loss: -0.0184173583984375|cri_loss: -0.00901031494140625|unsuper_loss: 0.0
-average reward score: 4.83203125
--------------------------------------------------------------------------------------
-|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.60s (71.53%) |Training time=0.54s (23.98%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.30 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6785|ppo_ep: 1|act_loss: -0.0084991455078125|cri_loss: -0.004001617431640625|unsuper_loss: 0.0
-average reward score: 5.36328125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.09%) |Training time=0.46s (21.29%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6786|ppo_ep: 1|act_loss: -0.0009622573852539062|cri_loss: -0.00033354759216308594|unsuper_loss: 0.0
-average reward score: 5.6640625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.11%) |Training time=0.46s (21.28%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6787|ppo_ep: 1|act_loss: -0.005367279052734375|cri_loss: -0.002521514892578125|unsuper_loss: 0.0
-average reward score: 5.72265625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.90%) |Training time=0.47s (21.47%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6788|ppo_ep: 1|act_loss: -0.00386810302734375|cri_loss: -0.001888275146484375|unsuper_loss: 0.0
-average reward score: 5.3828125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.92%) |Training time=0.46s (21.44%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.54
-[2023-04-14 12:56:32,879] [INFO] [logging.py:96:log_dist] [Rank 0] step=6790, skipped=85, lr=[8.391455182127607e-07, 8.391455182127607e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:56:32,898] [INFO] [timer.py:199:stop] epoch=0/micro_step=6790/global_step=6790, RunningAvgSamplesPerSec=105.54857659001864, CurrSamplesPerSec=106.04438580204238, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:56:32,990] [INFO] [logging.py:96:log_dist] [Rank 0] step=6790, skipped=114, lr=[4.506487927319475e-07, 4.506487927319475e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6789|ppo_ep: 1|act_loss: 0.0081634521484375|cri_loss: 0.00420379638671875|unsuper_loss: 0.0
-average reward score: 5.7109375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.98%) |Training time=0.46s (21.43%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6790|ppo_ep: 1|act_loss: -0.0150146484375|cri_loss: -0.006427764892578125|unsuper_loss: 0.0
-average reward score: 5.890625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.79%) |Training time=0.47s (21.60%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6791|ppo_ep: 1|act_loss: -0.012847900390625|cri_loss: -0.006328582763671875|unsuper_loss: 0.0
-average reward score: 6.421875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.05%) |Training time=0.46s (21.34%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6792|ppo_ep: 1|act_loss: 0.02069091796875|cri_loss: 0.01058197021484375|unsuper_loss: 0.0
-average reward score: 4.94140625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.95%) |Training time=0.46s (21.38%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6793|ppo_ep: 1|act_loss: 0.00525665283203125|cri_loss: 0.003208160400390625|unsuper_loss: 0.0
-average reward score: 5.5390625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.81%) |Training time=0.47s (21.58%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6794|ppo_ep: 1|act_loss: 0.0136566162109375|cri_loss: 0.0073394775390625|unsuper_loss: 0.0
-average reward score: 5.03125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.00%) |Training time=0.46s (21.39%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6795|ppo_ep: 1|act_loss: -0.0012187957763671875|cri_loss: -0.00019693374633789062|unsuper_loss: 0.0
-average reward score: 6.234375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.65%) |Training time=0.47s (21.62%) |Others=0.10 (4.73%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6796|ppo_ep: 1|act_loss: -0.006439208984375|cri_loss: -0.003108978271484375|unsuper_loss: 0.0
-average reward score: 5.4296875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.02%) |Training time=0.46s (21.38%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6797|ppo_ep: 1|act_loss: 0.0185546875|cri_loss: 0.0095672607421875|unsuper_loss: 0.0
-average reward score: 5.5703125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.83%) |Training time=0.44s (20.52%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6798|ppo_ep: 1|act_loss: -0.00604248046875|cri_loss: -0.0026149749755859375|unsuper_loss: 0.0
-average reward score: 4.9296875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.13%) |Training time=0.46s (21.24%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.54
-[2023-04-14 12:56:54,707] [INFO] [logging.py:96:log_dist] [Rank 0] step=6800, skipped=85, lr=[8.2870651390585e-07, 8.2870651390585e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:56:54,726] [INFO] [timer.py:199:stop] epoch=0/micro_step=6800/global_step=6800, RunningAvgSamplesPerSec=105.55108170847568, CurrSamplesPerSec=114.21317600887035, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:56:54,818] [INFO] [logging.py:96:log_dist] [Rank 0] step=6800, skipped=114, lr=[4.4515143168203685e-07, 4.4515143168203685e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6799|ppo_ep: 1|act_loss: -0.0230712890625|cri_loss: -0.0112762451171875|unsuper_loss: 0.0
-average reward score: 5.48046875
--------------------------------------------------------------------------------------
-|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.77s (76.55%) |Training time=0.44s (19.14%) |Others=0.10 (4.31%)|CurSamplesPerSec=13.83 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6800|ppo_ep: 1|act_loss: 0.00914764404296875|cri_loss: 0.0051727294921875|unsuper_loss: 0.0
-average reward score: 5.55078125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.80%) |Training time=0.44s (20.53%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.54
-[2023-04-14 12:56:59,114] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
-epoch: 0|step: 6801|ppo_ep: 1|act_loss: -0.00785064697265625|cri_loss: -0.0037937164306640625|unsuper_loss: 0.0
-average reward score: 5.546875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.44%) |Training time=0.44s (20.36%) |Others=0.09 (4.20%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.54
-[2023-04-14 12:57:01,326] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
-epoch: 0|step: 6802|ppo_ep: 1|act_loss: -0.00553131103515625|cri_loss: -0.002628326416015625|unsuper_loss: 0.0
-average reward score: 5.078125
--------------------------------------------------------------------------------------
-|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.64s (73.79%) |Training time=0.46s (20.84%) |Others=0.12 (5.38%)|CurSamplesPerSec=14.43 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6803|ppo_ep: 1|act_loss: -0.00485992431640625|cri_loss: -0.0023365020751953125|unsuper_loss: 0.0
-average reward score: 5.484375
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.59%) |Training time=0.45s (20.76%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6804|ppo_ep: 1|act_loss: 0.05767822265625|cri_loss: 0.029541015625|unsuper_loss: 0.0
-average reward score: 5.078125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.40%) |Training time=0.45s (20.95%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6805|ppo_ep: 1|act_loss: -0.0240478515625|cri_loss: -0.01161956787109375|unsuper_loss: 0.0
-average reward score: 5.125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.54%) |Training time=0.45s (20.84%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6806|ppo_ep: 1|act_loss: -0.010101318359375|cri_loss: -0.004650115966796875|unsuper_loss: 0.0
-average reward score: 6.125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.38%) |Training time=0.45s (20.97%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6807|ppo_ep: 1|act_loss: 0.0011091232299804688|cri_loss: 0.0006418228149414062|unsuper_loss: 0.0
-average reward score: 5.83984375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.38%) |Training time=0.45s (20.94%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6808|ppo_ep: 1|act_loss: 0.019500732421875|cri_loss: 0.01010894775390625|unsuper_loss: 0.0
-average reward score: 4.984375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.85%) |Training time=0.47s (21.53%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.54
-[2023-04-14 12:57:16,401] [INFO] [logging.py:96:log_dist] [Rank 0] step=6810, skipped=85, lr=[8.183267443089356e-07, 8.183267443089356e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:57:16,420] [INFO] [timer.py:199:stop] epoch=0/micro_step=6810/global_step=6810, RunningAvgSamplesPerSec=105.55701946349674, CurrSamplesPerSec=104.74279208024329, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:57:16,513] [INFO] [logging.py:96:log_dist] [Rank 0] step=6810, skipped=116, lr=[4.4077546857126923e-07, 4.4077546857126923e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6809|ppo_ep: 1|act_loss: -0.0061492919921875|cri_loss: -0.002872467041015625|unsuper_loss: 0.0
-average reward score: 5.6875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.83%) |Training time=0.47s (21.57%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6810|ppo_ep: 1|act_loss: 0.0145263671875|cri_loss: 0.00749969482421875|unsuper_loss: 0.0
-average reward score: 6.078125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.02%) |Training time=0.46s (21.36%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6811|ppo_ep: 1|act_loss: 0.005298614501953125|cri_loss: 0.0028667449951171875|unsuper_loss: 0.0
-average reward score: 5.1015625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.47%) |Training time=0.45s (20.87%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6812|ppo_ep: 1|act_loss: 0.030303955078125|cri_loss: 0.0157012939453125|unsuper_loss: 0.0
-average reward score: 5.83203125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.30%) |Training time=0.45s (21.07%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6813|ppo_ep: 1|act_loss: 0.031890869140625|cri_loss: 0.0163726806640625|unsuper_loss: 0.0
-average reward score: 5.7109375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.37%) |Training time=0.45s (20.98%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6814|ppo_ep: 1|act_loss: 0.00494384765625|cri_loss: 0.002788543701171875|unsuper_loss: 0.0
-average reward score: 5.359375
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.61s (72.78%) |Training time=0.48s (21.54%) |Others=0.13 (5.69%)|CurSamplesPerSec=14.50 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6815|ppo_ep: 1|act_loss: -0.010528564453125|cri_loss: -0.004947662353515625|unsuper_loss: 0.0
-average reward score: 5.5078125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.23%) |Training time=0.46s (21.11%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6816|ppo_ep: 1|act_loss: -0.0227203369140625|cri_loss: -0.01078033447265625|unsuper_loss: 0.0
-average reward score: 6.0078125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.52%) |Training time=0.45s (20.84%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6817|ppo_ep: 1|act_loss: -0.007720947265625|cri_loss: -0.0037822723388671875|unsuper_loss: 0.0
-average reward score: 4.859375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.42%) |Training time=0.45s (20.92%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6818|ppo_ep: 1|act_loss: 0.015472412109375|cri_loss: 0.00791168212890625|unsuper_loss: 0.0
-average reward score: 4.86328125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.39%) |Training time=0.45s (20.92%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.54
-[2023-04-14 12:57:38,029] [INFO] [logging.py:96:log_dist] [Rank 0] step=6820, skipped=85, lr=[8.080063632752432e-07, 8.080063632752432e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:57:38,048] [INFO] [timer.py:199:stop] epoch=0/micro_step=6820/global_step=6820, RunningAvgSamplesPerSec=105.56231609549081, CurrSamplesPerSec=111.32460272236474, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:57:38,141] [INFO] [logging.py:96:log_dist] [Rank 0] step=6820, skipped=116, lr=[4.3533298994971135e-07, 4.3533298994971135e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6819|ppo_ep: 1|act_loss: 0.01313018798828125|cri_loss: 0.00687408447265625|unsuper_loss: 0.0
-average reward score: 5.67578125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.45%) |Training time=0.45s (20.90%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6820|ppo_ep: 1|act_loss: 0.002880096435546875|cri_loss: 0.0019025802612304688|unsuper_loss: 0.0
-average reward score: 4.97265625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.41%) |Training time=0.45s (20.93%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6821|ppo_ep: 1|act_loss: 0.01995849609375|cri_loss: 0.01031494140625|unsuper_loss: 0.0
-average reward score: 5.06640625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.46%) |Training time=0.45s (20.88%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6822|ppo_ep: 1|act_loss: 0.004322052001953125|cri_loss: 0.0023365020751953125|unsuper_loss: 0.0
-average reward score: 5.4140625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.14%) |Training time=0.46s (21.25%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6823|ppo_ep: 1|act_loss: 0.0030231475830078125|cri_loss: 0.001598358154296875|unsuper_loss: 0.0
-average reward score: 5.04296875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.50%) |Training time=0.45s (20.84%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6824|ppo_ep: 1|act_loss: 0.0154876708984375|cri_loss: 0.00792694091796875|unsuper_loss: 0.0
-average reward score: 5.23046875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.49%) |Training time=0.45s (20.85%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6825|ppo_ep: 1|act_loss: 0.0019969940185546875|cri_loss: 0.0011749267578125|unsuper_loss: 0.0
-average reward score: 5.6796875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.33%) |Training time=0.45s (21.05%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6826|ppo_ep: 1|act_loss: 0.00228118896484375|cri_loss: 0.0012102127075195312|unsuper_loss: 0.0
-average reward score: 5.29296875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.59%) |Training time=0.45s (20.75%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6827|ppo_ep: 1|act_loss: 0.0092926025390625|cri_loss: 0.004863739013671875|unsuper_loss: 0.0
-average reward score: 4.99609375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.68%) |Training time=0.45s (20.67%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6828|ppo_ep: 1|act_loss: 0.01129150390625|cri_loss: 0.005802154541015625|unsuper_loss: 0.0
-average reward score: 4.984375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.64%) |Training time=0.45s (20.71%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.54
-[2023-04-14 12:57:59,584] [INFO] [logging.py:96:log_dist] [Rank 0] step=6830, skipped=85, lr=[7.977455237777167e-07, 7.977455237777167e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:57:59,603] [INFO] [timer.py:199:stop] epoch=0/micro_step=6830/global_step=6830, RunningAvgSamplesPerSec=105.57015739655915, CurrSamplesPerSec=110.71612653627207, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:57:59,695] [INFO] [logging.py:96:log_dist] [Rank 0] step=6830, skipped=116, lr=[4.2992111467398014e-07, 4.2992111467398014e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6829|ppo_ep: 1|act_loss: 0.03900146484375|cri_loss: 0.021759033203125|unsuper_loss: 0.0
-average reward score: 5.11328125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.47%) |Training time=0.45s (20.90%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6830|ppo_ep: 1|act_loss: -0.0240936279296875|cri_loss: -0.011871337890625|unsuper_loss: 0.0
-average reward score: 5.34765625
--------------------------------------------------------------------------------------
-|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.77s (76.36%) |Training time=0.45s (19.30%) |Others=0.10 (4.34%)|CurSamplesPerSec=13.78 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6831|ppo_ep: 1|act_loss: -0.05914306640625|cri_loss: -0.0196075439453125|unsuper_loss: 0.0
-average reward score: 4.9765625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.60%) |Training time=0.45s (20.71%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6832|ppo_ep: 1|act_loss: -0.03326416015625|cri_loss: -0.0150299072265625|unsuper_loss: 0.0
-average reward score: 5.4140625
--------------------------------------------------------------------------------------
-|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.71s (75.63%) |Training time=0.45s (19.94%) |Others=0.10 (4.44%)|CurSamplesPerSec=14.15 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6833|ppo_ep: 1|act_loss: 0.0269775390625|cri_loss: 0.0163726806640625|unsuper_loss: 0.0
-average reward score: 5.4609375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.94%) |Training time=0.44s (20.43%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6834|ppo_ep: 1|act_loss: 0.03277587890625|cri_loss: 0.0167388916015625|unsuper_loss: 0.0
-average reward score: 5.078125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.54%) |Training time=0.45s (20.83%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6835|ppo_ep: 1|act_loss: -0.0255889892578125|cri_loss: -0.01232147216796875|unsuper_loss: 0.0
-average reward score: 4.8671875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.35%) |Training time=0.45s (21.01%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6836|ppo_ep: 1|act_loss: -0.007732391357421875|cri_loss: -0.003696441650390625|unsuper_loss: 0.0
-average reward score: 5.96875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.83%) |Training time=0.44s (20.51%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6837|ppo_ep: 1|act_loss: -0.0099334716796875|cri_loss: -0.004741668701171875|unsuper_loss: 0.0
-average reward score: 5.265625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.43%) |Training time=0.45s (20.93%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6838|ppo_ep: 1|act_loss: 0.00641632080078125|cri_loss: 0.0033054351806640625|unsuper_loss: 0.0
-average reward score: 5.671875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.45%) |Training time=0.45s (20.91%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.54
-[2023-04-14 12:58:21,453] [INFO] [logging.py:96:log_dist] [Rank 0] step=6840, skipped=85, lr=[7.875443779067518e-07, 7.875443779067518e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:58:21,472] [INFO] [timer.py:199:stop] epoch=0/micro_step=6840/global_step=6840, RunningAvgSamplesPerSec=105.57848061468555, CurrSamplesPerSec=109.96874088494299, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:58:21,564] [INFO] [logging.py:96:log_dist] [Rank 0] step=6840, skipped=116, lr=[4.2453992296112384e-07, 4.2453992296112384e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6839|ppo_ep: 1|act_loss: 0.01873779296875|cri_loss: 0.00957489013671875|unsuper_loss: 0.0
-average reward score: 4.9765625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.38%) |Training time=0.45s (20.99%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6840|ppo_ep: 1|act_loss: 0.0223388671875|cri_loss: 0.01148223876953125|unsuper_loss: 0.0
-average reward score: 5.9140625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.43%) |Training time=0.45s (20.92%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6841|ppo_ep: 1|act_loss: 0.00296783447265625|cri_loss: 0.0015993118286132812|unsuper_loss: 0.0
-average reward score: 5.5
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.45%) |Training time=0.45s (20.84%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6842|ppo_ep: 1|act_loss: -0.007354736328125|cri_loss: -0.003299713134765625|unsuper_loss: 0.0
-average reward score: 5.71484375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.32%) |Training time=0.45s (21.04%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6843|ppo_ep: 1|act_loss: -0.006191253662109375|cri_loss: -0.0029296875|unsuper_loss: 0.0
-average reward score: 5.26171875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.52%) |Training time=0.45s (20.82%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6844|ppo_ep: 1|act_loss: -0.024139404296875|cri_loss: -0.01123809814453125|unsuper_loss: 0.0
-average reward score: 4.97265625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.42%) |Training time=0.45s (20.94%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6845|ppo_ep: 1|act_loss: -0.0137786865234375|cri_loss: -0.006679534912109375|unsuper_loss: 0.0
-average reward score: 6.1796875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.55%) |Training time=0.45s (20.79%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6846|ppo_ep: 1|act_loss: 0.00154876708984375|cri_loss: 0.0009756088256835938|unsuper_loss: 0.0
-average reward score: 5.3359375
--------------------------------------------------------------------------------------
-|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.78s (76.22%) |Training time=0.45s (19.35%) |Others=0.10 (4.42%)|CurSamplesPerSec=13.71 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6847|ppo_ep: 1|act_loss: 0.0138397216796875|cri_loss: 0.00823974609375|unsuper_loss: 0.0
-average reward score: 5.8671875
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.40%) |Training time=0.51s (23.10%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6848|ppo_ep: 1|act_loss: 0.0116729736328125|cri_loss: 0.006023406982421875|unsuper_loss: 0.0
-average reward score: 5.3125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.76%) |Training time=0.42s (19.48%) |Others=0.10 (4.76%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.54
-[2023-04-14 12:58:43,264] [INFO] [logging.py:96:log_dist] [Rank 0] step=6850, skipped=85, lr=[7.774030768679357e-07, 7.774030768679357e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:58:43,283] [INFO] [timer.py:199:stop] epoch=0/micro_step=6850/global_step=6850, RunningAvgSamplesPerSec=105.58711633613972, CurrSamplesPerSec=132.29534435123225, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:58:43,375] [INFO] [logging.py:96:log_dist] [Rank 0] step=6850, skipped=116, lr=[4.191894945733832e-07, 4.191894945733832e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6849|ppo_ep: 1|act_loss: 0.0125579833984375|cri_loss: 0.00640869140625|unsuper_loss: 0.0
-average reward score: 5.6015625
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.68s (76.71%) |Training time=0.41s (18.73%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6850|ppo_ep: 1|act_loss: -0.00998687744140625|cri_loss: -0.004856109619140625|unsuper_loss: 0.0
-average reward score: 6.171875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.99%) |Training time=0.46s (21.31%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6851|ppo_ep: 1|act_loss: -0.0087127685546875|cri_loss: -0.004150390625|unsuper_loss: 0.0
-average reward score: 5.75390625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.15%) |Training time=0.46s (21.19%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6852|ppo_ep: 1|act_loss: -0.015838623046875|cri_loss: -0.0067901611328125|unsuper_loss: 0.0
-average reward score: 5.6171875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.00%) |Training time=0.46s (21.32%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6853|ppo_ep: 1|act_loss: 0.003696441650390625|cri_loss: 0.002101898193359375|unsuper_loss: 0.0
-average reward score: 5.8671875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.38%) |Training time=0.46s (21.04%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6854|ppo_ep: 1|act_loss: -0.0147705078125|cri_loss: -0.0072479248046875|unsuper_loss: 0.0
-average reward score: 4.82421875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.72%) |Training time=0.45s (20.60%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6855|ppo_ep: 1|act_loss: 0.0623779296875|cri_loss: 0.033233642578125|unsuper_loss: 0.0
-average reward score: 5.83984375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.53%) |Training time=0.45s (20.87%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6856|ppo_ep: 1|act_loss: -0.020172119140625|cri_loss: -0.009246826171875|unsuper_loss: 0.0
-average reward score: 5.46875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.73%) |Training time=0.47s (21.61%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6857|ppo_ep: 1|act_loss: 0.021636962890625|cri_loss: 0.0110321044921875|unsuper_loss: 0.0
-average reward score: 5.1875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.05%) |Training time=0.46s (21.26%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6858|ppo_ep: 1|act_loss: 0.00704193115234375|cri_loss: 0.003887176513671875|unsuper_loss: 0.0
-average reward score: 4.7890625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.25%) |Training time=0.48s (22.09%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.54
-[2023-04-14 12:59:04,969] [INFO] [logging.py:96:log_dist] [Rank 0] step=6860, skipped=85, lr=[7.673217709798165e-07, 7.673217709798165e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:59:04,988] [INFO] [timer.py:199:stop] epoch=0/micro_step=6860/global_step=6860, RunningAvgSamplesPerSec=105.58874862976468, CurrSamplesPerSec=101.04542453423234, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:59:05,081] [INFO] [logging.py:96:log_dist] [Rank 0] step=6860, skipped=116, lr=[4.1386990881701476e-07, 4.1386990881701476e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6859|ppo_ep: 1|act_loss: -0.013885498046875|cri_loss: -0.00675201416015625|unsuper_loss: 0.0
-average reward score: 4.53515625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.28%) |Training time=0.48s (22.08%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6860|ppo_ep: 1|act_loss: 0.002838134765625|cri_loss: 0.0017108917236328125|unsuper_loss: 0.0
-average reward score: 5.08984375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.21%) |Training time=0.48s (22.15%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6861|ppo_ep: 1|act_loss: 0.009490966796875|cri_loss: 0.00521087646484375|unsuper_loss: 0.0
-average reward score: 5.43359375
--------------------------------------------------------------------------------------
-|E2E latency=2.42s |Gather latency=0.00s (0.00%) |Generate time=1.84s (75.80%) |Training time=0.49s (20.11%) |Others=0.10 (4.09%)|CurSamplesPerSec=13.21 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6862|ppo_ep: 1|act_loss: 0.0081939697265625|cri_loss: 0.00434112548828125|unsuper_loss: 0.0
-average reward score: 5.1328125
--------------------------------------------------------------------------------------
-|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.72s (75.86%) |Training time=0.45s (19.70%) |Others=0.10 (4.44%)|CurSamplesPerSec=14.10 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6863|ppo_ep: 1|act_loss: -0.00571441650390625|cri_loss: -0.00274658203125|unsuper_loss: 0.0
-average reward score: 5.87109375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.85%) |Training time=0.42s (19.45%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6864|ppo_ep: 1|act_loss: 0.01397705078125|cri_loss: 0.0092926025390625|unsuper_loss: 0.0
-average reward score: 5.703125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.66%) |Training time=0.42s (19.62%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6865|ppo_ep: 1|act_loss: -0.015228271484375|cri_loss: -0.007534027099609375|unsuper_loss: 0.0
-average reward score: 5.265625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.61%) |Training time=0.42s (19.59%) |Others=0.10 (4.80%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6866|ppo_ep: 1|act_loss: 0.004055023193359375|cri_loss: 0.0021114349365234375|unsuper_loss: 0.0
-average reward score: 5.1640625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.97%) |Training time=0.44s (20.35%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6867|ppo_ep: 1|act_loss: -0.002750396728515625|cri_loss: -0.0011930465698242188|unsuper_loss: 0.0
-average reward score: 5.56640625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.29%) |Training time=0.43s (20.04%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.54
-[2023-04-14 12:59:24,692] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
-epoch: 0|step: 6868|ppo_ep: 1|act_loss: -0.062408447265625|cri_loss: -0.026763916015625|unsuper_loss: 0.0
-average reward score: 5.3515625
--------------------------------------------------------------------------------------
-|E2E latency=2.11s |Gather latency=0.00s (0.00%) |Generate time=1.61s (76.19%) |Training time=0.40s (19.08%) |Others=0.10 (4.74%)|CurSamplesPerSec=15.13 |AvgSamplesPerSec=14.54
-[2023-04-14 12:59:26,832] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
-[2023-04-14 12:59:26,833] [INFO] [logging.py:96:log_dist] [Rank 0] step=6870, skipped=87, lr=[7.59300023224231e-07, 7.59300023224231e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:59:26,833] [INFO] [timer.py:199:stop] epoch=0/micro_step=6870/global_step=6870, RunningAvgSamplesPerSec=105.6030931208463, CurrSamplesPerSec=119.03673509931426, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:59:26,925] [INFO] [logging.py:96:log_dist] [Rank 0] step=6870, skipped=116, lr=[4.085812445411133e-07, 4.085812445411133e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6869|ppo_ep: 1|act_loss: -0.018341064453125|cri_loss: -0.00882720947265625|unsuper_loss: 0.0
-average reward score: 5.5234375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.29%) |Training time=0.43s (20.13%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6870|ppo_ep: 1|act_loss: -0.0390625|cri_loss: -0.019287109375|unsuper_loss: 0.0
-average reward score: 4.8828125
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.02%) |Training time=0.49s (22.40%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6871|ppo_ep: 1|act_loss: 0.02459716796875|cri_loss: 0.01280975341796875|unsuper_loss: 0.0
-average reward score: 5.4140625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.23%) |Training time=0.49s (22.27%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6872|ppo_ep: 1|act_loss: 0.0074005126953125|cri_loss: 0.0037975311279296875|unsuper_loss: 0.0
-average reward score: 5.48046875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.61%) |Training time=0.47s (21.67%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6873|ppo_ep: 1|act_loss: 0.006969451904296875|cri_loss: 0.00408935546875|unsuper_loss: 0.0
-average reward score: 5.73046875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.65s (76.35%) |Training time=0.41s (19.06%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6874|ppo_ep: 1|act_loss: -0.00328826904296875|cri_loss: -0.0014743804931640625|unsuper_loss: 0.0
-average reward score: 5.375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.66s (76.41%) |Training time=0.41s (18.89%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6875|ppo_ep: 1|act_loss: -0.016204833984375|cri_loss: -0.00778961181640625|unsuper_loss: 0.0
-average reward score: 5.96484375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.31%) |Training time=0.46s (21.18%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6876|ppo_ep: 1|act_loss: -0.011138916015625|cri_loss: -0.00545501708984375|unsuper_loss: 0.0
-average reward score: 5.18359375
--------------------------------------------------------------------------------------
-|E2E latency=2.38s |Gather latency=0.00s (0.00%) |Generate time=1.78s (74.83%) |Training time=0.50s (20.93%) |Others=0.10 (4.24%)|CurSamplesPerSec=13.42 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6877|ppo_ep: 1|act_loss: 0.018096923828125|cri_loss: 0.0096282958984375|unsuper_loss: 0.0
-average reward score: 5.640625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.47%) |Training time=0.45s (20.86%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6878|ppo_ep: 1|act_loss: -0.0166473388671875|cri_loss: -0.00775909423828125|unsuper_loss: 0.0
-average reward score: 5.4140625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.25%) |Training time=0.43s (20.08%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.54
-[2023-04-14 12:59:48,741] [INFO] [logging.py:96:log_dist] [Rank 0] step=6880, skipped=87, lr=[7.493270845701285e-07, 7.493270845701285e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 12:59:48,759] [INFO] [timer.py:199:stop] epoch=0/micro_step=6880/global_step=6880, RunningAvgSamplesPerSec=105.60848080411316, CurrSamplesPerSec=114.59905703068834, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 12:59:48,851] [INFO] [logging.py:96:log_dist] [Rank 0] step=6880, skipped=116, lr=[4.033235801364402e-07, 4.033235801364402e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6879|ppo_ep: 1|act_loss: -0.0301055908203125|cri_loss: -0.0129241943359375|unsuper_loss: 0.0
-average reward score: 5.19921875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.01%) |Training time=0.44s (20.45%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6880|ppo_ep: 1|act_loss: -0.00040602684020996094|cri_loss: -5.7220458984375e-05|unsuper_loss: 0.0
-average reward score: 6.0
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.07%) |Training time=0.46s (21.28%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6881|ppo_ep: 1|act_loss: -0.0007367134094238281|cri_loss: -0.000293731689453125|unsuper_loss: 0.0
-average reward score: 5.8671875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.25%) |Training time=0.46s (21.22%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6882|ppo_ep: 1|act_loss: 0.005901336669921875|cri_loss: 0.00348663330078125|unsuper_loss: 0.0
-average reward score: 5.91015625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.19%) |Training time=0.46s (21.16%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6883|ppo_ep: 1|act_loss: 0.0136260986328125|cri_loss: 0.007183074951171875|unsuper_loss: 0.0
-average reward score: 6.16796875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.16%) |Training time=0.46s (21.30%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6884|ppo_ep: 1|act_loss: 0.0162353515625|cri_loss: 0.0089569091796875|unsuper_loss: 0.0
-average reward score: 4.76953125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.22%) |Training time=0.46s (21.16%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6885|ppo_ep: 1|act_loss: -0.008392333984375|cri_loss: -0.0038623809814453125|unsuper_loss: 0.0
-average reward score: 5.65234375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.43%) |Training time=0.45s (20.92%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6886|ppo_ep: 1|act_loss: -0.0019283294677734375|cri_loss: -0.0008502006530761719|unsuper_loss: 0.0
-average reward score: 6.03515625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.36%) |Training time=0.46s (21.01%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6887|ppo_ep: 1|act_loss: 0.0006146430969238281|cri_loss: 0.00042629241943359375|unsuper_loss: 0.0
-average reward score: 5.59765625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.78%) |Training time=0.45s (20.69%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6888|ppo_ep: 1|act_loss: 0.009735107421875|cri_loss: 0.00511932373046875|unsuper_loss: 0.0
-average reward score: 5.375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.19%) |Training time=0.45s (20.88%) |Others=0.11 (4.93%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.54
-[2023-04-14 13:00:10,366] [INFO] [logging.py:96:log_dist] [Rank 0] step=6890, skipped=87, lr=[7.394145572206594e-07, 7.394145572206594e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:00:10,384] [INFO] [timer.py:199:stop] epoch=0/micro_step=6890/global_step=6890, RunningAvgSamplesPerSec=105.61396103794894, CurrSamplesPerSec=112.90964551727154, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:00:10,477] [INFO] [logging.py:96:log_dist] [Rank 0] step=6890, skipped=116, lr=[3.9809699353426603e-07, 3.9809699353426603e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6889|ppo_ep: 1|act_loss: -0.007568359375|cri_loss: -0.0036182403564453125|unsuper_loss: 0.0
-average reward score: 5.890625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.66%) |Training time=0.45s (20.69%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6890|ppo_ep: 1|act_loss: 0.0322265625|cri_loss: 0.0164947509765625|unsuper_loss: 0.0
-average reward score: 5.84765625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.21%) |Training time=0.46s (21.14%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6891|ppo_ep: 1|act_loss: 0.00508880615234375|cri_loss: 0.002838134765625|unsuper_loss: 0.0
-average reward score: 5.00390625
--------------------------------------------------------------------------------------
-|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.63s (72.97%) |Training time=0.50s (22.53%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.31 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6892|ppo_ep: 1|act_loss: -0.0269317626953125|cri_loss: -0.0132293701171875|unsuper_loss: 0.0
-average reward score: 5.48046875
--------------------------------------------------------------------------------------
-|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.78s (76.09%) |Training time=0.46s (19.59%) |Others=0.10 (4.32%)|CurSamplesPerSec=13.70 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6893|ppo_ep: 1|act_loss: -0.014617919921875|cri_loss: -0.007213592529296875|unsuper_loss: 0.0
-average reward score: 5.48046875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.13%) |Training time=0.49s (22.27%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6894|ppo_ep: 1|act_loss: -0.0035247802734375|cri_loss: -0.0015897750854492188|unsuper_loss: 0.0
-average reward score: 5.25390625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.09%) |Training time=0.49s (22.29%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.54
-[2023-04-14 13:00:23,665] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, reducing to 16384
-epoch: 0|step: 6895|ppo_ep: 1|act_loss: -0.02349853515625|cri_loss: -0.01157379150390625|unsuper_loss: 0.0
-average reward score: 5.625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.20%) |Training time=0.46s (21.22%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6896|ppo_ep: 1|act_loss: 0.01934814453125|cri_loss: 0.01012420654296875|unsuper_loss: 0.0
-average reward score: 5.9375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.01%) |Training time=0.49s (22.37%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6897|ppo_ep: 1|act_loss: -0.0141448974609375|cri_loss: -0.006839752197265625|unsuper_loss: 0.0
-average reward score: 4.65625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.80%) |Training time=0.47s (21.58%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6898|ppo_ep: 1|act_loss: 0.0094146728515625|cri_loss: 0.00492095947265625|unsuper_loss: 0.0
-average reward score: 5.42578125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.15%) |Training time=0.48s (22.25%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.54
-[2023-04-14 13:00:32,334] [INFO] [logging.py:96:log_dist] [Rank 0] step=6900, skipped=88, lr=[7.305450557257688e-07, 7.305450557257688e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:00:32,352] [INFO] [timer.py:199:stop] epoch=0/micro_step=6900/global_step=6900, RunningAvgSamplesPerSec=105.61108100532957, CurrSamplesPerSec=104.086465661044, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:00:32,445] [INFO] [logging.py:96:log_dist] [Rank 0] step=6900, skipped=116, lr=[3.9290156220521235e-07, 3.9290156220521235e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6899|ppo_ep: 1|act_loss: -0.0037364959716796875|cri_loss: -0.0016689300537109375|unsuper_loss: 0.0
-average reward score: 5.3984375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.82%) |Training time=0.47s (21.66%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6900|ppo_ep: 1|act_loss: 0.0152740478515625|cri_loss: 0.0078887939453125|unsuper_loss: 0.0
-average reward score: 4.59765625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.19%) |Training time=0.48s (22.20%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6901|ppo_ep: 1|act_loss: -0.01605224609375|cri_loss: -0.007904052734375|unsuper_loss: 0.0
-average reward score: 5.3515625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.42%) |Training time=0.48s (21.95%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6902|ppo_ep: 1|act_loss: -0.00234222412109375|cri_loss: -0.0009698867797851562|unsuper_loss: 0.0
-average reward score: 5.01953125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.46%) |Training time=0.48s (21.91%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.54
-[2023-04-14 13:00:41,119] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
-epoch: 0|step: 6903|ppo_ep: 1|act_loss: 0.004150390625|cri_loss: 0.003192901611328125|unsuper_loss: 0.0
-average reward score: 5.17578125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.63%) |Training time=0.48s (22.33%) |Others=0.09 (4.05%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.54
-[2023-04-14 13:00:43,275] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
-epoch: 0|step: 6904|ppo_ep: 1|act_loss: -0.030059814453125|cri_loss: -0.014678955078125|unsuper_loss: 0.0
-average reward score: 5.9375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.95%) |Training time=0.47s (21.87%) |Others=0.09 (4.18%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6905|ppo_ep: 1|act_loss: -0.02294921875|cri_loss: -0.01105499267578125|unsuper_loss: 0.0
-average reward score: 5.89453125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.41%) |Training time=0.48s (21.99%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6906|ppo_ep: 1|act_loss: -0.00711822509765625|cri_loss: -0.0034732818603515625|unsuper_loss: 0.0
-average reward score: 5.53125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.52%) |Training time=0.47s (21.80%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6907|ppo_ep: 1|act_loss: -0.0452880859375|cri_loss: -0.021453857421875|unsuper_loss: 0.0
-average reward score: 5.73046875
--------------------------------------------------------------------------------------
-|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.78s (75.15%) |Training time=0.49s (20.61%) |Others=0.10 (4.24%)|CurSamplesPerSec=13.52 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6908|ppo_ep: 1|act_loss: -0.00554656982421875|cri_loss: -0.002651214599609375|unsuper_loss: 0.0
-average reward score: 5.109375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.04%) |Training time=0.44s (20.26%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.54
-[2023-04-14 13:00:54,154] [INFO] [logging.py:96:log_dist] [Rank 0] step=6910, skipped=88, lr=[7.207477138987122e-07, 7.207477138987122e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:00:54,172] [INFO] [timer.py:199:stop] epoch=0/micro_step=6910/global_step=6910, RunningAvgSamplesPerSec=105.6090924949022, CurrSamplesPerSec=118.3087503415692, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:00:54,265] [INFO] [logging.py:96:log_dist] [Rank 0] step=6910, skipped=118, lr=[3.88767700704093e-07, 3.88767700704093e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6909|ppo_ep: 1|act_loss: -0.0126800537109375|cri_loss: -0.00616455078125|unsuper_loss: 0.0
-average reward score: 5.578125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (75.05%) |Training time=0.43s (20.28%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6910|ppo_ep: 1|act_loss: 0.0088958740234375|cri_loss: 0.004871368408203125|unsuper_loss: 0.0
-average reward score: 5.265625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.99%) |Training time=0.46s (21.32%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6911|ppo_ep: 1|act_loss: 0.00817108154296875|cri_loss: 0.0043182373046875|unsuper_loss: 0.0
-average reward score: 5.7734375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.26%) |Training time=0.45s (21.04%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6912|ppo_ep: 1|act_loss: -0.016021728515625|cri_loss: -0.007843017578125|unsuper_loss: 0.0
-average reward score: 4.8046875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.96%) |Training time=0.46s (21.37%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6913|ppo_ep: 1|act_loss: 0.0341796875|cri_loss: 0.0176849365234375|unsuper_loss: 0.0
-average reward score: 4.98828125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.93%) |Training time=0.46s (21.40%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6914|ppo_ep: 1|act_loss: -0.0117950439453125|cri_loss: -0.00492095947265625|unsuper_loss: 0.0
-average reward score: 5.0390625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.14%) |Training time=0.45s (21.18%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6915|ppo_ep: 1|act_loss: 0.0032196044921875|cri_loss: 0.001708984375|unsuper_loss: 0.0
-average reward score: 5.30078125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.17%) |Training time=0.45s (21.24%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6916|ppo_ep: 1|act_loss: -0.0145416259765625|cri_loss: -0.007175445556640625|unsuper_loss: 0.0
-average reward score: 5.06640625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.19%) |Training time=0.46s (21.17%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6917|ppo_ep: 1|act_loss: -0.0222930908203125|cri_loss: -0.01084136962890625|unsuper_loss: 0.0
-average reward score: 5.21484375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.33%) |Training time=0.45s (21.07%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6918|ppo_ep: 1|act_loss: -0.0175933837890625|cri_loss: -0.00868988037109375|unsuper_loss: 0.0
-average reward score: 5.23046875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.97%) |Training time=0.46s (21.35%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.54
-[2023-04-14 13:01:15,589] [INFO] [logging.py:96:log_dist] [Rank 0] step=6920, skipped=88, lr=[7.110112069915053e-07, 7.110112069915053e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:01:15,607] [INFO] [timer.py:199:stop] epoch=0/micro_step=6920/global_step=6920, RunningAvgSamplesPerSec=105.61367204126265, CurrSamplesPerSec=104.81772013304312, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:01:15,700] [INFO] [logging.py:96:log_dist] [Rank 0] step=6920, skipped=118, lr=[3.836285426178418e-07, 3.836285426178418e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6919|ppo_ep: 1|act_loss: -0.02325439453125|cri_loss: -0.0114898681640625|unsuper_loss: 0.0
-average reward score: 4.4921875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.64%) |Training time=0.47s (21.73%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6920|ppo_ep: 1|act_loss: 0.0015735626220703125|cri_loss: 0.0011959075927734375|unsuper_loss: 0.0
-average reward score: 5.515625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.95%) |Training time=0.46s (21.13%) |Others=0.11 (4.92%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6921|ppo_ep: 1|act_loss: -0.00385284423828125|cri_loss: -0.0017328262329101562|unsuper_loss: 0.0
-average reward score: 5.83984375
--------------------------------------------------------------------------------------
-|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.58%) |Training time=0.47s (21.00%) |Others=0.10 (4.42%)|CurSamplesPerSec=14.15 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6922|ppo_ep: 1|act_loss: -0.00983428955078125|cri_loss: -0.0047607421875|unsuper_loss: 0.0
-average reward score: 6.046875
--------------------------------------------------------------------------------------
-|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.76s (74.89%) |Training time=0.49s (20.85%) |Others=0.10 (4.26%)|CurSamplesPerSec=13.64 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6923|ppo_ep: 1|act_loss: -0.0155029296875|cri_loss: -0.007350921630859375|unsuper_loss: 0.0
-average reward score: 4.94921875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.13%) |Training time=0.45s (21.18%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6924|ppo_ep: 1|act_loss: 0.0159149169921875|cri_loss: 0.00820159912109375|unsuper_loss: 0.0
-average reward score: 5.5625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.95%) |Training time=0.46s (21.39%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6925|ppo_ep: 1|act_loss: 0.02459716796875|cri_loss: 0.01308441162109375|unsuper_loss: 0.0
-average reward score: 5.109375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.05%) |Training time=0.46s (21.28%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6926|ppo_ep: 1|act_loss: -0.02740478515625|cri_loss: -0.013519287109375|unsuper_loss: 0.0
-average reward score: 5.2265625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.27%) |Training time=0.45s (21.06%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6927|ppo_ep: 1|act_loss: 0.019256591796875|cri_loss: 0.00994110107421875|unsuper_loss: 0.0
-average reward score: 5.46484375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.24%) |Training time=0.45s (21.18%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6928|ppo_ep: 1|act_loss: -0.00569915771484375|cri_loss: -0.002689361572265625|unsuper_loss: 0.0
-average reward score: 4.53125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.20%) |Training time=0.45s (21.11%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.54
-[2023-04-14 13:01:37,364] [INFO] [logging.py:96:log_dist] [Rank 0] step=6930, skipped=88, lr=[7.013356793226694e-07, 7.013356793226694e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:01:37,383] [INFO] [timer.py:199:stop] epoch=0/micro_step=6930/global_step=6930, RunningAvgSamplesPerSec=105.61659593065568, CurrSamplesPerSec=112.18822396565588, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:01:37,475] [INFO] [logging.py:96:log_dist] [Rank 0] step=6930, skipped=118, lr=[3.7852075426205644e-07, 3.7852075426205644e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6929|ppo_ep: 1|act_loss: -0.0022640228271484375|cri_loss: -0.0007987022399902344|unsuper_loss: 0.0
-average reward score: 4.76953125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.39%) |Training time=0.45s (20.94%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6930|ppo_ep: 1|act_loss: 0.00907135009765625|cri_loss: 0.00473785400390625|unsuper_loss: 0.0
-average reward score: 5.1640625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.38%) |Training time=0.45s (20.94%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6931|ppo_ep: 1|act_loss: 0.0194549560546875|cri_loss: 0.01029205322265625|unsuper_loss: 0.0
-average reward score: 5.54296875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.26%) |Training time=0.45s (21.08%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6932|ppo_ep: 1|act_loss: -0.004150390625|cri_loss: -0.00170135498046875|unsuper_loss: 0.0
-average reward score: 5.06640625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.92%) |Training time=0.46s (21.41%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6933|ppo_ep: 1|act_loss: -0.0095367431640625|cri_loss: -0.00458526611328125|unsuper_loss: 0.0
-average reward score: 4.95703125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.26%) |Training time=0.45s (21.18%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6934|ppo_ep: 1|act_loss: 0.0804443359375|cri_loss: 0.04730224609375|unsuper_loss: 0.0
-average reward score: 5.4609375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.32%) |Training time=0.48s (22.05%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6935|ppo_ep: 1|act_loss: -0.00833892822265625|cri_loss: -0.0038661956787109375|unsuper_loss: 0.0
-average reward score: 5.125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.53%) |Training time=0.47s (21.82%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6936|ppo_ep: 1|act_loss: 0.00691986083984375|cri_loss: 0.0047149658203125|unsuper_loss: 0.0
-average reward score: 5.1328125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.77%) |Training time=0.47s (21.58%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6937|ppo_ep: 1|act_loss: -0.0009016990661621094|cri_loss: -0.00012063980102539062|unsuper_loss: 0.0
-average reward score: 5.41796875
--------------------------------------------------------------------------------------
-|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.59s (69.74%) |Training time=0.59s (25.82%) |Others=0.10 (4.44%)|CurSamplesPerSec=14.05 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6938|ppo_ep: 1|act_loss: -0.00502777099609375|cri_loss: -0.0024127960205078125|unsuper_loss: 0.0
-average reward score: 5.05859375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.41%) |Training time=0.48s (21.95%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.54
-[2023-04-14 13:01:59,006] [INFO] [logging.py:96:log_dist] [Rank 0] step=6940, skipped=88, lr=[6.917212743068631e-07, 6.917212743068631e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:01:59,025] [INFO] [timer.py:199:stop] epoch=0/micro_step=6940/global_step=6940, RunningAvgSamplesPerSec=105.61316551068417, CurrSamplesPerSec=110.00605527115124, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:01:59,117] [INFO] [logging.py:96:log_dist] [Rank 0] step=6940, skipped=118, lr=[3.73444411346483e-07, 3.73444411346483e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6939|ppo_ep: 1|act_loss: -0.0012607574462890625|cri_loss: -0.00046753883361816406|unsuper_loss: 0.0
-average reward score: 5.26171875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.35%) |Training time=0.45s (21.02%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6940|ppo_ep: 1|act_loss: 0.0012426376342773438|cri_loss: 0.0006957054138183594|unsuper_loss: 0.0
-average reward score: 5.62109375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.28%) |Training time=0.45s (21.07%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6941|ppo_ep: 1|act_loss: -0.0323486328125|cri_loss: -0.0152435302734375|unsuper_loss: 0.0
-average reward score: 5.43359375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.65%) |Training time=0.44s (20.68%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6942|ppo_ep: 1|act_loss: 0.03973388671875|cri_loss: 0.020416259765625|unsuper_loss: 0.0
-average reward score: 5.51953125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.09%) |Training time=0.46s (21.27%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6943|ppo_ep: 1|act_loss: 0.0152435302734375|cri_loss: 0.00778961181640625|unsuper_loss: 0.0
-average reward score: 4.96484375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.76%) |Training time=0.46s (21.58%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6944|ppo_ep: 1|act_loss: -0.019775390625|cri_loss: -0.00980377197265625|unsuper_loss: 0.0
-average reward score: 6.36328125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.50%) |Training time=0.48s (21.88%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6945|ppo_ep: 1|act_loss: -0.0194091796875|cri_loss: -0.00959014892578125|unsuper_loss: 0.0
-average reward score: 4.8984375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.74%) |Training time=0.46s (21.60%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6946|ppo_ep: 1|act_loss: -0.0009250640869140625|cri_loss: -0.00030112266540527344|unsuper_loss: 0.0
-average reward score: 6.0
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.38%) |Training time=0.47s (21.98%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6947|ppo_ep: 1|act_loss: -0.016876220703125|cri_loss: -0.00833892822265625|unsuper_loss: 0.0
-average reward score: 5.26953125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.60%) |Training time=0.47s (21.73%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6948|ppo_ep: 1|act_loss: -0.0037174224853515625|cri_loss: -0.0017242431640625|unsuper_loss: 0.0
-average reward score: 5.1640625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.56%) |Training time=0.47s (21.78%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.54
-[2023-04-14 13:02:20,540] [INFO] [logging.py:96:log_dist] [Rank 0] step=6950, skipped=88, lr=[6.821681344527626e-07, 6.821681344527626e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:02:20,558] [INFO] [timer.py:199:stop] epoch=0/micro_step=6950/global_step=6950, RunningAvgSamplesPerSec=105.61406004000848, CurrSamplesPerSec=104.52931262752917, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:02:20,651] [INFO] [logging.py:96:log_dist] [Rank 0] step=6950, skipped=118, lr=[3.683995891147696e-07, 3.683995891147696e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6949|ppo_ep: 1|act_loss: -0.019744873046875|cri_loss: -0.00972747802734375|unsuper_loss: 0.0
-average reward score: 4.98828125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.80%) |Training time=0.47s (21.69%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6950|ppo_ep: 1|act_loss: 0.0229644775390625|cri_loss: 0.01183319091796875|unsuper_loss: 0.0
-average reward score: 5.9609375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.02%) |Training time=0.47s (21.40%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6951|ppo_ep: 1|act_loss: -0.0074310302734375|cri_loss: -0.0035552978515625|unsuper_loss: 0.0
-average reward score: 5.6640625
--------------------------------------------------------------------------------------
-|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.68s (74.76%) |Training time=0.47s (20.80%) |Others=0.10 (4.44%)|CurSamplesPerSec=14.20 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6952|ppo_ep: 1|act_loss: -0.001399993896484375|cri_loss: 0.001354217529296875|unsuper_loss: 0.0
-average reward score: 5.0625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.54%) |Training time=0.47s (21.82%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6953|ppo_ep: 1|act_loss: -0.0012025833129882812|cri_loss: -0.0003528594970703125|unsuper_loss: 0.0
-average reward score: 4.84375
--------------------------------------------------------------------------------------
-|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.89%) |Training time=0.46s (19.80%) |Others=0.10 (4.31%)|CurSamplesPerSec=13.76 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6954|ppo_ep: 1|act_loss: 0.03936767578125|cri_loss: 0.0204010009765625|unsuper_loss: 0.0
-average reward score: 5.41796875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.61%) |Training time=0.47s (21.72%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6955|ppo_ep: 1|act_loss: 0.0828857421875|cri_loss: 0.0494384765625|unsuper_loss: 0.0
-average reward score: 5.48828125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.11%) |Training time=0.46s (21.34%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6956|ppo_ep: 1|act_loss: 0.0193939208984375|cri_loss: 0.010009765625|unsuper_loss: 0.0
-average reward score: 5.4609375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.36%) |Training time=0.48s (22.02%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6957|ppo_ep: 1|act_loss: -0.0172119140625|cri_loss: -0.00844573974609375|unsuper_loss: 0.0
-average reward score: 5.296875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.80%) |Training time=0.46s (21.49%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6958|ppo_ep: 1|act_loss: 0.001201629638671875|cri_loss: 0.0013179779052734375|unsuper_loss: 0.0
-average reward score: 4.984375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.88%) |Training time=0.46s (21.46%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.54
-[2023-04-14 13:02:42,376] [INFO] [logging.py:96:log_dist] [Rank 0] step=6960, skipped=88, lr=[6.726764013609494e-07, 6.726764013609494e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:02:42,395] [INFO] [timer.py:199:stop] epoch=0/micro_step=6960/global_step=6960, RunningAvgSamplesPerSec=105.61365815136743, CurrSamplesPerSec=104.42098973666357, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:02:42,487] [INFO] [logging.py:96:log_dist] [Rank 0] step=6960, skipped=118, lr=[3.63386362343352e-07, 3.63386362343352e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6959|ppo_ep: 1|act_loss: -0.0089263916015625|cri_loss: -0.00392913818359375|unsuper_loss: 0.0
-average reward score: 5.41796875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.71%) |Training time=0.47s (21.76%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6960|ppo_ep: 1|act_loss: 0.005954742431640625|cri_loss: 0.003894805908203125|unsuper_loss: 0.0
-average reward score: 5.484375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.64%) |Training time=0.47s (21.71%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6961|ppo_ep: 1|act_loss: -0.0018558502197265625|cri_loss: -0.0008111000061035156|unsuper_loss: 0.0
-average reward score: 4.68359375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.94%) |Training time=0.46s (21.50%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6962|ppo_ep: 1|act_loss: -0.0201416015625|cri_loss: -0.007419586181640625|unsuper_loss: 0.0
-average reward score: 5.4453125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.58%) |Training time=0.47s (21.76%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6963|ppo_ep: 1|act_loss: -0.01812744140625|cri_loss: -0.00882720947265625|unsuper_loss: 0.0
-average reward score: 5.15625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.59%) |Training time=0.47s (21.86%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6964|ppo_ep: 1|act_loss: -0.00725555419921875|cri_loss: -0.0033168792724609375|unsuper_loss: 0.0
-average reward score: 5.37109375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.54%) |Training time=0.47s (21.80%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6965|ppo_ep: 1|act_loss: 0.0039215087890625|cri_loss: 0.002716064453125|unsuper_loss: 0.0
-average reward score: 5.1328125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.81%) |Training time=0.46s (21.54%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6966|ppo_ep: 1|act_loss: 0.0004565715789794922|cri_loss: 0.00033473968505859375|unsuper_loss: 0.0
-average reward score: 5.4375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.40%) |Training time=0.47s (21.98%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6967|ppo_ep: 1|act_loss: -0.090576171875|cri_loss: -0.0345458984375|unsuper_loss: 0.0
-average reward score: 5.4921875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.58%) |Training time=0.47s (21.77%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6968|ppo_ep: 1|act_loss: 0.017822265625|cri_loss: 0.00933074951171875|unsuper_loss: 0.0
-average reward score: 5.109375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.68%) |Training time=0.47s (21.68%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.54
-[2023-04-14 13:03:04,094] [INFO] [logging.py:96:log_dist] [Rank 0] step=6970, skipped=88, lr=[6.632462157218022e-07, 6.632462157218022e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:03:04,112] [INFO] [timer.py:199:stop] epoch=0/micro_step=6970/global_step=6970, RunningAvgSamplesPerSec=105.61183941805801, CurrSamplesPerSec=100.77752732722541, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:03:04,205] [INFO] [logging.py:96:log_dist] [Rank 0] step=6970, skipped=118, lr=[3.5840480534034355e-07, 3.5840480534034355e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6969|ppo_ep: 1|act_loss: 0.007671356201171875|cri_loss: 0.004016876220703125|unsuper_loss: 0.0
-average reward score: 5.8984375
--------------------------------------------------------------------------------------
-|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.23%) |Training time=0.48s (20.51%) |Others=0.10 (4.26%)|CurSamplesPerSec=13.66 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6970|ppo_ep: 1|act_loss: 0.0205230712890625|cri_loss: 0.01056671142578125|unsuper_loss: 0.0
-average reward score: 5.0234375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.63%) |Training time=0.47s (21.71%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6971|ppo_ep: 1|act_loss: -0.00946807861328125|cri_loss: -0.004543304443359375|unsuper_loss: 0.0
-average reward score: 5.45703125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.67%) |Training time=0.47s (21.78%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6972|ppo_ep: 1|act_loss: -0.007415771484375|cri_loss: -0.003566741943359375|unsuper_loss: 0.0
-average reward score: 5.80859375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.81%) |Training time=0.47s (21.57%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6973|ppo_ep: 1|act_loss: -0.034423828125|cri_loss: -0.0169219970703125|unsuper_loss: 0.0
-average reward score: 5.34765625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.71%) |Training time=0.47s (21.73%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6974|ppo_ep: 1|act_loss: 0.00567626953125|cri_loss: 0.002979278564453125|unsuper_loss: 0.0
-average reward score: 5.0625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.69%) |Training time=0.47s (21.64%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6975|ppo_ep: 1|act_loss: -0.020263671875|cri_loss: -0.009857177734375|unsuper_loss: 0.0
-average reward score: 5.0546875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.71%) |Training time=0.47s (21.73%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6976|ppo_ep: 1|act_loss: 0.00650787353515625|cri_loss: 0.0033054351806640625|unsuper_loss: 0.0
-average reward score: 5.28515625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.49%) |Training time=0.47s (21.86%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6977|ppo_ep: 1|act_loss: -0.0411376953125|cri_loss: -0.019805908203125|unsuper_loss: 0.0
-average reward score: 4.625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.71%) |Training time=0.46s (21.63%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6978|ppo_ep: 1|act_loss: 0.01885986328125|cri_loss: 0.0095672607421875|unsuper_loss: 0.0
-average reward score: 5.46875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.40%) |Training time=0.47s (21.97%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.54
-[2023-04-14 13:03:25,646] [INFO] [logging.py:96:log_dist] [Rank 0] step=6980, skipped=88, lr=[6.53877717313422e-07, 6.53877717313422e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:03:25,664] [INFO] [timer.py:199:stop] epoch=0/micro_step=6980/global_step=6980, RunningAvgSamplesPerSec=105.6101181820172, CurrSamplesPerSec=102.82117836505277, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:03:25,757] [INFO] [logging.py:96:log_dist] [Rank 0] step=6980, skipped=118, lr=[3.5345499194443663e-07, 3.5345499194443663e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6979|ppo_ep: 1|act_loss: -0.0111846923828125|cri_loss: -0.00531768798828125|unsuper_loss: 0.0
-average reward score: 4.7109375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.60%) |Training time=0.47s (21.86%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6980|ppo_ep: 1|act_loss: -0.02044677734375|cri_loss: -0.00836181640625|unsuper_loss: 0.0
-average reward score: 5.8671875
--------------------------------------------------------------------------------------
-|E2E latency=2.30s |Gather latency=0.00s (0.00%) |Generate time=1.64s (71.45%) |Training time=0.55s (24.16%) |Others=0.10 (4.38%)|CurSamplesPerSec=13.94 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6981|ppo_ep: 1|act_loss: -0.0110626220703125|cri_loss: -0.005458831787109375|unsuper_loss: 0.0
-average reward score: 5.90625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.54%) |Training time=0.47s (21.83%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6982|ppo_ep: 1|act_loss: 0.0313720703125|cri_loss: 0.01617431640625|unsuper_loss: 0.0
-average reward score: 4.60546875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.54%) |Training time=0.47s (21.81%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6983|ppo_ep: 1|act_loss: 0.0027866363525390625|cri_loss: 0.0015430450439453125|unsuper_loss: 0.0
-average reward score: 5.45703125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.91%) |Training time=0.46s (21.43%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6984|ppo_ep: 1|act_loss: -0.00066375732421875|cri_loss: -0.0002378225326538086|unsuper_loss: 0.0
-average reward score: 5.03125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.91%) |Training time=0.46s (21.43%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6985|ppo_ep: 1|act_loss: 0.031341552734375|cri_loss: 0.015899658203125|unsuper_loss: 0.0
-average reward score: 6.01171875
--------------------------------------------------------------------------------------
-|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.57%) |Training time=0.47s (20.19%) |Others=0.10 (4.24%)|CurSamplesPerSec=13.83 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6986|ppo_ep: 1|act_loss: 0.0004143714904785156|cri_loss: 0.00025391578674316406|unsuper_loss: 0.0
-average reward score: 5.1171875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.72%) |Training time=0.46s (21.61%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6987|ppo_ep: 1|act_loss: 0.005809783935546875|cri_loss: 0.0031070709228515625|unsuper_loss: 0.0
-average reward score: 4.97265625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.61%) |Training time=0.44s (20.71%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6988|ppo_ep: 1|act_loss: 0.029205322265625|cri_loss: 0.0149383544921875|unsuper_loss: 0.0
-average reward score: 5.1328125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.17%) |Training time=0.46s (21.19%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.54
-[2023-04-14 13:03:47,461] [INFO] [logging.py:96:log_dist] [Rank 0] step=6990, skipped=88, lr=[6.445710449995562e-07, 6.445710449995562e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:03:47,479] [INFO] [timer.py:199:stop] epoch=0/micro_step=6990/global_step=6990, RunningAvgSamplesPerSec=105.60882625178036, CurrSamplesPerSec=114.9070274944245, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:03:47,572] [INFO] [logging.py:96:log_dist] [Rank 0] step=6990, skipped=118, lr=[3.48536995523808e-07, 3.48536995523808e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6989|ppo_ep: 1|act_loss: -0.0030765533447265625|cri_loss: -0.0014123916625976562|unsuper_loss: 0.0
-average reward score: 5.17578125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.87%) |Training time=0.44s (20.58%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6990|ppo_ep: 1|act_loss: -0.019012451171875|cri_loss: -0.0092620849609375|unsuper_loss: 0.0
-average reward score: 5.05078125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.51%) |Training time=0.45s (20.84%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6991|ppo_ep: 1|act_loss: 0.0038299560546875|cri_loss: 0.00209808349609375|unsuper_loss: 0.0
-average reward score: 5.2578125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.55%) |Training time=0.45s (20.80%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6992|ppo_ep: 1|act_loss: -0.0175018310546875|cri_loss: -0.0086212158203125|unsuper_loss: 0.0
-average reward score: 5.46875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.38%) |Training time=0.45s (20.95%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6993|ppo_ep: 1|act_loss: -0.0030231475830078125|cri_loss: -0.00106048583984375|unsuper_loss: 0.0
-average reward score: 5.83203125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.39%) |Training time=0.45s (21.06%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6994|ppo_ep: 1|act_loss: -0.0001442432403564453|cri_loss: 3.1948089599609375e-05|unsuper_loss: 0.0
-average reward score: 5.75
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.29%) |Training time=0.45s (21.05%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6995|ppo_ep: 1|act_loss: 0.0048828125|cri_loss: 0.002719879150390625|unsuper_loss: 0.0
-average reward score: 4.8359375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.28%) |Training time=0.46s (21.16%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6996|ppo_ep: 1|act_loss: 0.0269775390625|cri_loss: 0.01395416259765625|unsuper_loss: 0.0
-average reward score: 4.69921875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.66%) |Training time=0.46s (21.60%) |Others=0.10 (4.74%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6997|ppo_ep: 1|act_loss: 0.00444793701171875|cri_loss: 0.0022983551025390625|unsuper_loss: 0.0
-average reward score: 5.73046875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.78%) |Training time=0.46s (21.55%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.54
-epoch: 0|step: 6998|ppo_ep: 1|act_loss: -0.0012836456298828125|cri_loss: -0.00047588348388671875|unsuper_loss: 0.0
-average reward score: 5.37109375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.41%) |Training time=0.47s (21.91%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.55
-[2023-04-14 13:04:08,962] [INFO] [logging.py:96:log_dist] [Rank 0] step=7000, skipped=88, lr=[6.353263367275399e-07, 6.353263367275399e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:04:08,980] [INFO] [timer.py:199:stop] epoch=0/micro_step=7000/global_step=7000, RunningAvgSamplesPerSec=105.61414569249841, CurrSamplesPerSec=114.3478458671951, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:04:09,073] [INFO] [logging.py:96:log_dist] [Rank 0] step=7000, skipped=118, lr=[3.43650888975027e-07, 3.43650888975027e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 6999|ppo_ep: 1|act_loss: -0.026153564453125|cri_loss: -0.012939453125|unsuper_loss: 0.0
-average reward score: 5.78125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.84%) |Training time=0.44s (20.60%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7000|ppo_ep: 1|act_loss: 0.012725830078125|cri_loss: 0.00653839111328125|unsuper_loss: 0.0
-average reward score: 5.26953125
--------------------------------------------------------------------------------------
-|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.61s (71.96%) |Training time=0.46s (20.72%) |Others=0.16 (7.32%)|CurSamplesPerSec=14.27 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7001|ppo_ep: 1|act_loss: -0.005931854248046875|cri_loss: -0.0025920867919921875|unsuper_loss: 0.0
-average reward score: 4.66796875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.04%) |Training time=0.44s (20.36%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7002|ppo_ep: 1|act_loss: -0.0015783309936523438|cri_loss: -0.0006132125854492188|unsuper_loss: 0.0
-average reward score: 4.69140625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.27%) |Training time=0.45s (21.07%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7003|ppo_ep: 1|act_loss: 0.017547607421875|cri_loss: 0.009063720703125|unsuper_loss: 0.0
-average reward score: 5.6171875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.77%) |Training time=0.44s (20.65%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7004|ppo_ep: 1|act_loss: 0.03875732421875|cri_loss: 0.0197906494140625|unsuper_loss: 0.0
-average reward score: 5.015625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.32%) |Training time=0.45s (21.02%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.55
-[2023-04-14 13:04:22,083] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
-epoch: 0|step: 7005|ppo_ep: 1|act_loss: 0.0027103424072265625|cri_loss: 0.0014028549194335938|unsuper_loss: 0.0
-average reward score: 5.22265625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.47%) |Training time=0.46s (21.29%) |Others=0.09 (4.23%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.55
-[2023-04-14 13:04:24,221] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
-epoch: 0|step: 7006|ppo_ep: 1|act_loss: -0.00681304931640625|cri_loss: -0.0032558441162109375|unsuper_loss: 0.0
-average reward score: 5.15234375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.77%) |Training time=0.45s (21.04%) |Others=0.09 (4.20%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7007|ppo_ep: 1|act_loss: -0.01065826416015625|cri_loss: -0.005084991455078125|unsuper_loss: 0.0
-average reward score: 5.125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.07%) |Training time=0.46s (21.32%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7008|ppo_ep: 1|act_loss: 0.01336669921875|cri_loss: 0.007213592529296875|unsuper_loss: 0.0
-average reward score: 5.6875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.34%) |Training time=0.45s (21.00%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.55
-[2023-04-14 13:04:30,588] [INFO] [logging.py:96:log_dist] [Rank 0] step=7010, skipped=88, lr=[6.261437295262503e-07, 6.261437295262503e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:04:30,607] [INFO] [timer.py:199:stop] epoch=0/micro_step=7010/global_step=7010, RunningAvgSamplesPerSec=105.62016992871143, CurrSamplesPerSec=106.12538378393765, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:04:30,699] [INFO] [logging.py:96:log_dist] [Rank 0] step=7010, skipped=120, lr=[3.397650131284941e-07, 3.397650131284941e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7009|ppo_ep: 1|act_loss: 0.00827789306640625|cri_loss: 0.00453948974609375|unsuper_loss: 0.0
-average reward score: 5.8828125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.24%) |Training time=0.46s (21.28%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7010|ppo_ep: 1|act_loss: 0.003917694091796875|cri_loss: 0.0020294189453125|unsuper_loss: 0.0
-average reward score: 5.19140625
--------------------------------------------------------------------------------------
-|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.70s (75.48%) |Training time=0.45s (20.07%) |Others=0.10 (4.45%)|CurSamplesPerSec=14.25 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7011|ppo_ep: 1|act_loss: 0.00323486328125|cri_loss: 0.0018863677978515625|unsuper_loss: 0.0
-average reward score: 5.08984375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.54%) |Training time=0.45s (20.79%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7012|ppo_ep: 1|act_loss: 0.0350341796875|cri_loss: 0.0192108154296875|unsuper_loss: 0.0
-average reward score: 5.41015625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.56%) |Training time=0.45s (20.77%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7013|ppo_ep: 1|act_loss: 0.01261138916015625|cri_loss: 0.00699615478515625|unsuper_loss: 0.0
-average reward score: 5.50390625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.54%) |Training time=0.45s (20.88%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7014|ppo_ep: 1|act_loss: -0.021697998046875|cri_loss: -0.01053619384765625|unsuper_loss: 0.0
-average reward score: 5.375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.68%) |Training time=0.45s (20.68%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7015|ppo_ep: 1|act_loss: -0.0042572021484375|cri_loss: -0.0020732879638671875|unsuper_loss: 0.0
-average reward score: 6.0703125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.75%) |Training time=0.44s (20.67%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7016|ppo_ep: 1|act_loss: -0.01311492919921875|cri_loss: -0.00646209716796875|unsuper_loss: 0.0
-average reward score: 4.98046875
--------------------------------------------------------------------------------------
-|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.76s (76.24%) |Training time=0.45s (19.44%) |Others=0.10 (4.32%)|CurSamplesPerSec=13.83 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7017|ppo_ep: 1|act_loss: -0.0018291473388671875|cri_loss: -0.0008478164672851562|unsuper_loss: 0.0
-average reward score: 5.03125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.20%) |Training time=0.45s (21.14%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7018|ppo_ep: 1|act_loss: 0.03436279296875|cri_loss: 0.017791748046875|unsuper_loss: 0.0
-average reward score: 5.73828125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.64%) |Training time=0.44s (20.68%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.55
-[2023-04-14 13:04:52,328] [INFO] [logging.py:96:log_dist] [Rank 0] step=7020, skipped=88, lr=[6.170233595040777e-07, 6.170233595040777e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:04:52,347] [INFO] [timer.py:199:stop] epoch=0/micro_step=7020/global_step=7020, RunningAvgSamplesPerSec=105.6290418746056, CurrSamplesPerSec=111.25124374398021, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:04:52,439] [INFO] [logging.py:96:log_dist] [Rank 0] step=7020, skipped=120, lr=[3.3493649053890325e-07, 3.3493649053890325e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7019|ppo_ep: 1|act_loss: 0.0511474609375|cri_loss: 0.0259552001953125|unsuper_loss: 0.0
-average reward score: 5.51953125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.47%) |Training time=0.45s (20.98%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7020|ppo_ep: 1|act_loss: -0.01282501220703125|cri_loss: -0.0062713623046875|unsuper_loss: 0.0
-average reward score: 5.234375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.37%) |Training time=0.45s (20.96%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7021|ppo_ep: 1|act_loss: 0.04193115234375|cri_loss: 0.0215911865234375|unsuper_loss: 0.0
-average reward score: 5.45703125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.42%) |Training time=0.45s (21.02%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7022|ppo_ep: 1|act_loss: -0.0235748291015625|cri_loss: -0.01166534423828125|unsuper_loss: 0.0
-average reward score: 5.73046875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.16%) |Training time=0.45s (21.19%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7023|ppo_ep: 1|act_loss: -0.025726318359375|cri_loss: -0.01245880126953125|unsuper_loss: 0.0
-average reward score: 5.98046875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.44%) |Training time=0.45s (20.90%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7024|ppo_ep: 1|act_loss: 0.0184783935546875|cri_loss: 0.009521484375|unsuper_loss: 0.0
-average reward score: 5.05859375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.70%) |Training time=0.45s (20.73%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7025|ppo_ep: 1|act_loss: 0.004638671875|cri_loss: 0.0024890899658203125|unsuper_loss: 0.0
-average reward score: 5.0546875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.20%) |Training time=0.43s (20.21%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7026|ppo_ep: 1|act_loss: -0.016021728515625|cri_loss: -0.007556915283203125|unsuper_loss: 0.0
-average reward score: 5.28125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.92%) |Training time=0.44s (20.41%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7027|ppo_ep: 1|act_loss: 0.003475189208984375|cri_loss: 0.0026454925537109375|unsuper_loss: 0.0
-average reward score: 6.0234375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.90%) |Training time=0.44s (20.41%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7028|ppo_ep: 1|act_loss: -0.01137542724609375|cri_loss: -0.005615234375|unsuper_loss: 0.0
-average reward score: 5.0390625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.90%) |Training time=0.44s (20.44%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.55
-[2023-04-14 13:05:13,796] [INFO] [logging.py:96:log_dist] [Rank 0] step=7030, skipped=88, lr=[6.079653618469082e-07, 6.079653618469082e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:05:13,814] [INFO] [timer.py:199:stop] epoch=0/micro_step=7030/global_step=7030, RunningAvgSamplesPerSec=105.64047894564446, CurrSamplesPerSec=121.2592382456413, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:05:13,907] [INFO] [logging.py:96:log_dist] [Rank 0] step=7030, skipped=120, lr=[3.3014005941345406e-07, 3.3014005941345406e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7029|ppo_ep: 1|act_loss: -0.0184326171875|cri_loss: -0.00904083251953125|unsuper_loss: 0.0
-average reward score: 5.6875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.61%) |Training time=0.43s (19.83%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7030|ppo_ep: 1|act_loss: 0.02105712890625|cri_loss: 0.0115814208984375|unsuper_loss: 0.0
-average reward score: 5.28125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.10%) |Training time=0.43s (20.22%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7031|ppo_ep: 1|act_loss: 0.05078125|cri_loss: 0.026123046875|unsuper_loss: 0.0
-average reward score: 5.375
--------------------------------------------------------------------------------------
-|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.78s (76.72%) |Training time=0.44s (19.03%) |Others=0.10 (4.26%)|CurSamplesPerSec=13.81 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7032|ppo_ep: 1|act_loss: -0.00595855712890625|cri_loss: -0.00264739990234375|unsuper_loss: 0.0
-average reward score: 5.5234375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.95%) |Training time=0.44s (20.40%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7033|ppo_ep: 1|act_loss: -0.0072021484375|cri_loss: -0.003490447998046875|unsuper_loss: 0.0
-average reward score: 5.140625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.13%) |Training time=0.44s (20.33%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7034|ppo_ep: 1|act_loss: 0.0024433135986328125|cri_loss: 0.0013751983642578125|unsuper_loss: 0.0
-average reward score: 5.55078125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.65%) |Training time=0.45s (20.69%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7035|ppo_ep: 1|act_loss: 0.00522613525390625|cri_loss: 0.0029888153076171875|unsuper_loss: 0.0
-average reward score: 4.90625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.80%) |Training time=0.44s (20.62%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7036|ppo_ep: 1|act_loss: -0.01873779296875|cri_loss: -0.0090789794921875|unsuper_loss: 0.0
-average reward score: 5.03125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.91%) |Training time=0.44s (20.43%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7037|ppo_ep: 1|act_loss: 0.0185394287109375|cri_loss: 0.00945281982421875|unsuper_loss: 0.0
-average reward score: 5.1875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.01%) |Training time=0.44s (20.40%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7038|ppo_ep: 1|act_loss: -0.0100555419921875|cri_loss: -0.004840850830078125|unsuper_loss: 0.0
-average reward score: 5.21484375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.00%) |Training time=0.44s (20.35%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.55
-[2023-04-14 13:05:35,490] [INFO] [logging.py:96:log_dist] [Rank 0] step=7040, skipped=88, lr=[5.989698708161196e-07, 5.989698708161196e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:05:35,509] [INFO] [timer.py:199:stop] epoch=0/micro_step=7040/global_step=7040, RunningAvgSamplesPerSec=105.6536996369175, CurrSamplesPerSec=118.7872625896097, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:05:35,601] [INFO] [logging.py:96:log_dist] [Rank 0] step=7040, skipped=120, lr=[3.253757908468269e-07, 3.253757908468269e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7039|ppo_ep: 1|act_loss: -0.0106964111328125|cri_loss: -0.005218505859375|unsuper_loss: 0.0
-average reward score: 5.48828125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.62%) |Training time=0.43s (19.87%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7040|ppo_ep: 1|act_loss: -0.014617919921875|cri_loss: -0.006984710693359375|unsuper_loss: 0.0
-average reward score: 5.140625
--------------------------------------------------------------------------------------
-|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.71s (76.03%) |Training time=0.44s (19.51%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.25 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7041|ppo_ep: 1|act_loss: 0.017822265625|cri_loss: 0.00927734375|unsuper_loss: 0.0
-average reward score: 5.41796875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.94%) |Training time=0.44s (20.39%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7042|ppo_ep: 1|act_loss: -0.01445770263671875|cri_loss: -0.007171630859375|unsuper_loss: 0.0
-average reward score: 5.0859375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.98%) |Training time=0.44s (20.36%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7043|ppo_ep: 1|act_loss: -0.0104522705078125|cri_loss: -0.0050811767578125|unsuper_loss: 0.0
-average reward score: 5.09375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.82%) |Training time=0.44s (20.51%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7044|ppo_ep: 1|act_loss: 0.016082763671875|cri_loss: 0.00823211669921875|unsuper_loss: 0.0
-average reward score: 5.6953125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.31%) |Training time=0.43s (19.98%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7045|ppo_ep: 1|act_loss: -0.027099609375|cri_loss: -0.013214111328125|unsuper_loss: 0.0
-average reward score: 5.62890625
--------------------------------------------------------------------------------------
-|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.15%) |Training time=0.50s (22.40%) |Others=0.10 (4.45%)|CurSamplesPerSec=14.43 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7046|ppo_ep: 1|act_loss: 0.0109405517578125|cri_loss: 0.005657196044921875|unsuper_loss: 0.0
-average reward score: 5.72265625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.04%) |Training time=0.44s (20.30%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7047|ppo_ep: 1|act_loss: -0.02777099609375|cri_loss: -0.01348114013671875|unsuper_loss: 0.0
-average reward score: 5.22265625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.05%) |Training time=0.44s (20.36%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7048|ppo_ep: 1|act_loss: -0.02685546875|cri_loss: -0.0132293701171875|unsuper_loss: 0.0
-average reward score: 5.546875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.89%) |Training time=0.44s (20.43%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.55
-[2023-04-14 13:05:57,120] [INFO] [logging.py:96:log_dist] [Rank 0] step=7050, skipped=88, lr=[5.900370197465865e-07, 5.900370197465865e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:05:57,139] [INFO] [timer.py:199:stop] epoch=0/micro_step=7050/global_step=7050, RunningAvgSamplesPerSec=105.66728407954258, CurrSamplesPerSec=116.75401299782442, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:05:57,231] [INFO] [logging.py:96:log_dist] [Rank 0] step=7050, skipped=120, lr=[3.206437554569741e-07, 3.206437554569741e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7049|ppo_ep: 1|act_loss: 0.002655029296875|cri_loss: 0.001415252685546875|unsuper_loss: 0.0
-average reward score: 5.84375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.05%) |Training time=0.44s (20.37%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7050|ppo_ep: 1|act_loss: -0.005207061767578125|cri_loss: -0.002410888671875|unsuper_loss: 0.0
-average reward score: 5.62109375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.94%) |Training time=0.44s (20.38%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7051|ppo_ep: 1|act_loss: 0.00331878662109375|cri_loss: 0.0018138885498046875|unsuper_loss: 0.0
-average reward score: 5.46875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.00%) |Training time=0.44s (20.41%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7052|ppo_ep: 1|act_loss: 0.0107879638671875|cri_loss: 0.005695343017578125|unsuper_loss: 0.0
-average reward score: 5.625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.98%) |Training time=0.44s (20.36%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7053|ppo_ep: 1|act_loss: -0.0012302398681640625|cri_loss: -0.0005526542663574219|unsuper_loss: 0.0
-average reward score: 6.0625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.50%) |Training time=0.43s (19.90%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7054|ppo_ep: 1|act_loss: -0.007366180419921875|cri_loss: -0.002979278564453125|unsuper_loss: 0.0
-average reward score: 5.359375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.93%) |Training time=0.44s (20.38%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7055|ppo_ep: 1|act_loss: 0.01214599609375|cri_loss: 0.006175994873046875|unsuper_loss: 0.0
-average reward score: 5.76171875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.05%) |Training time=0.44s (20.41%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7056|ppo_ep: 1|act_loss: 0.006378173828125|cri_loss: 0.0032520294189453125|unsuper_loss: 0.0
-average reward score: 6.0546875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.86%) |Training time=0.44s (20.48%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7057|ppo_ep: 1|act_loss: 0.006343841552734375|cri_loss: 0.003448486328125|unsuper_loss: 0.0
-average reward score: 5.9453125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.13%) |Training time=0.44s (20.28%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7058|ppo_ep: 1|act_loss: 0.0113677978515625|cri_loss: 0.005924224853515625|unsuper_loss: 0.0
-average reward score: 5.15625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.17%) |Training time=0.43s (20.15%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.55
-[2023-04-14 13:06:18,609] [INFO] [logging.py:96:log_dist] [Rank 0] step=7060, skipped=88, lr=[5.811669410447133e-07, 5.811669410447133e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:06:18,627] [INFO] [timer.py:199:stop] epoch=0/micro_step=7060/global_step=7060, RunningAvgSamplesPerSec=105.68131365475776, CurrSamplesPerSec=114.69336225070093, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:06:18,721] [INFO] [logging.py:96:log_dist] [Rank 0] step=7060, skipped=120, lr=[3.1594402338407633e-07, 3.1594402338407633e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7059|ppo_ep: 1|act_loss: 0.0213165283203125|cri_loss: 0.01114654541015625|unsuper_loss: 0.0
-average reward score: 5.72265625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.86%) |Training time=0.44s (20.56%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7060|ppo_ep: 1|act_loss: -0.0136260986328125|cri_loss: -0.0066986083984375|unsuper_loss: 0.0
-average reward score: 5.3671875
--------------------------------------------------------------------------------------
-|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.62s (72.07%) |Training time=0.53s (23.48%) |Others=0.10 (4.45%)|CurSamplesPerSec=14.22 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7061|ppo_ep: 1|act_loss: 0.0153656005859375|cri_loss: 0.00801849365234375|unsuper_loss: 0.0
-average reward score: 5.81640625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.12%) |Training time=0.47s (21.37%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7062|ppo_ep: 1|act_loss: -0.003650665283203125|cri_loss: -0.0014925003051757812|unsuper_loss: 0.0
-average reward score: 5.703125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.65%) |Training time=0.45s (20.70%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7063|ppo_ep: 1|act_loss: -0.0135955810546875|cri_loss: -0.00644683837890625|unsuper_loss: 0.0
-average reward score: 5.3046875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.78%) |Training time=0.43s (19.65%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7064|ppo_ep: 1|act_loss: -0.035491943359375|cri_loss: -0.017486572265625|unsuper_loss: 0.0
-average reward score: 5.26953125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.67s (77.20%) |Training time=0.39s (18.02%) |Others=0.10 (4.78%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.55
-[2023-04-14 13:06:31,684] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, but hysteresis is 2. Reducing hysteresis to 1
-epoch: 0|step: 7065|ppo_ep: 1|act_loss: 0.0009374618530273438|cri_loss: 0.0006699562072753906|unsuper_loss: 0.0
-average reward score: 5.7421875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.76%) |Training time=0.42s (19.61%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7066|ppo_ep: 1|act_loss: -0.005615234375|cri_loss: -0.0027637481689453125|unsuper_loss: 0.0
-average reward score: 5.88671875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.89%) |Training time=0.44s (20.38%) |Others=0.10 (4.73%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7067|ppo_ep: 1|act_loss: -0.010498046875|cri_loss: -0.004825592041015625|unsuper_loss: 0.0
-average reward score: 5.0390625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.76%) |Training time=0.45s (20.65%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7068|ppo_ep: 1|act_loss: 0.00209808349609375|cri_loss: 0.00121307373046875|unsuper_loss: 0.0
-average reward score: 6.6171875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.81%) |Training time=0.44s (20.50%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.55
-[2023-04-14 13:06:40,356] [INFO] [logging.py:96:log_dist] [Rank 0] step=7070, skipped=89, lr=[5.73237649271517e-07, 5.73237649271517e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:06:40,357] [INFO] [timer.py:199:stop] epoch=0/micro_step=7070/global_step=7070, RunningAvgSamplesPerSec=105.69375759915556, CurrSamplesPerSec=115.86235091814396, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:06:40,469] [INFO] [logging.py:96:log_dist] [Rank 0] step=7070, skipped=120, lr=[3.1127666428949986e-07, 3.1127666428949986e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7069|ppo_ep: 1|act_loss: 0.012603759765625|cri_loss: 0.00643157958984375|unsuper_loss: 0.0
-average reward score: 5.7734375
--------------------------------------------------------------------------------------
-|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.66s (74.89%) |Training time=0.45s (20.20%) |Others=0.11 (4.91%)|CurSamplesPerSec=14.45 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7070|ppo_ep: 1|act_loss: -0.01456451416015625|cri_loss: -0.00681304931640625|unsuper_loss: 0.0
-average reward score: 5.0078125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.87%) |Training time=0.44s (20.44%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7071|ppo_ep: 1|act_loss: -0.023345947265625|cri_loss: -0.0114288330078125|unsuper_loss: 0.0
-average reward score: 6.2578125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.38%) |Training time=0.45s (20.95%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7072|ppo_ep: 1|act_loss: 0.0129547119140625|cri_loss: 0.0068206787109375|unsuper_loss: 0.0
-average reward score: 6.015625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.30%) |Training time=0.45s (21.01%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7073|ppo_ep: 1|act_loss: -0.00811004638671875|cri_loss: -0.003925323486328125|unsuper_loss: 0.0
-average reward score: 5.7109375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.56%) |Training time=0.45s (20.86%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7074|ppo_ep: 1|act_loss: 0.0012836456298828125|cri_loss: 0.0007758140563964844|unsuper_loss: 0.0
-average reward score: 6.1484375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.44%) |Training time=0.45s (20.90%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7075|ppo_ep: 1|act_loss: -0.0047607421875|cri_loss: -0.002140045166015625|unsuper_loss: 0.0
-average reward score: 5.55859375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.88%) |Training time=0.44s (20.46%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7076|ppo_ep: 1|act_loss: -0.010772705078125|cri_loss: -0.005107879638671875|unsuper_loss: 0.0
-average reward score: 5.83984375
--------------------------------------------------------------------------------------
-|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.78s (76.14%) |Training time=0.46s (19.57%) |Others=0.10 (4.29%)|CurSamplesPerSec=13.69 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7077|ppo_ep: 1|act_loss: 0.005046844482421875|cri_loss: 0.0028209686279296875|unsuper_loss: 0.0
-average reward score: 5.2890625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.43%) |Training time=0.45s (21.01%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7078|ppo_ep: 1|act_loss: -0.0077362060546875|cri_loss: -0.003391265869140625|unsuper_loss: 0.0
-average reward score: 5.203125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.75%) |Training time=0.44s (20.58%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.55
-[2023-04-14 13:07:02,095] [INFO] [logging.py:96:log_dist] [Rank 0] step=7080, skipped=89, lr=[5.644871995139011e-07, 5.644871995139011e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:07:02,113] [INFO] [timer.py:199:stop] epoch=0/micro_step=7080/global_step=7080, RunningAvgSamplesPerSec=105.70222970266056, CurrSamplesPerSec=112.85040635914649, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:07:02,205] [INFO] [logging.py:96:log_dist] [Rank 0] step=7080, skipped=120, lr=[3.066417473547667e-07, 3.066417473547667e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7079|ppo_ep: 1|act_loss: 0.032135009765625|cri_loss: 0.0170440673828125|unsuper_loss: 0.0
-average reward score: 6.09375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.68%) |Training time=0.45s (20.69%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7080|ppo_ep: 1|act_loss: -0.023345947265625|cri_loss: -0.0113067626953125|unsuper_loss: 0.0
-average reward score: 5.33203125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.48%) |Training time=0.45s (20.85%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7081|ppo_ep: 1|act_loss: 0.00737762451171875|cri_loss: 0.004241943359375|unsuper_loss: 0.0
-average reward score: 5.125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.87%) |Training time=0.44s (20.44%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7082|ppo_ep: 1|act_loss: -0.0083160400390625|cri_loss: -0.0031337738037109375|unsuper_loss: 0.0
-average reward score: 5.45703125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.76%) |Training time=0.47s (21.59%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7083|ppo_ep: 1|act_loss: 0.008056640625|cri_loss: 0.00444793701171875|unsuper_loss: 0.0
-average reward score: 5.6484375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.10%) |Training time=0.46s (21.29%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7084|ppo_ep: 1|act_loss: -0.0013437271118164062|cri_loss: -0.00020885467529296875|unsuper_loss: 0.0
-average reward score: 5.2890625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.89%) |Training time=0.46s (21.46%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7085|ppo_ep: 1|act_loss: 0.0086517333984375|cri_loss: 0.00460052490234375|unsuper_loss: 0.0
-average reward score: 5.81640625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.07%) |Training time=0.46s (21.26%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7086|ppo_ep: 1|act_loss: 0.0016613006591796875|cri_loss: 0.0010709762573242188|unsuper_loss: 0.0
-average reward score: 5.71484375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.08%) |Training time=0.46s (21.26%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7087|ppo_ep: 1|act_loss: 0.016571044921875|cri_loss: 0.0089569091796875|unsuper_loss: 0.0
-average reward score: 5.015625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.61%) |Training time=0.47s (21.76%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7088|ppo_ep: 1|act_loss: -0.0264129638671875|cri_loss: -0.01276397705078125|unsuper_loss: 0.0
-average reward score: 5.6953125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.71%) |Training time=0.47s (21.65%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.55
-[2023-04-14 13:07:23,635] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, reducing to 16384
-[2023-04-14 13:07:23,636] [INFO] [logging.py:96:log_dist] [Rank 0] step=7090, skipped=90, lr=[5.566657852262237e-07, 5.566657852262237e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:07:23,636] [INFO] [timer.py:199:stop] epoch=0/micro_step=7090/global_step=7090, RunningAvgSamplesPerSec=105.70629971379196, CurrSamplesPerSec=114.99573149730027, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:07:23,728] [INFO] [logging.py:96:log_dist] [Rank 0] step=7090, skipped=120, lr=[3.020393412805259e-07, 3.020393412805259e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7089|ppo_ep: 1|act_loss: -0.05987548828125|cri_loss: -0.013641357421875|unsuper_loss: 0.0
-average reward score: 5.6640625
--------------------------------------------------------------------------------------
-|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.65%) |Training time=0.44s (20.68%) |Others=0.10 (4.67%)|CurSamplesPerSec=15.03 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7090|ppo_ep: 1|act_loss: 0.009307861328125|cri_loss: 0.004871368408203125|unsuper_loss: 0.0
-average reward score: 5.83203125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.03%) |Training time=0.46s (21.26%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7091|ppo_ep: 1|act_loss: 0.00020992755889892578|cri_loss: 0.000354766845703125|unsuper_loss: 0.0
-average reward score: 6.1015625
--------------------------------------------------------------------------------------
-|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.77s (76.12%) |Training time=0.46s (19.62%) |Others=0.10 (4.26%)|CurSamplesPerSec=13.74 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7092|ppo_ep: 1|act_loss: 0.005802154541015625|cri_loss: 0.0029811859130859375|unsuper_loss: 0.0
-average reward score: 4.9609375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.64%) |Training time=0.47s (21.71%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7093|ppo_ep: 1|act_loss: -0.00748443603515625|cri_loss: -0.0035839080810546875|unsuper_loss: 0.0
-average reward score: 5.609375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.45%) |Training time=0.45s (20.86%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7094|ppo_ep: 1|act_loss: -0.003345489501953125|cri_loss: -0.0012674331665039062|unsuper_loss: 0.0
-average reward score: 5.34375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.51%) |Training time=0.44s (20.77%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7095|ppo_ep: 1|act_loss: 0.0189208984375|cri_loss: 0.01151275634765625|unsuper_loss: 0.0
-average reward score: 5.4453125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.56%) |Training time=0.44s (20.76%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7096|ppo_ep: 1|act_loss: -0.01073455810546875|cri_loss: -0.00524139404296875|unsuper_loss: 0.0
-average reward score: 5.16796875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.73%) |Training time=0.44s (20.58%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7097|ppo_ep: 1|act_loss: -0.009918212890625|cri_loss: -0.00482177734375|unsuper_loss: 0.0
-average reward score: 5.0234375
--------------------------------------------------------------------------------------
-|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.68%) |Training time=0.44s (20.61%) |Others=0.10 (4.71%)|CurSamplesPerSec=15.01 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7098|ppo_ep: 1|act_loss: -0.014434814453125|cri_loss: -0.007038116455078125|unsuper_loss: 0.0
-average reward score: 5.91796875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.84%) |Training time=0.46s (21.20%) |Others=0.11 (4.96%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.55
-[2023-04-14 13:07:45,359] [INFO] [logging.py:96:log_dist] [Rank 0] step=7100, skipped=90, lr=[5.480354326383703e-07, 5.480354326383703e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:07:45,378] [INFO] [timer.py:199:stop] epoch=0/micro_step=7100/global_step=7100, RunningAvgSamplesPerSec=105.71372664296617, CurrSamplesPerSec=113.53677699652836, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:07:45,470] [INFO] [logging.py:96:log_dist] [Rank 0] step=7100, skipped=120, lr=[2.9746951428553884e-07, 2.9746951428553884e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7099|ppo_ep: 1|act_loss: -0.00740814208984375|cri_loss: -0.0036258697509765625|unsuper_loss: 0.0
-average reward score: 5.9609375
--------------------------------------------------------------------------------------
-|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.70s (75.48%) |Training time=0.44s (19.75%) |Others=0.11 (4.77%)|CurSamplesPerSec=14.22 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7100|ppo_ep: 1|act_loss: 0.008270263671875|cri_loss: 0.004364013671875|unsuper_loss: 0.0
-average reward score: 5.87109375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.47%) |Training time=0.44s (20.83%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7101|ppo_ep: 1|act_loss: 0.00771331787109375|cri_loss: 0.003948211669921875|unsuper_loss: 0.0
-average reward score: 5.96875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.54%) |Training time=0.44s (20.75%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7102|ppo_ep: 1|act_loss: 0.02349853515625|cri_loss: 0.01323699951171875|unsuper_loss: 0.0
-average reward score: 5.171875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.38%) |Training time=0.45s (20.94%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7103|ppo_ep: 1|act_loss: 0.0160675048828125|cri_loss: 0.00824737548828125|unsuper_loss: 0.0
-average reward score: 5.453125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.62%) |Training time=0.44s (20.79%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7104|ppo_ep: 1|act_loss: -0.020355224609375|cri_loss: -0.009979248046875|unsuper_loss: 0.0
-average reward score: 5.8046875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.01%) |Training time=0.46s (21.33%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7105|ppo_ep: 1|act_loss: -0.00275421142578125|cri_loss: -0.00083160400390625|unsuper_loss: 0.0
-average reward score: 5.84765625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.35%) |Training time=0.45s (20.96%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7106|ppo_ep: 1|act_loss: -0.00026607513427734375|cri_loss: 5.5789947509765625e-05|unsuper_loss: 0.0
-average reward score: 5.5
--------------------------------------------------------------------------------------
-|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.77s (75.59%) |Training time=0.47s (20.10%) |Others=0.10 (4.31%)|CurSamplesPerSec=13.69 |AvgSamplesPerSec=14.55
-[2023-04-14 13:08:02,789] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
-epoch: 0|step: 7107|ppo_ep: 1|act_loss: -0.019775390625|cri_loss: -0.00945281982421875|unsuper_loss: 0.0
-average reward score: 5.30078125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.51%) |Training time=0.46s (21.28%) |Others=0.09 (4.21%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.55
-[2023-04-14 13:08:04,927] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
-epoch: 0|step: 7108|ppo_ep: 1|act_loss: -0.02490234375|cri_loss: -0.01221466064453125|unsuper_loss: 0.0
-average reward score: 5.30078125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.34%) |Training time=0.46s (21.50%) |Others=0.09 (4.16%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.55
-[2023-04-14 13:08:06,968] [INFO] [logging.py:96:log_dist] [Rank 0] step=7110, skipped=90, lr=[5.394684749830206e-07, 5.394684749830206e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:08:06,986] [INFO] [timer.py:199:stop] epoch=0/micro_step=7110/global_step=7110, RunningAvgSamplesPerSec=105.7198366319902, CurrSamplesPerSec=109.52436472834691, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:08:07,079] [INFO] [logging.py:96:log_dist] [Rank 0] step=7110, skipped=122, lr=[2.9383715516136083e-07, 2.9383715516136083e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7109|ppo_ep: 1|act_loss: -0.0279388427734375|cri_loss: -0.0134124755859375|unsuper_loss: 0.0
-average reward score: 5.4921875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.22%) |Training time=0.45s (21.13%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7110|ppo_ep: 1|act_loss: -0.02520751953125|cri_loss: -0.0123291015625|unsuper_loss: 0.0
-average reward score: 5.69921875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.26%) |Training time=0.45s (21.05%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7111|ppo_ep: 1|act_loss: -0.004817962646484375|cri_loss: -0.0023059844970703125|unsuper_loss: 0.0
-average reward score: 5.05078125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.25%) |Training time=0.46s (21.14%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7112|ppo_ep: 1|act_loss: 0.018310546875|cri_loss: 0.00978851318359375|unsuper_loss: 0.0
-average reward score: 5.0234375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.08%) |Training time=0.45s (21.24%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7113|ppo_ep: 1|act_loss: 0.0018711090087890625|cri_loss: 0.0011491775512695312|unsuper_loss: 0.0
-average reward score: 5.59375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.12%) |Training time=0.45s (21.21%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7114|ppo_ep: 1|act_loss: -0.00603485107421875|cri_loss: -0.00254058837890625|unsuper_loss: 0.0
-average reward score: 5.4140625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.26%) |Training time=0.45s (21.15%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7115|ppo_ep: 1|act_loss: 0.011688232421875|cri_loss: 0.0062713623046875|unsuper_loss: 0.0
-average reward score: 5.57421875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.14%) |Training time=0.46s (21.28%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7116|ppo_ep: 1|act_loss: 0.0015411376953125|cri_loss: 0.0009899139404296875|unsuper_loss: 0.0
-average reward score: 5.75
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.06%) |Training time=0.46s (21.27%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7117|ppo_ep: 1|act_loss: 0.0027523040771484375|cri_loss: 0.00145721435546875|unsuper_loss: 0.0
-average reward score: 5.8203125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.24%) |Training time=0.45s (21.16%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7118|ppo_ep: 1|act_loss: 0.004467010498046875|cri_loss: 0.0024471282958984375|unsuper_loss: 0.0
-average reward score: 5.5390625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.15%) |Training time=0.48s (22.22%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.55
-[2023-04-14 13:08:28,469] [INFO] [logging.py:96:log_dist] [Rank 0] step=7120, skipped=90, lr=[5.309650392431538e-07, 5.309650392431538e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:08:28,487] [INFO] [timer.py:199:stop] epoch=0/micro_step=7120/global_step=7120, RunningAvgSamplesPerSec=105.7216516885662, CurrSamplesPerSec=95.38179159059753, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:08:28,580] [INFO] [logging.py:96:log_dist] [Rank 0] step=7120, skipped=122, lr=[2.8932614087824774e-07, 2.8932614087824774e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7119|ppo_ep: 1|act_loss: -0.0005578994750976562|cri_loss: -0.00013780593872070312|unsuper_loss: 0.0
-average reward score: 5.4375
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.66%) |Training time=0.50s (22.78%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7120|ppo_ep: 1|act_loss: 0.0099334716796875|cri_loss: 0.005100250244140625|unsuper_loss: 0.0
-average reward score: 5.40234375
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.49%) |Training time=0.50s (22.94%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7121|ppo_ep: 1|act_loss: -0.0159912109375|cri_loss: -0.0079345703125|unsuper_loss: 0.0
-average reward score: 6.109375
--------------------------------------------------------------------------------------
-|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.00%) |Training time=0.52s (23.56%) |Others=0.10 (4.44%)|CurSamplesPerSec=14.42 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7122|ppo_ep: 1|act_loss: -0.0201416015625|cri_loss: -0.00991058349609375|unsuper_loss: 0.0
-average reward score: 5.5
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.17%) |Training time=0.46s (21.19%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7123|ppo_ep: 1|act_loss: -0.020599365234375|cri_loss: -0.00914764404296875|unsuper_loss: 0.0
-average reward score: 5.59375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.88%) |Training time=0.46s (21.50%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7124|ppo_ep: 1|act_loss: -0.00659942626953125|cri_loss: -0.0029087066650390625|unsuper_loss: 0.0
-average reward score: 5.4453125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.43%) |Training time=0.48s (21.94%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7125|ppo_ep: 1|act_loss: 0.0066680908203125|cri_loss: 0.003437042236328125|unsuper_loss: 0.0
-average reward score: 5.3203125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.31%) |Training time=0.48s (22.16%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7126|ppo_ep: 1|act_loss: -0.01512908935546875|cri_loss: -0.0073089599609375|unsuper_loss: 0.0
-average reward score: 5.7265625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.54%) |Training time=0.47s (21.84%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7127|ppo_ep: 1|act_loss: -0.00734710693359375|cri_loss: -0.0032787322998046875|unsuper_loss: 0.0
-average reward score: 5.51171875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.40%) |Training time=0.48s (22.06%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7128|ppo_ep: 1|act_loss: 0.032562255859375|cri_loss: 0.0173492431640625|unsuper_loss: 0.0
-average reward score: 4.9921875
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.63s (73.47%) |Training time=0.49s (22.00%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.45 |AvgSamplesPerSec=14.55
-[2023-04-14 13:08:50,384] [INFO] [logging.py:96:log_dist] [Rank 0] step=7130, skipped=90, lr=[5.225252514602012e-07, 5.225252514602012e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:08:50,402] [INFO] [timer.py:199:stop] epoch=0/micro_step=7130/global_step=7130, RunningAvgSamplesPerSec=105.71516173869826, CurrSamplesPerSec=97.46446357329275, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:08:50,494] [INFO] [logging.py:96:log_dist] [Rank 0] step=7130, skipped=122, lr=[2.848478941146873e-07, 2.848478941146873e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7129|ppo_ep: 1|act_loss: 0.0390625|cri_loss: 0.0198974609375|unsuper_loss: 0.0
-average reward score: 5.5078125
--------------------------------------------------------------------------------------
-|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.70s (74.23%) |Training time=0.49s (21.49%) |Others=0.10 (4.28%)|CurSamplesPerSec=14.00 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7130|ppo_ep: 1|act_loss: 0.009613037109375|cri_loss: 0.0063323974609375|unsuper_loss: 0.0
-average reward score: 5.91015625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.57%) |Training time=0.47s (21.81%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7131|ppo_ep: 1|act_loss: -0.021148681640625|cri_loss: -0.0103607177734375|unsuper_loss: 0.0
-average reward score: 5.30859375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.76%) |Training time=0.47s (21.68%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7132|ppo_ep: 1|act_loss: 0.0086669921875|cri_loss: 0.00457000732421875|unsuper_loss: 0.0
-average reward score: 5.3984375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.30%) |Training time=0.48s (22.07%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7133|ppo_ep: 1|act_loss: -0.0162811279296875|cri_loss: -0.00804901123046875|unsuper_loss: 0.0
-average reward score: 5.3984375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.66%) |Training time=0.47s (21.79%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7134|ppo_ep: 1|act_loss: 0.007770538330078125|cri_loss: 0.00411224365234375|unsuper_loss: 0.0
-average reward score: 5.90234375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.43%) |Training time=0.47s (21.93%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7135|ppo_ep: 1|act_loss: 0.03326416015625|cri_loss: 0.017120361328125|unsuper_loss: 0.0
-average reward score: 5.515625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.63%) |Training time=0.47s (21.84%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7136|ppo_ep: 1|act_loss: -0.002288818359375|cri_loss: -0.00075531005859375|unsuper_loss: 0.0
-average reward score: 5.41796875
--------------------------------------------------------------------------------------
-|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.03%) |Training time=0.48s (20.65%) |Others=0.10 (4.32%)|CurSamplesPerSec=13.65 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7137|ppo_ep: 1|act_loss: 0.00754547119140625|cri_loss: 0.003917694091796875|unsuper_loss: 0.0
-average reward score: 5.13671875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.18%) |Training time=0.49s (22.32%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7138|ppo_ep: 1|act_loss: 0.0069580078125|cri_loss: 0.004032135009765625|unsuper_loss: 0.0
-average reward score: 6.12109375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.36%) |Training time=0.48s (22.00%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.55
-[2023-04-14 13:09:12,227] [INFO] [logging.py:96:log_dist] [Rank 0] step=7140, skipped=90, lr=[5.141492367321766e-07, 5.141492367321766e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:09:12,245] [INFO] [timer.py:199:stop] epoch=0/micro_step=7140/global_step=7140, RunningAvgSamplesPerSec=105.71016137653531, CurrSamplesPerSec=104.10979883555152, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:09:12,338] [INFO] [logging.py:96:log_dist] [Rank 0] step=7140, skipped=122, lr=[2.804024812490991e-07, 2.804024812490991e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7139|ppo_ep: 1|act_loss: 0.005550384521484375|cri_loss: 0.0030155181884765625|unsuper_loss: 0.0
-average reward score: 5.609375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.92%) |Training time=0.47s (21.58%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7140|ppo_ep: 1|act_loss: -0.011016845703125|cri_loss: -0.00543975830078125|unsuper_loss: 0.0
-average reward score: 6.203125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.20%) |Training time=0.48s (22.18%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7141|ppo_ep: 1|act_loss: 0.0009465217590332031|cri_loss: 0.0014495849609375|unsuper_loss: 0.0
-average reward score: 5.08203125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.32%) |Training time=0.48s (22.15%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7142|ppo_ep: 1|act_loss: -0.0117340087890625|cri_loss: -0.0057830810546875|unsuper_loss: 0.0
-average reward score: 5.84375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.16%) |Training time=0.48s (22.23%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7143|ppo_ep: 1|act_loss: -0.0034694671630859375|cri_loss: -0.0016698837280273438|unsuper_loss: 0.0
-average reward score: 5.84765625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.36%) |Training time=0.48s (22.11%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7144|ppo_ep: 1|act_loss: -0.0047454833984375|cri_loss: -0.0021305084228515625|unsuper_loss: 0.0
-average reward score: 5.3359375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.21%) |Training time=0.48s (22.17%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7145|ppo_ep: 1|act_loss: -0.0008382797241210938|cri_loss: 0.00013303756713867188|unsuper_loss: 0.0
-average reward score: 5.53515625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.20%) |Training time=0.48s (22.28%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7146|ppo_ep: 1|act_loss: 0.005992889404296875|cri_loss: 0.0034923553466796875|unsuper_loss: 0.0
-average reward score: 4.44921875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.27%) |Training time=0.48s (22.11%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7147|ppo_ep: 1|act_loss: -0.013641357421875|cri_loss: -0.006656646728515625|unsuper_loss: 0.0
-average reward score: 5.35546875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.38%) |Training time=0.48s (22.09%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7148|ppo_ep: 1|act_loss: 0.071044921875|cri_loss: 0.0413818359375|unsuper_loss: 0.0
-average reward score: 5.31640625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.46%) |Training time=0.47s (21.91%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.55
-[2023-04-14 13:09:33,886] [INFO] [logging.py:96:log_dist] [Rank 0] step=7150, skipped=90, lr=[5.058371192118248e-07, 5.058371192118248e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:09:33,904] [INFO] [timer.py:199:stop] epoch=0/micro_step=7150/global_step=7150, RunningAvgSamplesPerSec=105.70305846331202, CurrSamplesPerSec=100.97480326808203, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:09:33,996] [INFO] [logging.py:96:log_dist] [Rank 0] step=7150, skipped=122, lr=[2.7598996817322614e-07, 2.7598996817322614e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7149|ppo_ep: 1|act_loss: -0.01239013671875|cri_loss: -0.00600433349609375|unsuper_loss: 0.0
-average reward score: 5.70703125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.38%) |Training time=0.48s (22.11%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7150|ppo_ep: 1|act_loss: 0.017181396484375|cri_loss: 0.00881195068359375|unsuper_loss: 0.0
-average reward score: 5.53515625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.33%) |Training time=0.48s (22.06%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7151|ppo_ep: 1|act_loss: 0.00327301025390625|cri_loss: 0.00215911865234375|unsuper_loss: 0.0
-average reward score: 5.2734375
--------------------------------------------------------------------------------------
-|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.77s (75.14%) |Training time=0.48s (20.35%) |Others=0.11 (4.51%)|CurSamplesPerSec=13.61 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7152|ppo_ep: 1|act_loss: -0.0084381103515625|cri_loss: -0.00391387939453125|unsuper_loss: 0.0
-average reward score: 5.6015625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.04%) |Training time=0.49s (22.35%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7153|ppo_ep: 1|act_loss: 0.01629638671875|cri_loss: 0.009246826171875|unsuper_loss: 0.0
-average reward score: 5.73046875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.62%) |Training time=0.47s (21.83%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7154|ppo_ep: 1|act_loss: 0.0115966796875|cri_loss: 0.006084442138671875|unsuper_loss: 0.0
-average reward score: 5.28125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.45%) |Training time=0.47s (21.91%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7155|ppo_ep: 1|act_loss: 0.0010461807250976562|cri_loss: 0.0006875991821289062|unsuper_loss: 0.0
-average reward score: 5.70703125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.98%) |Training time=0.46s (21.47%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7156|ppo_ep: 1|act_loss: 0.0706787109375|cri_loss: 0.04571533203125|unsuper_loss: 0.0
-average reward score: 5.87890625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.52%) |Training time=0.47s (21.85%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7157|ppo_ep: 1|act_loss: 0.0011568069458007812|cri_loss: 0.0007429122924804688|unsuper_loss: 0.0
-average reward score: 5.8203125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.62%) |Training time=0.47s (21.85%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7158|ppo_ep: 1|act_loss: -0.006542205810546875|cri_loss: -0.0032062530517578125|unsuper_loss: 0.0
-average reward score: 5.640625
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.61%) |Training time=0.48s (21.80%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.55
-[2023-04-14 13:09:55,739] [INFO] [logging.py:96:log_dist] [Rank 0] step=7160, skipped=90, lr=[4.975890221047792e-07, 4.975890221047792e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:09:55,757] [INFO] [timer.py:199:stop] epoch=0/micro_step=7160/global_step=7160, RunningAvgSamplesPerSec=105.69891764973828, CurrSamplesPerSec=103.62230874288463, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:09:55,851] [INFO] [logging.py:96:log_dist] [Rank 0] step=7160, skipped=122, lr=[2.7161042029115706e-07, 2.7161042029115706e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7159|ppo_ep: 1|act_loss: 0.02386474609375|cri_loss: 0.012298583984375|unsuper_loss: 0.0
-average reward score: 5.140625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.63%) |Training time=0.47s (21.81%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7160|ppo_ep: 1|act_loss: 0.00872802734375|cri_loss: 0.00505828857421875|unsuper_loss: 0.0
-average reward score: 5.12109375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.49%) |Training time=0.47s (21.84%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7161|ppo_ep: 1|act_loss: -0.02520751953125|cri_loss: -0.00902557373046875|unsuper_loss: 0.0
-average reward score: 5.51171875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.56%) |Training time=0.47s (21.88%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7162|ppo_ep: 1|act_loss: -0.0174560546875|cri_loss: -0.0083770751953125|unsuper_loss: 0.0
-average reward score: 5.22265625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.16%) |Training time=0.48s (22.15%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7163|ppo_ep: 1|act_loss: -0.0013837814331054688|cri_loss: -0.0005974769592285156|unsuper_loss: 0.0
-average reward score: 5.96875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.69%) |Training time=0.47s (21.74%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7164|ppo_ep: 1|act_loss: -0.01415252685546875|cri_loss: -0.0066070556640625|unsuper_loss: 0.0
-average reward score: 4.609375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.51%) |Training time=0.47s (21.84%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7165|ppo_ep: 1|act_loss: -0.0080413818359375|cri_loss: -0.00374603271484375|unsuper_loss: 0.0
-average reward score: 5.30859375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.63%) |Training time=0.47s (21.82%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7166|ppo_ep: 1|act_loss: -0.006404876708984375|cri_loss: -0.003139495849609375|unsuper_loss: 0.0
-average reward score: 4.86328125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.45%) |Training time=0.48s (21.94%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7167|ppo_ep: 1|act_loss: 0.0283203125|cri_loss: 0.0145416259765625|unsuper_loss: 0.0
-average reward score: 5.41015625
--------------------------------------------------------------------------------------
-|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.50%) |Training time=0.47s (20.24%) |Others=0.10 (4.26%)|CurSamplesPerSec=13.72 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7168|ppo_ep: 1|act_loss: -0.0019178390502929688|cri_loss: -0.0005779266357421875|unsuper_loss: 0.0
-average reward score: 4.91015625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.21%) |Training time=0.48s (22.18%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.55
-[2023-04-14 13:10:17,524] [INFO] [logging.py:96:log_dist] [Rank 0] step=7170, skipped=90, lr=[4.894050676677376e-07, 4.894050676677376e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:10:17,543] [INFO] [timer.py:199:stop] epoch=0/micro_step=7170/global_step=7170, RunningAvgSamplesPerSec=105.69460992636105, CurrSamplesPerSec=101.85904479156943, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:10:17,635] [INFO] [logging.py:96:log_dist] [Rank 0] step=7170, skipped=122, lr=[2.672639025183571e-07, 2.672639025183571e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7169|ppo_ep: 1|act_loss: 0.0014104843139648438|cri_loss: 0.0010814666748046875|unsuper_loss: 0.0
-average reward score: 5.2109375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.47%) |Training time=0.48s (22.02%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7170|ppo_ep: 1|act_loss: 0.01959228515625|cri_loss: 0.00992584228515625|unsuper_loss: 0.0
-average reward score: 5.48046875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.46%) |Training time=0.47s (21.89%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7171|ppo_ep: 1|act_loss: 0.027862548828125|cri_loss: 0.01568603515625|unsuper_loss: 0.0
-average reward score: 5.57421875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.58%) |Training time=0.47s (21.86%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7172|ppo_ep: 1|act_loss: 0.0726318359375|cri_loss: 0.040069580078125|unsuper_loss: 0.0
-average reward score: 5.3984375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.47%) |Training time=0.48s (21.94%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7173|ppo_ep: 1|act_loss: 0.045166015625|cri_loss: 0.02301025390625|unsuper_loss: 0.0
-average reward score: 5.625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.45%) |Training time=0.48s (22.01%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7174|ppo_ep: 1|act_loss: 0.004367828369140625|cri_loss: 0.0023746490478515625|unsuper_loss: 0.0
-average reward score: 5.0546875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.43%) |Training time=0.47s (21.96%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7175|ppo_ep: 1|act_loss: 0.0097808837890625|cri_loss: 0.005435943603515625|unsuper_loss: 0.0
-average reward score: 6.01953125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.65%) |Training time=0.47s (21.79%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7176|ppo_ep: 1|act_loss: 0.0154571533203125|cri_loss: 0.00794219970703125|unsuper_loss: 0.0
-average reward score: 5.5859375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.31%) |Training time=0.48s (22.07%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7177|ppo_ep: 1|act_loss: 0.004146575927734375|cri_loss: 0.00255584716796875|unsuper_loss: 0.0
-average reward score: 5.453125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.57%) |Training time=0.47s (21.88%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7178|ppo_ep: 1|act_loss: -0.03350830078125|cri_loss: -0.0157623291015625|unsuper_loss: 0.0
-average reward score: 4.9609375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.52%) |Training time=0.47s (21.85%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.55
-[2023-04-14 13:10:39,135] [INFO] [logging.py:96:log_dist] [Rank 0] step=7180, skipped=90, lr=[4.812853772066454e-07, 4.812853772066454e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:10:39,153] [INFO] [timer.py:199:stop] epoch=0/micro_step=7180/global_step=7180, RunningAvgSamplesPerSec=105.69055579370145, CurrSamplesPerSec=102.88281263773706, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:10:39,246] [INFO] [logging.py:96:log_dist] [Rank 0] step=7180, skipped=122, lr=[2.629504792807036e-07, 2.629504792807036e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7179|ppo_ep: 1|act_loss: -0.004405975341796875|cri_loss: -0.0016450881958007812|unsuper_loss: 0.0
-average reward score: 4.6796875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.63%) |Training time=0.47s (21.86%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7180|ppo_ep: 1|act_loss: -0.0251007080078125|cri_loss: -0.012298583984375|unsuper_loss: 0.0
-average reward score: 6.08203125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.56%) |Training time=0.47s (21.82%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7181|ppo_ep: 1|act_loss: 0.0293731689453125|cri_loss: 0.01560211181640625|unsuper_loss: 0.0
-average reward score: 5.6875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.66%) |Training time=0.47s (21.76%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7182|ppo_ep: 1|act_loss: -0.002353668212890625|cri_loss: 0.0001239776611328125|unsuper_loss: 0.0
-average reward score: 5.7265625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.49%) |Training time=0.47s (21.85%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7183|ppo_ep: 1|act_loss: 0.01068115234375|cri_loss: 0.00550079345703125|unsuper_loss: 0.0
-average reward score: 5.546875
--------------------------------------------------------------------------------------
-|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.54%) |Training time=0.47s (20.15%) |Others=0.10 (4.31%)|CurSamplesPerSec=13.75 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7184|ppo_ep: 1|act_loss: -0.0238189697265625|cri_loss: -0.01168060302734375|unsuper_loss: 0.0
-average reward score: 5.46484375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.46%) |Training time=0.47s (21.92%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7185|ppo_ep: 1|act_loss: 0.01348114013671875|cri_loss: 0.0068206787109375|unsuper_loss: 0.0
-average reward score: 5.89453125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.75%) |Training time=0.47s (21.69%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7186|ppo_ep: 1|act_loss: 0.0394287109375|cri_loss: 0.0218963623046875|unsuper_loss: 0.0
-average reward score: 5.8359375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.41%) |Training time=0.47s (21.92%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7187|ppo_ep: 1|act_loss: 0.002780914306640625|cri_loss: 0.0017032623291015625|unsuper_loss: 0.0
-average reward score: 6.01171875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.73%) |Training time=0.47s (21.68%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7188|ppo_ep: 1|act_loss: -0.01236724853515625|cri_loss: -0.006107330322265625|unsuper_loss: 0.0
-average reward score: 4.80859375
--------------------------------------------------------------------------------------
-|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.76%) |Training time=0.47s (20.81%) |Others=0.10 (4.44%)|CurSamplesPerSec=14.17 |AvgSamplesPerSec=14.55
-[2023-04-14 13:11:01,012] [INFO] [logging.py:96:log_dist] [Rank 0] step=7190, skipped=90, lr=[4.732300710749039e-07, 4.732300710749039e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:11:01,030] [INFO] [timer.py:199:stop] epoch=0/micro_step=7190/global_step=7190, RunningAvgSamplesPerSec=105.68814051767909, CurrSamplesPerSec=104.36301713908928, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:11:01,123] [INFO] [logging.py:96:log_dist] [Rank 0] step=7190, skipped=122, lr=[2.586702145135353e-07, 2.586702145135353e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7189|ppo_ep: 1|act_loss: -0.003192901611328125|cri_loss: -0.0009927749633789062|unsuper_loss: 0.0
-average reward score: 5.484375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.60%) |Training time=0.47s (21.78%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7190|ppo_ep: 1|act_loss: -0.00814056396484375|cri_loss: -0.003192901611328125|unsuper_loss: 0.0
-average reward score: 5.64453125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.32%) |Training time=0.48s (22.04%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7191|ppo_ep: 1|act_loss: -0.01434326171875|cri_loss: -0.0070953369140625|unsuper_loss: 0.0
-average reward score: 5.34375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.58%) |Training time=0.47s (21.78%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7192|ppo_ep: 1|act_loss: -0.025177001953125|cri_loss: -0.01215362548828125|unsuper_loss: 0.0
-average reward score: 6.16015625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.39%) |Training time=0.47s (21.98%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7193|ppo_ep: 1|act_loss: -0.01399993896484375|cri_loss: -0.00669097900390625|unsuper_loss: 0.0
-average reward score: 5.91796875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.35%) |Training time=0.48s (22.02%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7194|ppo_ep: 1|act_loss: -0.01259613037109375|cri_loss: -0.0061798095703125|unsuper_loss: 0.0
-average reward score: 5.296875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.71%) |Training time=0.47s (21.68%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7195|ppo_ep: 1|act_loss: -0.02081298828125|cri_loss: -0.01021575927734375|unsuper_loss: 0.0
-average reward score: 5.09765625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.44%) |Training time=0.47s (21.95%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7196|ppo_ep: 1|act_loss: 0.01335906982421875|cri_loss: 0.00733184814453125|unsuper_loss: 0.0
-average reward score: 5.265625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.21%) |Training time=0.48s (22.16%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7197|ppo_ep: 1|act_loss: 0.0306243896484375|cri_loss: 0.01568603515625|unsuper_loss: 0.0
-average reward score: 5.125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.63%) |Training time=0.47s (21.71%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7198|ppo_ep: 1|act_loss: 0.00463104248046875|cri_loss: 0.002452850341796875|unsuper_loss: 0.0
-average reward score: 5.73046875
--------------------------------------------------------------------------------------
-|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.18%) |Training time=0.48s (20.49%) |Others=0.10 (4.32%)|CurSamplesPerSec=13.71 |AvgSamplesPerSec=14.55
-[2023-04-14 13:11:22,828] [INFO] [logging.py:96:log_dist] [Rank 0] step=7200, skipped=90, lr=[4.6523926867158245e-07, 4.6523926867158245e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:11:22,846] [INFO] [timer.py:199:stop] epoch=0/micro_step=7200/global_step=7200, RunningAvgSamplesPerSec=105.68242647988721, CurrSamplesPerSec=93.17142805575108, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:11:22,939] [INFO] [logging.py:96:log_dist] [Rank 0] step=7200, skipped=122, lr=[2.544231716607015e-07, 2.544231716607015e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7199|ppo_ep: 1|act_loss: 0.0150604248046875|cri_loss: 0.008148193359375|unsuper_loss: 0.0
-average reward score: 5.79296875
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.32%) |Training time=0.51s (23.00%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.53 |AvgSamplesPerSec=14.55
-[2023-04-14 13:11:24,992] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, but hysteresis is 2. Reducing hysteresis to 1
-epoch: 0|step: 7200|ppo_ep: 1|act_loss: 0.0025997161865234375|cri_loss: 0.0015277862548828125|unsuper_loss: 0.0
-average reward score: 5.96484375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.66s (77.08%) |Training time=0.39s (18.09%) |Others=0.10 (4.83%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7201|ppo_ep: 1|act_loss: 0.027587890625|cri_loss: 0.01409149169921875|unsuper_loss: 0.0
-average reward score: 5.02734375
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.67s (76.44%) |Training time=0.42s (18.98%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7202|ppo_ep: 1|act_loss: 0.0214385986328125|cri_loss: 0.01111602783203125|unsuper_loss: 0.0
-average reward score: 5.1171875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.24%) |Training time=0.46s (21.11%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7203|ppo_ep: 1|act_loss: -0.0129547119140625|cri_loss: -0.00603485107421875|unsuper_loss: 0.0
-average reward score: 5.18359375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.12%) |Training time=0.46s (21.23%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7204|ppo_ep: 1|act_loss: 0.003009796142578125|cri_loss: 0.0017175674438476562|unsuper_loss: 0.0
-average reward score: 6.03125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.92%) |Training time=0.47s (21.44%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7205|ppo_ep: 1|act_loss: 0.001621246337890625|cri_loss: 0.001251220703125|unsuper_loss: 0.0
-average reward score: 5.05078125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.14%) |Training time=0.46s (21.05%) |Others=0.10 (4.81%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7206|ppo_ep: 1|act_loss: 0.001617431640625|cri_loss: 0.0009260177612304688|unsuper_loss: 0.0
-average reward score: 5.40234375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.81%) |Training time=0.45s (20.44%) |Others=0.10 (4.75%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7207|ppo_ep: 1|act_loss: -3.62396240234375e-05|cri_loss: 0.0010194778442382812|unsuper_loss: 0.0
-average reward score: 5.8046875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.40%) |Training time=0.46s (20.93%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7208|ppo_ep: 1|act_loss: 0.013427734375|cri_loss: 0.007190704345703125|unsuper_loss: 0.0
-average reward score: 5.234375
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.77%) |Training time=0.50s (22.64%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.53 |AvgSamplesPerSec=14.55
-[2023-04-14 13:11:44,585] [INFO] [logging.py:96:log_dist] [Rank 0] step=7210, skipped=91, lr=[4.581027951092893e-07, 4.581027951092893e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:11:44,603] [INFO] [timer.py:199:stop] epoch=0/micro_step=7210/global_step=7210, RunningAvgSamplesPerSec=105.68967135042475, CurrSamplesPerSec=108.45140855085393, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:11:44,687] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
-[2023-04-14 13:11:44,687] [INFO] [logging.py:96:log_dist] [Rank 0] step=7210, skipped=123, lr=[2.506292898694468e-07, 2.506292898694468e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7209|ppo_ep: 1|act_loss: 0.027099609375|cri_loss: 0.014129638671875|unsuper_loss: 0.0
-average reward score: 6.0546875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.59%) |Training time=0.46s (21.25%) |Others=0.09 (4.16%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.55
-[2023-04-14 13:11:46,852] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
-epoch: 0|step: 7210|ppo_ep: 1|act_loss: 0.01174163818359375|cri_loss: 0.005977630615234375|unsuper_loss: 0.0
-average reward score: 5.55078125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.51%) |Training time=0.48s (22.33%) |Others=0.09 (4.16%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7211|ppo_ep: 1|act_loss: -0.00045490264892578125|cri_loss: -0.00011813640594482422|unsuper_loss: 0.0
-average reward score: 5.1953125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.12%) |Training time=0.48s (22.27%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7212|ppo_ep: 1|act_loss: 0.012725830078125|cri_loss: 0.00652313232421875|unsuper_loss: 0.0
-average reward score: 5.8203125
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.02%) |Training time=0.49s (22.25%) |Others=0.10 (4.73%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7213|ppo_ep: 1|act_loss: -0.0278472900390625|cri_loss: -0.01361083984375|unsuper_loss: 0.0
-average reward score: 5.76953125
--------------------------------------------------------------------------------------
-|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.78s (75.15%) |Training time=0.49s (20.57%) |Others=0.10 (4.28%)|CurSamplesPerSec=13.51 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7214|ppo_ep: 1|act_loss: 0.004974365234375|cri_loss: 0.0028705596923828125|unsuper_loss: 0.0
-average reward score: 6.58984375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.47%) |Training time=0.48s (21.91%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.55
-[2023-04-14 13:11:57,807] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, reducing to 16384
-epoch: 0|step: 7215|ppo_ep: 1|act_loss: -0.00435638427734375|cri_loss: -0.0017786026000976562|unsuper_loss: 0.0
-average reward score: 4.76171875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.43%) |Training time=0.45s (20.89%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7216|ppo_ep: 1|act_loss: -0.0201416015625|cri_loss: -0.00984954833984375|unsuper_loss: 0.0
-average reward score: 4.91796875
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.40%) |Training time=0.48s (21.79%) |Others=0.11 (4.82%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7217|ppo_ep: 1|act_loss: 0.019775390625|cri_loss: 0.0100860595703125|unsuper_loss: 0.0
-average reward score: 4.90234375
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.20%) |Training time=0.49s (22.23%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.47 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7218|ppo_ep: 1|act_loss: 0.0010919570922851562|cri_loss: 0.0005993843078613281|unsuper_loss: 0.0
-average reward score: 5.8125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.75%) |Training time=0.49s (22.66%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.55
-[2023-04-14 13:12:06,535] [INFO] [logging.py:96:log_dist] [Rank 0] step=7220, skipped=92, lr=[4.5101875119972634e-07, 4.5101875119972634e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:12:06,553] [INFO] [timer.py:199:stop] epoch=0/micro_step=7220/global_step=7220, RunningAvgSamplesPerSec=105.682454829117, CurrSamplesPerSec=98.92933678188963, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:12:06,646] [INFO] [logging.py:96:log_dist] [Rank 0] step=7220, skipped=124, lr=[2.4686241437572036e-07, 2.4686241437572036e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7219|ppo_ep: 1|act_loss: -0.0048065185546875|cri_loss: -0.002262115478515625|unsuper_loss: 0.0
-average reward score: 5.5390625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.97%) |Training time=0.49s (22.40%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7220|ppo_ep: 1|act_loss: 0.0068359375|cri_loss: 0.00359344482421875|unsuper_loss: 0.0
-average reward score: 5.58984375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.69%) |Training time=0.49s (22.68%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7221|ppo_ep: 1|act_loss: -0.020599365234375|cri_loss: -0.0101776123046875|unsuper_loss: 0.0
-average reward score: 5.59765625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.79%) |Training time=0.49s (22.58%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7222|ppo_ep: 1|act_loss: 0.01129150390625|cri_loss: 0.00586700439453125|unsuper_loss: 0.0
-average reward score: 4.8984375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.69%) |Training time=0.49s (22.70%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7223|ppo_ep: 1|act_loss: -0.0013971328735351562|cri_loss: -0.0006508827209472656|unsuper_loss: 0.0
-average reward score: 5.69921875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.84%) |Training time=0.49s (22.56%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7224|ppo_ep: 1|act_loss: -0.02392578125|cri_loss: -0.01168060302734375|unsuper_loss: 0.0
-average reward score: 5.28125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.81%) |Training time=0.49s (22.58%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7225|ppo_ep: 1|act_loss: 0.01824951171875|cri_loss: 0.0111083984375|unsuper_loss: 0.0
-average reward score: 5.5078125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.67%) |Training time=0.49s (22.73%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7226|ppo_ep: 1|act_loss: -0.014801025390625|cri_loss: -0.007198333740234375|unsuper_loss: 0.0
-average reward score: 5.4375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.80%) |Training time=0.49s (22.57%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7227|ppo_ep: 1|act_loss: 0.03509521484375|cri_loss: 0.018218994140625|unsuper_loss: 0.0
-average reward score: 6.125
--------------------------------------------------------------------------------------
-|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.58s (71.06%) |Training time=0.49s (22.03%) |Others=0.15 (6.91%)|CurSamplesPerSec=14.36 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7228|ppo_ep: 1|act_loss: 0.00925445556640625|cri_loss: 0.004772186279296875|unsuper_loss: 0.0
-average reward score: 5.328125
--------------------------------------------------------------------------------------
-|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.62s (72.92%) |Training time=0.50s (22.49%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.44 |AvgSamplesPerSec=14.55
-[2023-04-14 13:12:28,345] [INFO] [logging.py:96:log_dist] [Rank 0] step=7230, skipped=92, lr=[4.4320918629398245e-07, 4.4320918629398245e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:12:28,364] [INFO] [timer.py:199:stop] epoch=0/micro_step=7230/global_step=7230, RunningAvgSamplesPerSec=105.67111864118128, CurrSamplesPerSec=109.30850571717106, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:12:28,456] [INFO] [logging.py:96:log_dist] [Rank 0] step=7230, skipped=124, lr=[2.427087262091782e-07, 2.427087262091782e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7229|ppo_ep: 1|act_loss: 0.0153045654296875|cri_loss: 0.008270263671875|unsuper_loss: 0.0
-average reward score: 5.546875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.14%) |Training time=0.46s (21.19%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7230|ppo_ep: 1|act_loss: -0.0004405975341796875|cri_loss: -7.367134094238281e-05|unsuper_loss: 0.0
-average reward score: 4.96484375
--------------------------------------------------------------------------------------
-|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.59s (71.26%) |Training time=0.54s (24.25%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.34 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7231|ppo_ep: 1|act_loss: -0.0009431838989257812|cri_loss: -0.00023365020751953125|unsuper_loss: 0.0
-average reward score: 5.45703125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.12%) |Training time=0.49s (22.26%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7232|ppo_ep: 1|act_loss: 0.016845703125|cri_loss: 0.0087432861328125|unsuper_loss: 0.0
-average reward score: 4.96484375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.99%) |Training time=0.49s (22.35%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7233|ppo_ep: 1|act_loss: 0.03729248046875|cri_loss: 0.019073486328125|unsuper_loss: 0.0
-average reward score: 5.359375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.83%) |Training time=0.49s (22.58%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7234|ppo_ep: 1|act_loss: 0.01044464111328125|cri_loss: 0.005260467529296875|unsuper_loss: 0.0
-average reward score: 5.5234375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.05%) |Training time=0.49s (22.35%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7235|ppo_ep: 1|act_loss: -0.01177978515625|cri_loss: -0.00554656982421875|unsuper_loss: 0.0
-average reward score: 5.5703125
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.01%) |Training time=0.49s (22.40%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7236|ppo_ep: 1|act_loss: 0.01081085205078125|cri_loss: 0.005710601806640625|unsuper_loss: 0.0
-average reward score: 5.66796875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.12%) |Training time=0.49s (22.27%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7237|ppo_ep: 1|act_loss: 0.0028858184814453125|cri_loss: 0.0016603469848632812|unsuper_loss: 0.0
-average reward score: 5.41796875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.94%) |Training time=0.49s (22.48%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7238|ppo_ep: 1|act_loss: -0.0245361328125|cri_loss: -0.011871337890625|unsuper_loss: 0.0
-average reward score: 4.9375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.90%) |Training time=0.49s (22.52%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.55
-[2023-04-14 13:12:50,207] [INFO] [logging.py:96:log_dist] [Rank 0] step=7240, skipped=92, lr=[4.354645700985926e-07, 4.354645700985926e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:12:50,225] [INFO] [timer.py:199:stop] epoch=0/micro_step=7240/global_step=7240, RunningAvgSamplesPerSec=105.65752762833793, CurrSamplesPerSec=98.45495143921833, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:12:50,318] [INFO] [logging.py:96:log_dist] [Rank 0] step=7240, skipped=124, lr=[2.385884965447316e-07, 2.385884965447316e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7239|ppo_ep: 1|act_loss: -0.0081329345703125|cri_loss: -0.003597259521484375|unsuper_loss: 0.0
-average reward score: 5.2890625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.95%) |Training time=0.49s (22.46%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7240|ppo_ep: 1|act_loss: -0.013763427734375|cri_loss: -0.0066986083984375|unsuper_loss: 0.0
-average reward score: 5.3125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.73%) |Training time=0.50s (22.68%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7241|ppo_ep: 1|act_loss: -0.0121917724609375|cri_loss: -0.00574493408203125|unsuper_loss: 0.0
-average reward score: 5.171875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.02%) |Training time=0.49s (22.38%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7242|ppo_ep: 1|act_loss: -0.025238037109375|cri_loss: -0.0124053955078125|unsuper_loss: 0.0
-average reward score: 5.5078125
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.59%) |Training time=0.50s (22.81%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7243|ppo_ep: 1|act_loss: 0.00975799560546875|cri_loss: 0.00499725341796875|unsuper_loss: 0.0
-average reward score: 5.484375
--------------------------------------------------------------------------------------
-|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.77s (74.78%) |Training time=0.49s (20.94%) |Others=0.10 (4.28%)|CurSamplesPerSec=13.54 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7244|ppo_ep: 1|act_loss: -0.0159912109375|cri_loss: -0.007350921630859375|unsuper_loss: 0.0
-average reward score: 5.609375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.83%) |Training time=0.49s (22.56%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7245|ppo_ep: 1|act_loss: -0.001018524169921875|cri_loss: -0.00044727325439453125|unsuper_loss: 0.0
-average reward score: 5.296875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.11%) |Training time=0.48s (22.30%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7246|ppo_ep: 1|act_loss: 0.001873016357421875|cri_loss: 0.0011262893676757812|unsuper_loss: 0.0
-average reward score: 5.53125
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.24%) |Training time=0.49s (22.23%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.50 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7247|ppo_ep: 1|act_loss: -0.004680633544921875|cri_loss: -0.0019683837890625|unsuper_loss: 0.0
-average reward score: 5.6328125
--------------------------------------------------------------------------------------
-|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.17%) |Training time=0.49s (21.44%) |Others=0.10 (4.39%)|CurSamplesPerSec=14.06 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7248|ppo_ep: 1|act_loss: 0.002910614013671875|cri_loss: 0.0017080307006835938|unsuper_loss: 0.0
-average reward score: 5.36328125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.75%) |Training time=0.49s (22.66%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.55
-[2023-04-14 13:13:12,316] [INFO] [logging.py:96:log_dist] [Rank 0] step=7250, skipped=92, lr=[4.2778501740745257e-07, 4.2778501740745257e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:13:12,335] [INFO] [timer.py:199:stop] epoch=0/micro_step=7250/global_step=7250, RunningAvgSamplesPerSec=105.6449480402019, CurrSamplesPerSec=97.82626105957648, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:13:12,427] [INFO] [logging.py:96:log_dist] [Rank 0] step=7250, skipped=124, lr=[2.3450178645412518e-07, 2.3450178645412518e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7249|ppo_ep: 1|act_loss: 0.002838134765625|cri_loss: 0.00189971923828125|unsuper_loss: 0.0
-average reward score: 5.125
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.08%) |Training time=0.49s (22.39%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7250|ppo_ep: 1|act_loss: -0.0059967041015625|cri_loss: -0.0029048919677734375|unsuper_loss: 0.0
-average reward score: 5.171875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.90%) |Training time=0.49s (22.46%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7251|ppo_ep: 1|act_loss: -0.0037975311279296875|cri_loss: -0.001712799072265625|unsuper_loss: 0.0
-average reward score: 5.0078125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.96%) |Training time=0.49s (22.45%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7252|ppo_ep: 1|act_loss: -0.0023326873779296875|cri_loss: -0.0010633468627929688|unsuper_loss: 0.0
-average reward score: 5.73046875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.61%) |Training time=0.50s (22.81%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7253|ppo_ep: 1|act_loss: 0.0113525390625|cri_loss: 0.00598907470703125|unsuper_loss: 0.0
-average reward score: 5.07421875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.90%) |Training time=0.49s (22.49%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7254|ppo_ep: 1|act_loss: -0.0007228851318359375|cri_loss: 3.24249267578125e-05|unsuper_loss: 0.0
-average reward score: 5.5625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.81%) |Training time=0.49s (22.59%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7255|ppo_ep: 1|act_loss: -0.0020904541015625|cri_loss: -0.0009593963623046875|unsuper_loss: 0.0
-average reward score: 5.5078125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.64%) |Training time=0.50s (22.79%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.55
-[2023-04-14 13:13:27,520] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 16384, reducing to 8192
-epoch: 0|step: 7256|ppo_ep: 1|act_loss: 0.0012178421020507812|cri_loss: 0.0009031295776367188|unsuper_loss: 0.0
-average reward score: 5.86328125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.91%) |Training time=0.46s (21.43%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7257|ppo_ep: 1|act_loss: -0.0136871337890625|cri_loss: -0.006656646728515625|unsuper_loss: 0.0
-average reward score: 5.4375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.86%) |Training time=0.49s (22.54%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7258|ppo_ep: 1|act_loss: 0.031036376953125|cri_loss: 0.01690673828125|unsuper_loss: 0.0
-average reward score: 5.671875
--------------------------------------------------------------------------------------
-|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.77s (75.19%) |Training time=0.48s (20.51%) |Others=0.10 (4.30%)|CurSamplesPerSec=13.63 |AvgSamplesPerSec=14.55
-[2023-04-14 13:13:34,199] [INFO] [logging.py:96:log_dist] [Rank 0] step=7260, skipped=93, lr=[4.209291433815917e-07, 4.209291433815917e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:13:34,218] [INFO] [timer.py:199:stop] epoch=0/micro_step=7260/global_step=7260, RunningAvgSamplesPerSec=105.63492765670183, CurrSamplesPerSec=99.31827821724299, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:13:34,310] [INFO] [logging.py:96:log_dist] [Rank 0] step=7260, skipped=124, lr=[2.3044865651226017e-07, 2.3044865651226017e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7259|ppo_ep: 1|act_loss: -0.0121307373046875|cri_loss: -0.00601959228515625|unsuper_loss: 0.0
-average reward score: 5.58203125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.16%) |Training time=0.48s (22.25%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7260|ppo_ep: 1|act_loss: 0.001026153564453125|cri_loss: 0.0007610321044921875|unsuper_loss: 0.0
-average reward score: 5.36328125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.69%) |Training time=0.47s (21.71%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7261|ppo_ep: 1|act_loss: -0.027099609375|cri_loss: -0.013214111328125|unsuper_loss: 0.0
-average reward score: 5.8046875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.76%) |Training time=0.47s (21.61%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7262|ppo_ep: 1|act_loss: -0.0026416778564453125|cri_loss: -0.00098419189453125|unsuper_loss: 0.0
-average reward score: 5.08203125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.63%) |Training time=0.47s (21.76%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7263|ppo_ep: 1|act_loss: -0.000705718994140625|cri_loss: -0.0001647472381591797|unsuper_loss: 0.0
-average reward score: 6.0703125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.06%) |Training time=0.46s (21.30%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7264|ppo_ep: 1|act_loss: 0.00021183490753173828|cri_loss: 0.0003452301025390625|unsuper_loss: 0.0
-average reward score: 5.1875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.42%) |Training time=0.48s (21.97%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7265|ppo_ep: 1|act_loss: 0.020904541015625|cri_loss: 0.01153564453125|unsuper_loss: 0.0
-average reward score: 5.17578125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.70%) |Training time=0.47s (21.65%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7266|ppo_ep: 1|act_loss: 0.02044677734375|cri_loss: 0.01055145263671875|unsuper_loss: 0.0
-average reward score: 4.921875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.75%) |Training time=0.47s (21.63%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7267|ppo_ep: 1|act_loss: -0.02435302734375|cri_loss: -0.011871337890625|unsuper_loss: 0.0
-average reward score: 5.4140625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.86%) |Training time=0.47s (21.46%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7268|ppo_ep: 1|act_loss: -0.0102081298828125|cri_loss: -0.004817962646484375|unsuper_loss: 0.0
-average reward score: 5.4296875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.74%) |Training time=0.47s (21.65%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.55
-[2023-04-14 13:13:55,869] [INFO] [logging.py:96:log_dist] [Rank 0] step=7270, skipped=93, lr=[4.133735241503523e-07, 4.133735241503523e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:13:55,887] [INFO] [timer.py:199:stop] epoch=0/micro_step=7270/global_step=7270, RunningAvgSamplesPerSec=105.63331027433297, CurrSamplesPerSec=105.13135218502553, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:13:55,980] [INFO] [logging.py:96:log_dist] [Rank 0] step=7270, skipped=124, lr=[2.2642916679630006e-07, 2.2642916679630006e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7269|ppo_ep: 1|act_loss: 0.02801513671875|cri_loss: 0.01450347900390625|unsuper_loss: 0.0
-average reward score: 5.1640625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.81%) |Training time=0.47s (21.57%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.55
-[2023-04-14 13:13:58,022] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 8192, reducing to 4096
-epoch: 0|step: 7270|ppo_ep: 1|act_loss: 0.0019197463989257812|cri_loss: 0.0013561248779296875|unsuper_loss: 0.0
-average reward score: 5.03125
--------------------------------------------------------------------------------------
-|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.71%) |Training time=0.44s (20.64%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7271|ppo_ep: 1|act_loss: 0.00724029541015625|cri_loss: 0.0037212371826171875|unsuper_loss: 0.0
-average reward score: 5.23828125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.80%) |Training time=0.47s (21.58%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7272|ppo_ep: 1|act_loss: -0.00864410400390625|cri_loss: -0.004055023193359375|unsuper_loss: 0.0
-average reward score: 5.3125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.91%) |Training time=0.46s (21.45%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7273|ppo_ep: 1|act_loss: -0.0122833251953125|cri_loss: -0.00557708740234375|unsuper_loss: 0.0
-average reward score: 5.5078125
--------------------------------------------------------------------------------------
-|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.77s (75.70%) |Training time=0.47s (19.98%) |Others=0.10 (4.33%)|CurSamplesPerSec=13.72 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7274|ppo_ep: 1|act_loss: -0.02825927734375|cri_loss: -0.013885498046875|unsuper_loss: 0.0
-average reward score: 5.828125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.78%) |Training time=0.47s (21.63%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7275|ppo_ep: 1|act_loss: -0.00447845458984375|cri_loss: -0.0021266937255859375|unsuper_loss: 0.0
-average reward score: 5.5078125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.87%) |Training time=0.47s (21.52%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7276|ppo_ep: 1|act_loss: 0.0157470703125|cri_loss: 0.00909423828125|unsuper_loss: 0.0
-average reward score: 5.17578125
--------------------------------------------------------------------------------------
-|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.72s (75.23%) |Training time=0.47s (20.37%) |Others=0.10 (4.40%)|CurSamplesPerSec=14.00 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7277|ppo_ep: 1|act_loss: 0.117919921875|cri_loss: 0.06317138671875|unsuper_loss: 0.0
-average reward score: 5.16015625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.79%) |Training time=0.47s (21.51%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7278|ppo_ep: 1|act_loss: 0.01934814453125|cri_loss: 0.00983428955078125|unsuper_loss: 0.0
-average reward score: 5.3984375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.69%) |Training time=0.47s (21.71%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.55
-[2023-04-14 13:14:17,796] [INFO] [logging.py:96:log_dist] [Rank 0] step=7280, skipped=94, lr=[4.066293729300195e-07, 4.066293729300195e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:14:17,815] [INFO] [timer.py:199:stop] epoch=0/micro_step=7280/global_step=7280, RunningAvgSamplesPerSec=105.63432861140478, CurrSamplesPerSec=106.89784560000638, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:14:17,907] [INFO] [logging.py:96:log_dist] [Rank 0] step=7280, skipped=124, lr=[2.224433768847789e-07, 2.224433768847789e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7279|ppo_ep: 1|act_loss: 0.02032470703125|cri_loss: 0.01061248779296875|unsuper_loss: 0.0
-average reward score: 5.44140625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.05%) |Training time=0.46s (21.35%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7280|ppo_ep: 1|act_loss: 0.0042724609375|cri_loss: 0.00223541259765625|unsuper_loss: 0.0
-average reward score: 4.66015625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.60%) |Training time=0.47s (21.79%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7281|ppo_ep: 1|act_loss: -0.0147247314453125|cri_loss: -0.00708770751953125|unsuper_loss: 0.0
-average reward score: 5.74609375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.54%) |Training time=0.47s (21.85%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7282|ppo_ep: 1|act_loss: -0.009124755859375|cri_loss: -0.00443267822265625|unsuper_loss: 0.0
-average reward score: 4.9921875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.76%) |Training time=0.47s (21.61%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7283|ppo_ep: 1|act_loss: 0.004364013671875|cri_loss: 0.0024471282958984375|unsuper_loss: 0.0
-average reward score: 4.83984375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.66%) |Training time=0.47s (21.73%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7284|ppo_ep: 1|act_loss: 0.001445770263671875|cri_loss: 0.00115203857421875|unsuper_loss: 0.0
-average reward score: 4.4375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.43%) |Training time=0.48s (21.96%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7285|ppo_ep: 1|act_loss: 0.00766754150390625|cri_loss: 0.004055023193359375|unsuper_loss: 0.0
-average reward score: 5.2265625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.93%) |Training time=0.46s (21.45%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7286|ppo_ep: 1|act_loss: -0.00945281982421875|cri_loss: -0.004619598388671875|unsuper_loss: 0.0
-average reward score: 4.65234375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.86%) |Training time=0.47s (21.51%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7287|ppo_ep: 1|act_loss: -0.0136871337890625|cri_loss: -0.006744384765625|unsuper_loss: 0.0
-average reward score: 4.6875
--------------------------------------------------------------------------------------
-|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.60s (70.62%) |Training time=0.48s (21.03%) |Others=0.19 (8.35%)|CurSamplesPerSec=14.16 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7288|ppo_ep: 1|act_loss: -0.0018205642700195312|cri_loss: -0.0007228851318359375|unsuper_loss: 0.0
-average reward score: 5.4921875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.86%) |Training time=0.46s (21.35%) |Others=0.10 (4.78%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.55
-[2023-04-14 13:14:39,554] [INFO] [logging.py:96:log_dist] [Rank 0] step=7290, skipped=94, lr=[3.9919809145031695e-07, 3.9919809145031695e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:14:39,573] [INFO] [timer.py:199:stop] epoch=0/micro_step=7290/global_step=7290, RunningAvgSamplesPerSec=105.63344822690242, CurrSamplesPerSec=110.83196229238267, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:14:39,665] [INFO] [logging.py:96:log_dist] [Rank 0] step=7290, skipped=124, lr=[2.1849134585671922e-07, 2.1849134585671922e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7289|ppo_ep: 1|act_loss: -0.03265380859375|cri_loss: -0.01580810546875|unsuper_loss: 0.0
-average reward score: 5.48046875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.50%) |Training time=0.45s (20.88%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7290|ppo_ep: 1|act_loss: -0.0302276611328125|cri_loss: -0.014495849609375|unsuper_loss: 0.0
-average reward score: 4.96484375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.07%) |Training time=0.46s (21.32%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7291|ppo_ep: 1|act_loss: 0.008209228515625|cri_loss: 0.004390716552734375|unsuper_loss: 0.0
-average reward score: 5.71484375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.82%) |Training time=0.47s (21.54%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7292|ppo_ep: 1|act_loss: 0.00888824462890625|cri_loss: 0.00460052490234375|unsuper_loss: 0.0
-average reward score: 4.99609375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.08%) |Training time=0.46s (21.28%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7293|ppo_ep: 1|act_loss: 0.02685546875|cri_loss: 0.01367950439453125|unsuper_loss: 0.0
-average reward score: 5.1015625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.86%) |Training time=0.47s (21.52%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7294|ppo_ep: 1|act_loss: 0.0189208984375|cri_loss: 0.00954437255859375|unsuper_loss: 0.0
-average reward score: 5.55078125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.86%) |Training time=0.47s (21.52%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7295|ppo_ep: 1|act_loss: 0.01593017578125|cri_loss: 0.00836181640625|unsuper_loss: 0.0
-average reward score: 5.109375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.14%) |Training time=0.46s (21.22%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7296|ppo_ep: 1|act_loss: 0.02227783203125|cri_loss: 0.0115814208984375|unsuper_loss: 0.0
-average reward score: 5.1171875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.69%) |Training time=0.47s (21.71%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7297|ppo_ep: 1|act_loss: -0.00872802734375|cri_loss: -0.003902435302734375|unsuper_loss: 0.0
-average reward score: 5.80078125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.98%) |Training time=0.46s (21.39%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7298|ppo_ep: 1|act_loss: 0.0307159423828125|cri_loss: 0.015655517578125|unsuper_loss: 0.0
-average reward score: 4.703125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.97%) |Training time=0.49s (22.49%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.55
-[2023-04-14 13:15:01,191] [INFO] [logging.py:96:log_dist] [Rank 0] step=7300, skipped=94, lr=[3.918324110315662e-07, 3.918324110315662e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:15:01,209] [INFO] [timer.py:199:stop] epoch=0/micro_step=7300/global_step=7300, RunningAvgSamplesPerSec=105.63232889516885, CurrSamplesPerSec=100.18558607917797, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:15:01,302] [INFO] [logging.py:96:log_dist] [Rank 0] step=7300, skipped=124, lr=[2.1457313229075322e-07, 2.1457313229075322e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7299|ppo_ep: 1|act_loss: -0.0007047653198242188|cri_loss: 0.00031280517578125|unsuper_loss: 0.0
-average reward score: 6.32421875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.15%) |Training time=0.48s (22.31%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7300|ppo_ep: 1|act_loss: 0.00514984130859375|cri_loss: 0.00290679931640625|unsuper_loss: 0.0
-average reward score: 4.609375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.91%) |Training time=0.49s (22.47%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7301|ppo_ep: 1|act_loss: 0.013763427734375|cri_loss: 0.007228851318359375|unsuper_loss: 0.0
-average reward score: 5.28515625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.01%) |Training time=0.49s (22.46%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7302|ppo_ep: 1|act_loss: 0.002460479736328125|cri_loss: 0.0013637542724609375|unsuper_loss: 0.0
-average reward score: 5.4140625
--------------------------------------------------------------------------------------
-|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.61s (70.38%) |Training time=0.57s (24.93%) |Others=0.11 (4.69%)|CurSamplesPerSec=14.01 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7303|ppo_ep: 1|act_loss: -0.0006494522094726562|cri_loss: -0.0001862049102783203|unsuper_loss: 0.0
-average reward score: 5.1171875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.90%) |Training time=0.49s (22.58%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7304|ppo_ep: 1|act_loss: -0.025909423828125|cri_loss: -0.01270294189453125|unsuper_loss: 0.0
-average reward score: 5.46875
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.96%) |Training time=0.47s (21.45%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7305|ppo_ep: 1|act_loss: -0.0158233642578125|cri_loss: -0.00772857666015625|unsuper_loss: 0.0
-average reward score: 6.1640625
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.72%) |Training time=0.48s (21.80%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.56 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7306|ppo_ep: 1|act_loss: 0.0014791488647460938|cri_loss: 0.0011234283447265625|unsuper_loss: 0.0
-average reward score: 5.2890625
--------------------------------------------------------------------------------------
-|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.68s (73.96%) |Training time=0.49s (21.66%) |Others=0.10 (4.39%)|CurSamplesPerSec=14.10 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7307|ppo_ep: 1|act_loss: 0.01947021484375|cri_loss: 0.01013946533203125|unsuper_loss: 0.0
-average reward score: 5.53515625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.05%) |Training time=0.49s (22.42%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7308|ppo_ep: 1|act_loss: 0.00453948974609375|cri_loss: 0.002384185791015625|unsuper_loss: 0.0
-average reward score: 5.578125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.78%) |Training time=0.49s (22.61%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.55
-[2023-04-14 13:15:23,160] [INFO] [logging.py:96:log_dist] [Rank 0] step=7310, skipped=94, lr=[3.8453244085091747e-07, 3.8453244085091747e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:15:23,179] [INFO] [timer.py:199:stop] epoch=0/micro_step=7310/global_step=7310, RunningAvgSamplesPerSec=105.62016882536805, CurrSamplesPerSec=98.96179635438095, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:15:23,271] [INFO] [logging.py:96:log_dist] [Rank 0] step=7310, skipped=124, lr=[2.106887942642588e-07, 2.106887942642588e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7309|ppo_ep: 1|act_loss: 0.0206451416015625|cri_loss: 0.01088714599609375|unsuper_loss: 0.0
-average reward score: 5.9765625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.27%) |Training time=0.49s (22.26%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7310|ppo_ep: 1|act_loss: -0.018463134765625|cri_loss: -0.0088043212890625|unsuper_loss: 0.0
-average reward score: 4.9921875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.86%) |Training time=0.49s (22.54%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.55
-[2023-04-14 13:15:27,598] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
-epoch: 0|step: 7311|ppo_ep: 1|act_loss: 0.0302734375|cri_loss: 0.0158538818359375|unsuper_loss: 0.0
-average reward score: 5.2890625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.29%) |Training time=0.49s (22.66%) |Others=0.09 (4.05%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.55
-[2023-04-14 13:15:29,755] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
-epoch: 0|step: 7312|ppo_ep: 1|act_loss: -0.0013437271118164062|cri_loss: -0.0005731582641601562|unsuper_loss: 0.0
-average reward score: 5.6796875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.17%) |Training time=0.49s (22.71%) |Others=0.09 (4.12%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7313|ppo_ep: 1|act_loss: -0.0218505859375|cri_loss: -0.01042938232421875|unsuper_loss: 0.0
-average reward score: 5.234375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.81%) |Training time=0.49s (22.67%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7314|ppo_ep: 1|act_loss: 0.013671875|cri_loss: 0.007049560546875|unsuper_loss: 0.0
-average reward score: 5.5234375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.95%) |Training time=0.49s (22.42%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7315|ppo_ep: 1|act_loss: -0.041412353515625|cri_loss: -0.0198974609375|unsuper_loss: 0.0
-average reward score: 5.6953125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.01%) |Training time=0.49s (22.47%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7316|ppo_ep: 1|act_loss: -0.01434326171875|cri_loss: -0.00691986083984375|unsuper_loss: 0.0
-average reward score: 5.4765625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.91%) |Training time=0.49s (22.48%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7317|ppo_ep: 1|act_loss: 0.0110321044921875|cri_loss: 0.0056610107421875|unsuper_loss: 0.0
-average reward score: 5.3828125
--------------------------------------------------------------------------------------
-|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.17%) |Training time=0.48s (20.63%) |Others=0.10 (4.19%)|CurSamplesPerSec=13.67 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7318|ppo_ep: 1|act_loss: -0.0601806640625|cri_loss: -0.00604248046875|unsuper_loss: 0.0
-average reward score: 5.953125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.23%) |Training time=0.48s (22.16%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.55
-[2023-04-14 13:15:45,007] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 4096, reducing to 2048
-[2023-04-14 13:15:45,008] [INFO] [logging.py:96:log_dist] [Rank 0] step=7320, skipped=95, lr=[3.78018739391981e-07, 3.78018739391981e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:15:45,008] [INFO] [timer.py:199:stop] epoch=0/micro_step=7320/global_step=7320, RunningAvgSamplesPerSec=105.61084278644113, CurrSamplesPerSec=106.58968233799237, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:15:45,100] [INFO] [logging.py:96:log_dist] [Rank 0] step=7320, skipped=126, lr=[2.0760575293895051e-07, 2.0760575293895051e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7319|ppo_ep: 1|act_loss: 0.024017333984375|cri_loss: 0.01222991943359375|unsuper_loss: 0.0
-average reward score: 5.2578125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.01%) |Training time=0.46s (21.44%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7320|ppo_ep: 1|act_loss: -0.010589599609375|cri_loss: -0.005062103271484375|unsuper_loss: 0.0
-average reward score: 5.375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.37%) |Training time=0.48s (22.01%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7321|ppo_ep: 1|act_loss: 0.006988525390625|cri_loss: 0.00362396240234375|unsuper_loss: 0.0
-average reward score: 5.20703125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.85%) |Training time=0.49s (22.57%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7322|ppo_ep: 1|act_loss: -6.532669067382812e-05|cri_loss: 0.00026035308837890625|unsuper_loss: 0.0
-average reward score: 5.2578125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.01%) |Training time=0.49s (22.38%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7323|ppo_ep: 1|act_loss: 0.02740478515625|cri_loss: 0.01401519775390625|unsuper_loss: 0.0
-average reward score: 5.4921875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.98%) |Training time=0.49s (22.51%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7324|ppo_ep: 1|act_loss: 0.03741455078125|cri_loss: 0.019012451171875|unsuper_loss: 0.0
-average reward score: 5.0
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.81%) |Training time=0.49s (22.43%) |Others=0.10 (4.76%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7325|ppo_ep: 1|act_loss: 0.029541015625|cri_loss: 0.0160675048828125|unsuper_loss: 0.0
-average reward score: 5.4296875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.09%) |Training time=0.48s (22.36%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7326|ppo_ep: 1|act_loss: -0.01361083984375|cri_loss: -0.00656890869140625|unsuper_loss: 0.0
-average reward score: 5.18359375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.12%) |Training time=0.48s (22.25%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7327|ppo_ep: 1|act_loss: 0.006519317626953125|cri_loss: 0.003330230712890625|unsuper_loss: 0.0
-average reward score: 6.0625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.97%) |Training time=0.49s (22.50%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7328|ppo_ep: 1|act_loss: 0.0673828125|cri_loss: 0.03973388671875|unsuper_loss: 0.0
-average reward score: 5.75390625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.66%) |Training time=0.49s (22.73%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.55
-[2023-04-14 13:16:06,691] [INFO] [logging.py:96:log_dist] [Rank 0] step=7330, skipped=95, lr=[3.708439159571659e-07, 3.708439159571659e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:16:06,709] [INFO] [timer.py:199:stop] epoch=0/micro_step=7330/global_step=7330, RunningAvgSamplesPerSec=105.60095077536732, CurrSamplesPerSec=99.02173837662687, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:16:06,802] [INFO] [logging.py:96:log_dist] [Rank 0] step=7330, skipped=126, lr=[2.0378253563519247e-07, 2.0378253563519247e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7329|ppo_ep: 1|act_loss: -0.0149078369140625|cri_loss: -0.00720977783203125|unsuper_loss: 0.0
-average reward score: 5.48046875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.07%) |Training time=0.49s (22.43%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7330|ppo_ep: 1|act_loss: 0.022674560546875|cri_loss: 0.01198577880859375|unsuper_loss: 0.0
-average reward score: 5.4921875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.63%) |Training time=0.49s (22.77%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7331|ppo_ep: 1|act_loss: -0.00815582275390625|cri_loss: -0.00391387939453125|unsuper_loss: 0.0
-average reward score: 5.17578125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.94%) |Training time=0.49s (22.54%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7332|ppo_ep: 1|act_loss: -0.05584716796875|cri_loss: -0.021484375|unsuper_loss: 0.0
-average reward score: 5.11328125
--------------------------------------------------------------------------------------
-|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.61s (70.82%) |Training time=0.48s (21.26%) |Others=0.18 (7.93%)|CurSamplesPerSec=14.11 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7333|ppo_ep: 1|act_loss: -0.01401519775390625|cri_loss: -0.006877899169921875|unsuper_loss: 0.0
-average reward score: 5.46875
--------------------------------------------------------------------------------------
-|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.64s (73.48%) |Training time=0.48s (21.45%) |Others=0.11 (5.07%)|CurSamplesPerSec=14.37 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7334|ppo_ep: 1|act_loss: 0.008270263671875|cri_loss: 0.004230499267578125|unsuper_loss: 0.0
-average reward score: 5.16015625
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.10%) |Training time=0.46s (20.64%) |Others=0.12 (5.26%)|CurSamplesPerSec=14.50 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7335|ppo_ep: 1|act_loss: -0.0372314453125|cri_loss: -0.0173187255859375|unsuper_loss: 0.0
-average reward score: 5.53515625
--------------------------------------------------------------------------------------
-|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.71s (75.10%) |Training time=0.47s (20.60%) |Others=0.10 (4.31%)|CurSamplesPerSec=14.02 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7336|ppo_ep: 1|act_loss: 0.006072998046875|cri_loss: 0.003406524658203125|unsuper_loss: 0.0
-average reward score: 5.8984375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.71%) |Training time=0.47s (21.68%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7337|ppo_ep: 1|act_loss: -0.01271820068359375|cri_loss: -0.00617218017578125|unsuper_loss: 0.0
-average reward score: 4.96484375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.83%) |Training time=0.47s (21.64%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7338|ppo_ep: 1|act_loss: 0.031219482421875|cri_loss: 0.0158233642578125|unsuper_loss: 0.0
-average reward score: 5.08984375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.67%) |Training time=0.47s (21.71%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.55
-[2023-04-14 13:16:28,680] [INFO] [logging.py:96:log_dist] [Rank 0] step=7340, skipped=95, lr=[3.6373511386058315e-07, 3.6373511386058315e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:16:28,698] [INFO] [timer.py:199:stop] epoch=0/micro_step=7340/global_step=7340, RunningAvgSamplesPerSec=105.59720320971675, CurrSamplesPerSec=105.43584973840909, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:16:28,791] [INFO] [logging.py:96:log_dist] [Rank 0] step=7340, skipped=126, lr=[1.9999335381357953e-07, 1.9999335381357953e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7339|ppo_ep: 1|act_loss: -0.0055084228515625|cri_loss: -0.002353668212890625|unsuper_loss: 0.0
-average reward score: 5.65625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.97%) |Training time=0.47s (21.52%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7340|ppo_ep: 1|act_loss: -0.002819061279296875|cri_loss: -0.0006628036499023438|unsuper_loss: 0.0
-average reward score: 5.1796875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.60%) |Training time=0.47s (21.81%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7341|ppo_ep: 1|act_loss: -0.01428985595703125|cri_loss: -0.006931304931640625|unsuper_loss: 0.0
-average reward score: 5.375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.85%) |Training time=0.47s (21.62%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7342|ppo_ep: 1|act_loss: 0.03863525390625|cri_loss: 0.0203704833984375|unsuper_loss: 0.0
-average reward score: 5.71484375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.76%) |Training time=0.47s (21.64%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7343|ppo_ep: 1|act_loss: -0.00783538818359375|cri_loss: -0.003650665283203125|unsuper_loss: 0.0
-average reward score: 5.40625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.59%) |Training time=0.48s (21.90%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7344|ppo_ep: 1|act_loss: 0.00574493408203125|cri_loss: 0.0035114288330078125|unsuper_loss: 0.0
-average reward score: 5.34375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.85%) |Training time=0.47s (21.54%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7345|ppo_ep: 1|act_loss: -0.00555419921875|cri_loss: 0.001644134521484375|unsuper_loss: 0.0
-average reward score: 5.43359375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.14%) |Training time=0.46s (21.34%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7346|ppo_ep: 1|act_loss: -0.0234527587890625|cri_loss: -0.011444091796875|unsuper_loss: 0.0
-average reward score: 5.39453125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.35%) |Training time=0.45s (21.01%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7347|ppo_ep: 1|act_loss: 0.0014638900756835938|cri_loss: 0.0008153915405273438|unsuper_loss: 0.0
-average reward score: 6.046875
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.93%) |Training time=0.47s (21.52%) |Others=0.12 (5.55%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7348|ppo_ep: 1|act_loss: -0.0240478515625|cri_loss: -0.01140594482421875|unsuper_loss: 0.0
-average reward score: 5.78125
--------------------------------------------------------------------------------------
-|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.72s (75.18%) |Training time=0.46s (20.09%) |Others=0.11 (4.72%)|CurSamplesPerSec=14.02 |AvgSamplesPerSec=14.55
-[2023-04-14 13:16:50,529] [INFO] [logging.py:96:log_dist] [Rank 0] step=7350, skipped=95, lr=[3.5669243847182864e-07, 3.5669243847182864e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:16:50,547] [INFO] [timer.py:199:stop] epoch=0/micro_step=7350/global_step=7350, RunningAvgSamplesPerSec=105.59639078906564, CurrSamplesPerSec=104.471301040761, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:16:50,640] [INFO] [logging.py:96:log_dist] [Rank 0] step=7350, skipped=126, lr=[1.962382636389268e-07, 1.962382636389268e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7349|ppo_ep: 1|act_loss: -0.00226593017578125|cri_loss: -0.0006923675537109375|unsuper_loss: 0.0
-average reward score: 5.5703125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.95%) |Training time=0.47s (21.54%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7350|ppo_ep: 1|act_loss: -0.0267333984375|cri_loss: -0.01296234130859375|unsuper_loss: 0.0
-average reward score: 5.69140625
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.38%) |Training time=0.48s (22.07%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7351|ppo_ep: 1|act_loss: -0.008514404296875|cri_loss: -0.0030345916748046875|unsuper_loss: 0.0
-average reward score: 5.5703125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.89%) |Training time=0.47s (21.58%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7352|ppo_ep: 1|act_loss: -0.0154571533203125|cri_loss: -0.007434844970703125|unsuper_loss: 0.0
-average reward score: 5.89453125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.02%) |Training time=0.46s (21.35%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7353|ppo_ep: 1|act_loss: -0.00461578369140625|cri_loss: -0.002185821533203125|unsuper_loss: 0.0
-average reward score: 5.3359375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.62%) |Training time=0.48s (21.86%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7354|ppo_ep: 1|act_loss: -0.0004100799560546875|cri_loss: 8.821487426757812e-06|unsuper_loss: 0.0
-average reward score: 5.7578125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.64%) |Training time=0.47s (21.74%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7355|ppo_ep: 1|act_loss: 0.015960693359375|cri_loss: 0.00814056396484375|unsuper_loss: 0.0
-average reward score: 6.1015625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.31%) |Training time=0.46s (21.15%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7356|ppo_ep: 1|act_loss: 0.0009984970092773438|cri_loss: 0.0006694793701171875|unsuper_loss: 0.0
-average reward score: 5.80859375
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.09%) |Training time=0.48s (21.66%) |Others=0.14 (6.25%)|CurSamplesPerSec=14.45 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7357|ppo_ep: 1|act_loss: 0.05804443359375|cri_loss: 0.0310516357421875|unsuper_loss: 0.0
-average reward score: 4.7578125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.80%) |Training time=0.47s (21.66%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7358|ppo_ep: 1|act_loss: 0.003452301025390625|cri_loss: 0.0018177032470703125|unsuper_loss: 0.0
-average reward score: 5.4140625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.06%) |Training time=0.48s (22.31%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.55
-[2023-04-14 13:17:12,274] [INFO] [logging.py:96:log_dist] [Rank 0] step=7360, skipped=95, lr=[3.4971599418034037e-07, 3.4971599418034037e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:17:12,293] [INFO] [timer.py:199:stop] epoch=0/micro_step=7360/global_step=7360, RunningAvgSamplesPerSec=105.59306648686497, CurrSamplesPerSec=97.36653804664417, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:17:12,386] [INFO] [logging.py:96:log_dist] [Rank 0] step=7360, skipped=126, lr=[1.9251732077072925e-07, 1.9251732077072925e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7359|ppo_ep: 1|act_loss: 0.00797271728515625|cri_loss: 0.00403594970703125|unsuper_loss: 0.0
-average reward score: 5.0078125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.98%) |Training time=0.49s (22.55%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7360|ppo_ep: 1|act_loss: 0.01273345947265625|cri_loss: 0.0066986083984375|unsuper_loss: 0.0
-average reward score: 5.984375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.30%) |Training time=0.47s (22.04%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7361|ppo_ep: 1|act_loss: -0.01342010498046875|cri_loss: -0.00612640380859375|unsuper_loss: 0.0
-average reward score: 5.4453125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.55%) |Training time=0.47s (21.80%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7362|ppo_ep: 1|act_loss: 0.014251708984375|cri_loss: 0.0075225830078125|unsuper_loss: 0.0
-average reward score: 5.5546875
--------------------------------------------------------------------------------------
-|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.74s (74.70%) |Training time=0.49s (21.01%) |Others=0.10 (4.29%)|CurSamplesPerSec=13.73 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7363|ppo_ep: 1|act_loss: -7.724761962890625e-05|cri_loss: 0.0008220672607421875|unsuper_loss: 0.0
-average reward score: 4.68359375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.54%) |Training time=0.47s (21.92%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7364|ppo_ep: 1|act_loss: -0.0110931396484375|cri_loss: -0.005306243896484375|unsuper_loss: 0.0
-average reward score: 4.98046875
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.59%) |Training time=0.48s (21.81%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7365|ppo_ep: 1|act_loss: -0.002201080322265625|cri_loss: -0.0009555816650390625|unsuper_loss: 0.0
-average reward score: 5.24609375
--------------------------------------------------------------------------------------
-|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.67s (74.23%) |Training time=0.47s (20.97%) |Others=0.11 (4.79%)|CurSamplesPerSec=14.20 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7366|ppo_ep: 1|act_loss: -0.0089569091796875|cri_loss: -0.004367828369140625|unsuper_loss: 0.0
-average reward score: 5.59375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.78%) |Training time=0.46s (21.56%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7367|ppo_ep: 1|act_loss: -0.0165863037109375|cri_loss: -0.00789642333984375|unsuper_loss: 0.0
-average reward score: 5.66796875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.33%) |Training time=0.48s (22.11%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7368|ppo_ep: 1|act_loss: -0.010040283203125|cri_loss: -0.004924774169921875|unsuper_loss: 0.0
-average reward score: 5.265625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.06%) |Training time=0.48s (22.31%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.55
-[2023-04-14 13:17:34,126] [INFO] [logging.py:96:log_dist] [Rank 0] step=7370, skipped=95, lr=[3.428058843938532e-07, 3.428058843938532e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:17:34,144] [INFO] [timer.py:199:stop] epoch=0/micro_step=7370/global_step=7370, RunningAvgSamplesPerSec=105.58833846823492, CurrSamplesPerSec=101.61320161196909, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:17:34,237] [INFO] [logging.py:96:log_dist] [Rank 0] step=7370, skipped=126, lr=[1.888305803623372e-07, 1.888305803623372e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7369|ppo_ep: 1|act_loss: -0.023406982421875|cri_loss: -0.0115966796875|unsuper_loss: 0.0
-average reward score: 5.31640625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.23%) |Training time=0.48s (22.24%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7370|ppo_ep: 1|act_loss: 0.0177001953125|cri_loss: 0.009124755859375|unsuper_loss: 0.0
-average reward score: 4.90625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.10%) |Training time=0.48s (22.28%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7371|ppo_ep: 1|act_loss: 0.00457763671875|cri_loss: 0.0029449462890625|unsuper_loss: 0.0
-average reward score: 5.84375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.33%) |Training time=0.47s (22.02%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7372|ppo_ep: 1|act_loss: -0.011962890625|cri_loss: -0.00588226318359375|unsuper_loss: 0.0
-average reward score: 5.16015625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.99%) |Training time=0.48s (22.41%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7373|ppo_ep: 1|act_loss: -0.016937255859375|cri_loss: -0.00826263427734375|unsuper_loss: 0.0
-average reward score: 5.61328125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.15%) |Training time=0.48s (22.30%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7374|ppo_ep: 1|act_loss: -0.0300140380859375|cri_loss: -0.01454925537109375|unsuper_loss: 0.0
-average reward score: 5.08203125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.49%) |Training time=0.47s (21.84%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7375|ppo_ep: 1|act_loss: 0.0208740234375|cri_loss: 0.01078033447265625|unsuper_loss: 0.0
-average reward score: 4.859375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.35%) |Training time=0.47s (22.02%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7376|ppo_ep: 1|act_loss: 0.0167388916015625|cri_loss: 0.00910186767578125|unsuper_loss: 0.0
-average reward score: 5.23046875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.46%) |Training time=0.47s (21.87%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7377|ppo_ep: 1|act_loss: -0.005702972412109375|cri_loss: -0.0024814605712890625|unsuper_loss: 0.0
-average reward score: 5.5
--------------------------------------------------------------------------------------
-|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.47%) |Training time=0.47s (20.22%) |Others=0.10 (4.32%)|CurSamplesPerSec=13.77 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7378|ppo_ep: 1|act_loss: -0.01103973388671875|cri_loss: -0.00537872314453125|unsuper_loss: 0.0
-average reward score: 5.4296875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.61%) |Training time=0.47s (21.72%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.55
-[2023-04-14 13:17:55,852] [INFO] [logging.py:96:log_dist] [Rank 0] step=7380, skipped=95, lr=[3.359622115368647e-07, 3.359622115368647e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:17:55,870] [INFO] [timer.py:199:stop] epoch=0/micro_step=7380/global_step=7380, RunningAvgSamplesPerSec=105.58390759317957, CurrSamplesPerSec=101.95661566977104, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:17:55,967] [INFO] [logging.py:96:log_dist] [Rank 0] step=7380, skipped=126, lr=[1.851780970601366e-07, 1.851780970601366e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7379|ppo_ep: 1|act_loss: 0.0010738372802734375|cri_loss: 0.0008387565612792969|unsuper_loss: 0.0
-average reward score: 5.51171875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.22%) |Training time=0.48s (22.08%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7380|ppo_ep: 1|act_loss: -0.0142974853515625|cri_loss: -0.006511688232421875|unsuper_loss: 0.0
-average reward score: 5.37890625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.11%) |Training time=0.46s (21.22%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7381|ppo_ep: 1|act_loss: 0.0143585205078125|cri_loss: 0.0078277587890625|unsuper_loss: 0.0
-average reward score: 5.484375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.38%) |Training time=0.45s (20.95%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7382|ppo_ep: 1|act_loss: -0.0185394287109375|cri_loss: -0.0091094970703125|unsuper_loss: 0.0
-average reward score: 5.1796875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.07%) |Training time=0.46s (21.28%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7383|ppo_ep: 1|act_loss: 0.018524169921875|cri_loss: 0.011260986328125|unsuper_loss: 0.0
-average reward score: 5.375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.26%) |Training time=0.45s (21.17%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7384|ppo_ep: 1|act_loss: -0.0389404296875|cri_loss: -0.0170745849609375|unsuper_loss: 0.0
-average reward score: 5.3984375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.08%) |Training time=0.46s (21.29%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7385|ppo_ep: 1|act_loss: -0.00312042236328125|cri_loss: -0.0014448165893554688|unsuper_loss: 0.0
-average reward score: 5.80078125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.15%) |Training time=0.46s (21.20%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7386|ppo_ep: 1|act_loss: -0.028533935546875|cri_loss: -0.0136566162109375|unsuper_loss: 0.0
-average reward score: 5.24609375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.92%) |Training time=0.46s (21.41%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7387|ppo_ep: 1|act_loss: 0.0108184814453125|cri_loss: 0.005504608154296875|unsuper_loss: 0.0
-average reward score: 5.8125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.17%) |Training time=0.46s (21.21%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7388|ppo_ep: 1|act_loss: -0.003536224365234375|cri_loss: -0.001667022705078125|unsuper_loss: 0.0
-average reward score: 5.2890625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.02%) |Training time=0.46s (21.33%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.55
-[2023-04-14 13:18:17,398] [INFO] [logging.py:96:log_dist] [Rank 0] step=7390, skipped=95, lr=[3.2918507704911455e-07, 3.2918507704911455e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:18:17,416] [INFO] [timer.py:199:stop] epoch=0/micro_step=7390/global_step=7390, RunningAvgSamplesPerSec=105.5878590176888, CurrSamplesPerSec=107.56306313887185, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:18:17,509] [INFO] [logging.py:96:log_dist] [Rank 0] step=7390, skipped=126, lr=[1.8155992500274112e-07, 1.8155992500274112e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7389|ppo_ep: 1|act_loss: 0.023468017578125|cri_loss: 0.01312255859375|unsuper_loss: 0.0
-average reward score: 5.6953125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.11%) |Training time=0.46s (21.34%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7390|ppo_ep: 1|act_loss: 0.00725555419921875|cri_loss: 0.00370025634765625|unsuper_loss: 0.0
-average reward score: 5.94921875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.17%) |Training time=0.46s (21.18%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7391|ppo_ep: 1|act_loss: -0.00531768798828125|cri_loss: -0.0025043487548828125|unsuper_loss: 0.0
-average reward score: 5.39453125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.98%) |Training time=0.46s (21.38%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7392|ppo_ep: 1|act_loss: 0.00354766845703125|cri_loss: 0.00273895263671875|unsuper_loss: 0.0
-average reward score: 5.171875
--------------------------------------------------------------------------------------
-|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.68%) |Training time=0.46s (19.76%) |Others=0.11 (4.56%)|CurSamplesPerSec=13.77 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7393|ppo_ep: 1|act_loss: -0.00673675537109375|cri_loss: -0.0030460357666015625|unsuper_loss: 0.0
-average reward score: 5.19921875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.65%) |Training time=0.45s (20.69%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7394|ppo_ep: 1|act_loss: 0.0222015380859375|cri_loss: 0.0114288330078125|unsuper_loss: 0.0
-average reward score: 6.484375
--------------------------------------------------------------------------------------
-|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.70s (74.73%) |Training time=0.48s (20.87%) |Others=0.10 (4.40%)|CurSamplesPerSec=14.06 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7395|ppo_ep: 1|act_loss: -0.0157470703125|cri_loss: -0.00746917724609375|unsuper_loss: 0.0
-average reward score: 5.55078125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.94%) |Training time=0.46s (21.39%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7396|ppo_ep: 1|act_loss: -0.01318359375|cri_loss: -0.005802154541015625|unsuper_loss: 0.0
-average reward score: 5.64453125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.34%) |Training time=0.45s (20.99%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7397|ppo_ep: 1|act_loss: 0.004085540771484375|cri_loss: 0.00223541259765625|unsuper_loss: 0.0
-average reward score: 4.953125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.21%) |Training time=0.45s (21.14%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7398|ppo_ep: 1|act_loss: 0.0054931640625|cri_loss: 0.0029010772705078125|unsuper_loss: 0.0
-average reward score: 5.41015625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.14%) |Training time=0.46s (21.21%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.55
-[2023-04-14 13:18:39,217] [INFO] [logging.py:96:log_dist] [Rank 0] step=7400, skipped=95, lr=[3.2247458138408513e-07, 3.2247458138408513e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:18:39,235] [INFO] [timer.py:199:stop] epoch=0/micro_step=7400/global_step=7400, RunningAvgSamplesPerSec=105.59212222294153, CurrSamplesPerSec=110.47226664581524, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:18:39,328] [INFO] [logging.py:96:log_dist] [Rank 0] step=7400, skipped=126, lr=[1.7797611782018942e-07, 1.7797611782018942e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7399|ppo_ep: 1|act_loss: 0.00040078163146972656|cri_loss: 0.0002589225769042969|unsuper_loss: 0.0
-average reward score: 5.84765625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.30%) |Training time=0.45s (21.05%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7400|ppo_ep: 1|act_loss: 0.05859375|cri_loss: 0.03192138671875|unsuper_loss: 0.0
-average reward score: 5.3359375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.40%) |Training time=0.45s (20.92%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7401|ppo_ep: 1|act_loss: 0.004222869873046875|cri_loss: 0.0022983551025390625|unsuper_loss: 0.0
-average reward score: 4.2421875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.05%) |Training time=0.46s (21.31%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7402|ppo_ep: 1|act_loss: -0.022674560546875|cri_loss: -0.01120758056640625|unsuper_loss: 0.0
-average reward score: 5.0546875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.16%) |Training time=0.45s (21.10%) |Others=0.10 (4.74%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7403|ppo_ep: 1|act_loss: 0.02545166015625|cri_loss: 0.01396942138671875|unsuper_loss: 0.0
-average reward score: 5.0546875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.46%) |Training time=0.45s (20.98%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7404|ppo_ep: 1|act_loss: 0.02581787109375|cri_loss: 0.01312255859375|unsuper_loss: 0.0
-average reward score: 5.3984375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.09%) |Training time=0.46s (21.27%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7405|ppo_ep: 1|act_loss: 0.0230865478515625|cri_loss: 0.01181793212890625|unsuper_loss: 0.0
-average reward score: 5.2890625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.49%) |Training time=0.47s (21.85%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7406|ppo_ep: 1|act_loss: 0.04376220703125|cri_loss: 0.0252227783203125|unsuper_loss: 0.0
-average reward score: 5.625
--------------------------------------------------------------------------------------
-|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.58s (69.98%) |Training time=0.49s (21.57%) |Others=0.19 (8.45%)|CurSamplesPerSec=14.14 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7407|ppo_ep: 1|act_loss: 0.018585205078125|cri_loss: 0.009429931640625|unsuper_loss: 0.0
-average reward score: 5.03125
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.05%) |Training time=0.47s (21.26%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7408|ppo_ep: 1|act_loss: 0.018463134765625|cri_loss: 0.00962066650390625|unsuper_loss: 0.0
-average reward score: 5.6875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.64%) |Training time=0.47s (21.71%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.55
-[2023-04-14 13:19:00,873] [INFO] [logging.py:96:log_dist] [Rank 0] step=7410, skipped=95, lr=[3.158308240075113e-07, 3.158308240075113e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:19:00,892] [INFO] [timer.py:199:stop] epoch=0/micro_step=7410/global_step=7410, RunningAvgSamplesPerSec=105.59323097908113, CurrSamplesPerSec=103.80286557061606, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:19:00,985] [INFO] [logging.py:96:log_dist] [Rank 0] step=7410, skipped=126, lr=[1.744267286331497e-07, 1.744267286331497e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7409|ppo_ep: 1|act_loss: 0.0030670166015625|cri_loss: 0.0015916824340820312|unsuper_loss: 0.0
-average reward score: 5.86328125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.45%) |Training time=0.47s (21.90%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7410|ppo_ep: 1|act_loss: -0.0164031982421875|cri_loss: -0.008026123046875|unsuper_loss: 0.0
-average reward score: 5.06640625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.41%) |Training time=0.48s (21.95%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7411|ppo_ep: 1|act_loss: -0.005390167236328125|cri_loss: -0.002544403076171875|unsuper_loss: 0.0
-average reward score: 5.328125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.45%) |Training time=0.45s (20.98%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7412|ppo_ep: 1|act_loss: 0.008453369140625|cri_loss: 0.00438690185546875|unsuper_loss: 0.0
-average reward score: 5.4296875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.88%) |Training time=0.46s (21.42%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.55
-[2023-04-14 13:19:09,581] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
-epoch: 0|step: 7413|ppo_ep: 1|act_loss: -0.0165252685546875|cri_loss: -0.00797271728515625|unsuper_loss: 0.0
-average reward score: 5.2109375
--------------------------------------------------------------------------------------
-|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.54%) |Training time=0.45s (21.23%) |Others=0.09 (4.23%)|CurSamplesPerSec=15.00 |AvgSamplesPerSec=14.55
-[2023-04-14 13:19:11,712] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
-epoch: 0|step: 7414|ppo_ep: 1|act_loss: -0.00782012939453125|cri_loss: -0.0037174224853515625|unsuper_loss: 0.0
-average reward score: 5.5703125
--------------------------------------------------------------------------------------
-|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.31%) |Training time=0.46s (21.49%) |Others=0.09 (4.20%)|CurSamplesPerSec=15.02 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7415|ppo_ep: 1|act_loss: -0.019073486328125|cri_loss: -0.009246826171875|unsuper_loss: 0.0
-average reward score: 6.4375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.44%) |Training time=0.45s (20.91%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7416|ppo_ep: 1|act_loss: -0.026031494140625|cri_loss: -0.01288604736328125|unsuper_loss: 0.0
-average reward score: 5.0234375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.88%) |Training time=0.46s (21.45%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7417|ppo_ep: 1|act_loss: -0.0194549560546875|cri_loss: -0.0088043212890625|unsuper_loss: 0.0
-average reward score: 5.34765625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.92%) |Training time=0.46s (21.40%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7418|ppo_ep: 1|act_loss: -0.02740478515625|cri_loss: -0.01326751708984375|unsuper_loss: 0.0
-average reward score: 6.375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.78%) |Training time=0.46s (21.56%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.55
-[2023-04-14 13:19:22,331] [INFO] [logging.py:96:log_dist] [Rank 0] step=7420, skipped=95, lr=[3.0925390339590433e-07, 3.0925390339590433e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:19:22,349] [INFO] [timer.py:199:stop] epoch=0/micro_step=7420/global_step=7420, RunningAvgSamplesPerSec=105.59626399643966, CurrSamplesPerSec=107.24917276422602, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:19:22,442] [INFO] [logging.py:96:log_dist] [Rank 0] step=7420, skipped=128, lr=[1.7161203361170996e-07, 1.7161203361170996e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7419|ppo_ep: 1|act_loss: -0.01311492919921875|cri_loss: -0.006343841552734375|unsuper_loss: 0.0
-average reward score: 5.234375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.64%) |Training time=0.46s (21.47%) |Others=0.11 (4.89%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7420|ppo_ep: 1|act_loss: -0.0055694580078125|cri_loss: -0.0026264190673828125|unsuper_loss: 0.0
-average reward score: 5.65625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.85%) |Training time=0.46s (21.50%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7421|ppo_ep: 1|act_loss: -0.005157470703125|cri_loss: -0.002201080322265625|unsuper_loss: 0.0
-average reward score: 5.5546875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.88%) |Training time=0.46s (21.54%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7422|ppo_ep: 1|act_loss: -0.018798828125|cri_loss: -0.00916290283203125|unsuper_loss: 0.0
-average reward score: 5.54296875
--------------------------------------------------------------------------------------
-|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.88%) |Training time=0.46s (19.78%) |Others=0.10 (4.33%)|CurSamplesPerSec=13.81 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7423|ppo_ep: 1|act_loss: 0.0019893646240234375|cri_loss: 0.0019683837890625|unsuper_loss: 0.0
-average reward score: 5.0234375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.98%) |Training time=0.46s (21.36%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7424|ppo_ep: 1|act_loss: -0.0025959014892578125|cri_loss: -0.0012378692626953125|unsuper_loss: 0.0
-average reward score: 6.0703125
--------------------------------------------------------------------------------------
-|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.68s (75.04%) |Training time=0.46s (20.45%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.29 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7425|ppo_ep: 1|act_loss: 0.00028014183044433594|cri_loss: 0.0005426406860351562|unsuper_loss: 0.0
-average reward score: 5.34375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.23%) |Training time=0.45s (21.18%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7426|ppo_ep: 1|act_loss: 0.028900146484375|cri_loss: 0.01474761962890625|unsuper_loss: 0.0
-average reward score: 5.2578125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.83%) |Training time=0.46s (21.53%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7427|ppo_ep: 1|act_loss: 0.017486572265625|cri_loss: 0.0092010498046875|unsuper_loss: 0.0
-average reward score: 5.984375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.82%) |Training time=0.46s (21.60%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7428|ppo_ep: 1|act_loss: 0.0107269287109375|cri_loss: 0.0064697265625|unsuper_loss: 0.0
-average reward score: 5.140625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.72%) |Training time=0.46s (21.61%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.55
-[2023-04-14 13:19:44,065] [INFO] [logging.py:96:log_dist] [Rank 0] step=7430, skipped=95, lr=[3.0274391703509267e-07, 3.0274391703509267e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:19:44,083] [INFO] [timer.py:199:stop] epoch=0/micro_step=7430/global_step=7430, RunningAvgSamplesPerSec=105.59870905382144, CurrSamplesPerSec=105.69964632309106, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:19:44,176] [INFO] [logging.py:96:log_dist] [Rank 0] step=7430, skipped=128, lr=[1.681247290517518e-07, 1.681247290517518e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7429|ppo_ep: 1|act_loss: 0.041107177734375|cri_loss: 0.02197265625|unsuper_loss: 0.0
-average reward score: 5.44140625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.72%) |Training time=0.47s (21.71%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7430|ppo_ep: 1|act_loss: 0.0006413459777832031|cri_loss: 0.00042319297790527344|unsuper_loss: 0.0
-average reward score: 5.08203125
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.05%) |Training time=0.51s (23.39%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7431|ppo_ep: 1|act_loss: 0.0380859375|cri_loss: 0.0194244384765625|unsuper_loss: 0.0
-average reward score: 5.5546875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.65s (76.00%) |Training time=0.42s (19.45%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7432|ppo_ep: 1|act_loss: 0.008270263671875|cri_loss: 0.004383087158203125|unsuper_loss: 0.0
-average reward score: 5.53125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.56%) |Training time=0.47s (21.81%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7433|ppo_ep: 1|act_loss: -0.0036334991455078125|cri_loss: -0.0017004013061523438|unsuper_loss: 0.0
-average reward score: 4.9921875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.98%) |Training time=0.47s (21.38%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7434|ppo_ep: 1|act_loss: -0.002941131591796875|cri_loss: -0.0012197494506835938|unsuper_loss: 0.0
-average reward score: 5.09375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.92%) |Training time=0.46s (21.42%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7435|ppo_ep: 1|act_loss: -0.00490570068359375|cri_loss: -0.0023326873779296875|unsuper_loss: 0.0
-average reward score: 5.29296875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.76%) |Training time=0.47s (21.61%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7436|ppo_ep: 1|act_loss: 0.0081329345703125|cri_loss: 0.00434112548828125|unsuper_loss: 0.0
-average reward score: 5.39453125
--------------------------------------------------------------------------------------
-|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.78s (75.42%) |Training time=0.48s (20.28%) |Others=0.10 (4.30%)|CurSamplesPerSec=13.55 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7437|ppo_ep: 1|act_loss: 0.00555419921875|cri_loss: 0.0030307769775390625|unsuper_loss: 0.0
-average reward score: 4.88671875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.49%) |Training time=0.45s (20.86%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7438|ppo_ep: 1|act_loss: -0.00777435302734375|cri_loss: -0.0036983489990234375|unsuper_loss: 0.0
-average reward score: 4.9609375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.65%) |Training time=0.47s (21.70%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.55
-[2023-04-14 13:20:05,976] [INFO] [logging.py:96:log_dist] [Rank 0] step=7440, skipped=95, lr=[2.9630096141877935e-07, 2.9630096141877935e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:20:05,994] [INFO] [timer.py:199:stop] epoch=0/micro_step=7440/global_step=7440, RunningAvgSamplesPerSec=105.59770834207129, CurrSamplesPerSec=103.76915880769367, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:20:06,087] [INFO] [logging.py:96:log_dist] [Rank 0] step=7440, skipped=128, lr=[1.646719885086523e-07, 1.646719885086523e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7439|ppo_ep: 1|act_loss: -0.0347900390625|cri_loss: -0.017120361328125|unsuper_loss: 0.0
-average reward score: 5.25
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.71%) |Training time=0.47s (21.76%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7440|ppo_ep: 1|act_loss: -0.00661468505859375|cri_loss: -0.0029449462890625|unsuper_loss: 0.0
-average reward score: 6.734375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.58%) |Training time=0.47s (21.77%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7441|ppo_ep: 1|act_loss: -0.016204833984375|cri_loss: -0.00795745849609375|unsuper_loss: 0.0
-average reward score: 5.33203125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.05%) |Training time=0.46s (21.41%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7442|ppo_ep: 1|act_loss: 0.031158447265625|cri_loss: 0.016265869140625|unsuper_loss: 0.0
-average reward score: 5.578125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.99%) |Training time=0.46s (21.36%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.55
-epoch: 0|step: 7443|ppo_ep: 1|act_loss: -0.027191162109375|cri_loss: -0.0133209228515625|unsuper_loss: 0.0
-average reward score: 5.6484375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.33%) |Training time=0.45s (21.10%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7444|ppo_ep: 1|act_loss: 0.01097869873046875|cri_loss: 0.0056304931640625|unsuper_loss: 0.0
-average reward score: 5.0859375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.20%) |Training time=0.45s (21.11%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7445|ppo_ep: 1|act_loss: 0.030792236328125|cri_loss: 0.016082763671875|unsuper_loss: 0.0
-average reward score: 5.91796875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.78%) |Training time=0.44s (20.65%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7446|ppo_ep: 1|act_loss: -0.001796722412109375|cri_loss: -0.0007958412170410156|unsuper_loss: 0.0
-average reward score: 5.43359375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.45%) |Training time=0.45s (20.85%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7447|ppo_ep: 1|act_loss: -0.0030612945556640625|cri_loss: -0.0014820098876953125|unsuper_loss: 0.0
-average reward score: 5.9140625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.52%) |Training time=0.46s (20.97%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7448|ppo_ep: 1|act_loss: 0.0092315673828125|cri_loss: 0.00507354736328125|unsuper_loss: 0.0
-average reward score: 5.92578125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.51%) |Training time=0.45s (20.81%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.56
-[2023-04-14 13:20:27,533] [INFO] [logging.py:96:log_dist] [Rank 0] step=7450, skipped=95, lr=[2.89925132047109e-07, 2.89925132047109e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:20:27,552] [INFO] [timer.py:199:stop] epoch=0/micro_step=7450/global_step=7450, RunningAvgSamplesPerSec=105.60254928538421, CurrSamplesPerSec=110.05864481747192, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:20:27,644] [INFO] [logging.py:96:log_dist] [Rank 0] step=7450, skipped=128, lr=[1.6125386316035496e-07, 1.6125386316035496e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7449|ppo_ep: 1|act_loss: -0.0038928985595703125|cri_loss: -0.0017147064208984375|unsuper_loss: 0.0
-average reward score: 5.84375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.27%) |Training time=0.45s (21.17%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7450|ppo_ep: 1|act_loss: 0.0303497314453125|cri_loss: 0.016021728515625|unsuper_loss: 0.0
-average reward score: 5.95703125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.80%) |Training time=0.46s (21.54%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7451|ppo_ep: 1|act_loss: 0.006317138671875|cri_loss: 0.0035724639892578125|unsuper_loss: 0.0
-average reward score: 5.42578125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.06%) |Training time=0.46s (21.36%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7452|ppo_ep: 1|act_loss: 0.04852294921875|cri_loss: 0.026397705078125|unsuper_loss: 0.0
-average reward score: 5.05078125
--------------------------------------------------------------------------------------
-|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.77s (76.20%) |Training time=0.45s (19.47%) |Others=0.10 (4.33%)|CurSamplesPerSec=13.80 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7453|ppo_ep: 1|act_loss: -0.019744873046875|cri_loss: -0.0097503662109375|unsuper_loss: 0.0
-average reward score: 5.6171875
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.46%) |Training time=0.46s (20.60%) |Others=0.13 (5.95%)|CurSamplesPerSec=14.48 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7454|ppo_ep: 1|act_loss: 0.013275146484375|cri_loss: 0.0069122314453125|unsuper_loss: 0.0
-average reward score: 5.0
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.65s (74.51%) |Training time=0.46s (20.95%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.48 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7455|ppo_ep: 1|act_loss: 0.0143890380859375|cri_loss: 0.007434844970703125|unsuper_loss: 0.0
-average reward score: 5.35546875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.11%) |Training time=0.46s (21.30%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7456|ppo_ep: 1|act_loss: 0.00958251953125|cri_loss: 0.005680084228515625|unsuper_loss: 0.0
-average reward score: 5.5
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.79%) |Training time=0.46s (21.45%) |Others=0.10 (4.75%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7457|ppo_ep: 1|act_loss: 0.0296630859375|cri_loss: 0.0150909423828125|unsuper_loss: 0.0
-average reward score: 4.953125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.95%) |Training time=0.49s (22.54%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7458|ppo_ep: 1|act_loss: 0.00717926025390625|cri_loss: 0.0038280487060546875|unsuper_loss: 0.0
-average reward score: 5.3515625
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.42%) |Training time=0.50s (23.00%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.56
-[2023-04-14 13:20:49,459] [INFO] [logging.py:96:log_dist] [Rank 0] step=7460, skipped=95, lr=[2.8361652342525515e-07, 2.8361652342525515e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:20:49,477] [INFO] [timer.py:199:stop] epoch=0/micro_step=7460/global_step=7460, RunningAvgSamplesPerSec=105.5989674076688, CurrSamplesPerSec=88.6655405472359, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:20:49,570] [INFO] [logging.py:96:log_dist] [Rank 0] step=7460, skipped=128, lr=[1.5787040367172379e-07, 1.5787040367172379e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7459|ppo_ep: 1|act_loss: 0.006500244140625|cri_loss: 0.0033111572265625|unsuper_loss: 0.0
-average reward score: 5.4765625
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.59s (71.89%) |Training time=0.52s (23.68%) |Others=0.10 (4.44%)|CurSamplesPerSec=14.48 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7460|ppo_ep: 1|act_loss: -0.029205322265625|cri_loss: -0.013946533203125|unsuper_loss: 0.0
-average reward score: 5.76171875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.13%) |Training time=0.48s (22.25%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7461|ppo_ep: 1|act_loss: 0.010284423828125|cri_loss: 0.005336761474609375|unsuper_loss: 0.0
-average reward score: 5.64453125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.39%) |Training time=0.48s (22.07%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7462|ppo_ep: 1|act_loss: 0.016448974609375|cri_loss: 0.008453369140625|unsuper_loss: 0.0
-average reward score: 5.2734375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.14%) |Training time=0.48s (22.25%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7463|ppo_ep: 1|act_loss: 0.0008373260498046875|cri_loss: 0.0004830360412597656|unsuper_loss: 0.0
-average reward score: 5.63671875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.46%) |Training time=0.48s (22.00%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7464|ppo_ep: 1|act_loss: 0.019256591796875|cri_loss: 0.01026153564453125|unsuper_loss: 0.0
-average reward score: 5.9453125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.11%) |Training time=0.48s (22.28%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7465|ppo_ep: 1|act_loss: 0.00318145751953125|cri_loss: 0.0030651092529296875|unsuper_loss: 0.0
-average reward score: 5.6171875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.48%) |Training time=0.48s (21.98%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7466|ppo_ep: 1|act_loss: 0.0126190185546875|cri_loss: 0.006465911865234375|unsuper_loss: 0.0
-average reward score: 5.3671875
--------------------------------------------------------------------------------------
-|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.76s (74.06%) |Training time=0.51s (21.69%) |Others=0.10 (4.25%)|CurSamplesPerSec=13.50 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7467|ppo_ep: 1|act_loss: 0.009735107421875|cri_loss: 0.004962921142578125|unsuper_loss: 0.0
-average reward score: 5.33203125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.29%) |Training time=0.48s (22.20%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7468|ppo_ep: 1|act_loss: -0.019805908203125|cri_loss: -0.00965118408203125|unsuper_loss: 0.0
-average reward score: 5.40625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.22%) |Training time=0.48s (22.19%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.56
-[2023-04-14 13:21:11,390] [INFO] [logging.py:96:log_dist] [Rank 0] step=7470, skipped=95, lr=[2.7737522906201384e-07, 2.7737522906201384e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:21:11,408] [INFO] [timer.py:199:stop] epoch=0/micro_step=7470/global_step=7470, RunningAvgSamplesPerSec=105.58988139804403, CurrSamplesPerSec=98.6508434562043, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:21:11,501] [INFO] [logging.py:96:log_dist] [Rank 0] step=7470, skipped=128, lr=[1.5452166019378989e-07, 1.5452166019378989e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7469|ppo_ep: 1|act_loss: -0.00325775146484375|cri_loss: -0.001453399658203125|unsuper_loss: 0.0
-average reward score: 5.12890625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.17%) |Training time=0.49s (22.33%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7470|ppo_ep: 1|act_loss: 0.00650787353515625|cri_loss: 0.0034236907958984375|unsuper_loss: 0.0
-average reward score: 5.0078125
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.17%) |Training time=0.49s (22.28%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.49 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7471|ppo_ep: 1|act_loss: 0.00421905517578125|cri_loss: 0.003101348876953125|unsuper_loss: 0.0
-average reward score: 5.09375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.45%) |Training time=0.48s (22.00%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7472|ppo_ep: 1|act_loss: 0.02142333984375|cri_loss: 0.011138916015625|unsuper_loss: 0.0
-average reward score: 5.0390625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.25%) |Training time=0.48s (22.04%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7473|ppo_ep: 1|act_loss: 0.015045166015625|cri_loss: 0.00791168212890625|unsuper_loss: 0.0
-average reward score: 5.03125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.34%) |Training time=0.48s (22.14%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7474|ppo_ep: 1|act_loss: 6.431341171264648e-05|cri_loss: 0.00018262863159179688|unsuper_loss: 0.0
-average reward score: 5.609375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.90%) |Training time=0.49s (22.51%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7475|ppo_ep: 1|act_loss: 0.01222991943359375|cri_loss: 0.006435394287109375|unsuper_loss: 0.0
-average reward score: 5.23828125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.21%) |Training time=0.48s (22.27%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7476|ppo_ep: 1|act_loss: 0.0091552734375|cri_loss: 0.00495147705078125|unsuper_loss: 0.0
-average reward score: 5.109375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.78%) |Training time=0.49s (22.64%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7477|ppo_ep: 1|act_loss: -0.018341064453125|cri_loss: -0.009063720703125|unsuper_loss: 0.0
-average reward score: 5.07421875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.05%) |Training time=0.49s (22.43%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7478|ppo_ep: 1|act_loss: 0.0106048583984375|cri_loss: 0.005496978759765625|unsuper_loss: 0.0
-average reward score: 5.2421875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.79%) |Training time=0.49s (22.60%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.56
-[2023-04-14 13:21:33,187] [INFO] [logging.py:96:log_dist] [Rank 0] step=7480, skipped=95, lr=[2.71201341468426e-07, 2.71201341468426e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:21:33,206] [INFO] [timer.py:199:stop] epoch=0/micro_step=7480/global_step=7480, RunningAvgSamplesPerSec=105.5795860028403, CurrSamplesPerSec=96.38076143670229, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:21:33,298] [INFO] [logging.py:96:log_dist] [Rank 0] step=7480, skipped=128, lr=[1.5120768236301042e-07, 1.5120768236301042e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7479|ppo_ep: 1|act_loss: -0.0081634521484375|cri_loss: -0.0033721923828125|unsuper_loss: 0.0
-average reward score: 5.27734375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.75%) |Training time=0.50s (22.75%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7480|ppo_ep: 1|act_loss: 0.0211944580078125|cri_loss: 0.01141357421875|unsuper_loss: 0.0
-average reward score: 5.00390625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.88%) |Training time=0.49s (22.53%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7481|ppo_ep: 1|act_loss: 0.0014867782592773438|cri_loss: 0.0010585784912109375|unsuper_loss: 0.0
-average reward score: 6.0859375
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.59%) |Training time=0.49s (22.41%) |Others=0.11 (5.00%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7482|ppo_ep: 1|act_loss: 0.006160736083984375|cri_loss: 0.003284454345703125|unsuper_loss: 0.0
-average reward score: 6.0078125
--------------------------------------------------------------------------------------
-|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.75s (74.88%) |Training time=0.49s (20.82%) |Others=0.10 (4.30%)|CurSamplesPerSec=13.70 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7483|ppo_ep: 1|act_loss: -0.007537841796875|cri_loss: -0.00336456298828125|unsuper_loss: 0.0
-average reward score: 5.2578125
--------------------------------------------------------------------------------------
-|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.69s (75.17%) |Training time=0.46s (20.45%) |Others=0.10 (4.37%)|CurSamplesPerSec=14.27 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7484|ppo_ep: 1|act_loss: 0.002811431884765625|cri_loss: 0.0015897750854492188|unsuper_loss: 0.0
-average reward score: 5.72265625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.91%) |Training time=0.46s (21.42%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7485|ppo_ep: 1|act_loss: -0.0172119140625|cri_loss: -0.00836944580078125|unsuper_loss: 0.0
-average reward score: 5.4453125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.40%) |Training time=0.45s (20.98%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7486|ppo_ep: 1|act_loss: 0.004638671875|cri_loss: 0.0026397705078125|unsuper_loss: 0.0
-average reward score: 5.3359375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.90%) |Training time=0.46s (21.42%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7487|ppo_ep: 1|act_loss: -0.00982666015625|cri_loss: -0.004459381103515625|unsuper_loss: 0.0
-average reward score: 4.921875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.93%) |Training time=0.46s (21.49%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7488|ppo_ep: 1|act_loss: -0.0114898681640625|cri_loss: -0.00560760498046875|unsuper_loss: 0.0
-average reward score: 5.58203125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.96%) |Training time=0.49s (22.41%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.56
-[2023-04-14 13:21:55,054] [INFO] [logging.py:96:log_dist] [Rank 0] step=7490, skipped=95, lr=[2.6509495215639946e-07, 2.6509495215639946e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:21:55,073] [INFO] [timer.py:199:stop] epoch=0/micro_step=7490/global_step=7490, RunningAvgSamplesPerSec=105.57695172591919, CurrSamplesPerSec=101.3811761080209, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:21:55,165] [INFO] [logging.py:96:log_dist] [Rank 0] step=7490, skipped=128, lr=[1.4792851930053116e-07, 1.4792851930053116e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7489|ppo_ep: 1|act_loss: -0.01031494140625|cri_loss: -0.0049896240234375|unsuper_loss: 0.0
-average reward score: 4.97265625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.36%) |Training time=0.48s (22.13%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7490|ppo_ep: 1|act_loss: 0.003208160400390625|cri_loss: 0.0019092559814453125|unsuper_loss: 0.0
-average reward score: 5.1875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.96%) |Training time=0.49s (22.40%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7491|ppo_ep: 1|act_loss: -0.0019426345825195312|cri_loss: -0.000942230224609375|unsuper_loss: 0.0
-average reward score: 5.7734375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.96%) |Training time=0.47s (21.43%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7492|ppo_ep: 1|act_loss: -0.01708984375|cri_loss: -0.00830078125|unsuper_loss: 0.0
-average reward score: 6.19921875
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.84%) |Training time=0.45s (20.47%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7493|ppo_ep: 1|act_loss: -0.003551483154296875|cri_loss: -0.0016069412231445312|unsuper_loss: 0.0
-average reward score: 5.31640625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.16%) |Training time=0.46s (21.29%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7494|ppo_ep: 1|act_loss: -0.00821685791015625|cri_loss: -0.003978729248046875|unsuper_loss: 0.0
-average reward score: 5.8515625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.98%) |Training time=0.46s (21.32%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7495|ppo_ep: 1|act_loss: 0.030670166015625|cri_loss: 0.0157470703125|unsuper_loss: 0.0
-average reward score: 5.19140625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.86%) |Training time=0.47s (21.49%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7496|ppo_ep: 1|act_loss: 0.013885498046875|cri_loss: 0.007472991943359375|unsuper_loss: 0.0
-average reward score: 5.64453125
--------------------------------------------------------------------------------------
-|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.63s (71.27%) |Training time=0.45s (19.92%) |Others=0.20 (8.81%)|CurSamplesPerSec=14.01 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7497|ppo_ep: 1|act_loss: -0.0068511962890625|cri_loss: -0.0031795501708984375|unsuper_loss: 0.0
-average reward score: 5.50390625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.92%) |Training time=0.45s (20.53%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7498|ppo_ep: 1|act_loss: -0.0098114013671875|cri_loss: -0.0047607421875|unsuper_loss: 0.0
-average reward score: 5.3671875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.71%) |Training time=0.47s (21.60%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.56
-[2023-04-14 13:22:16,928] [INFO] [logging.py:96:log_dist] [Rank 0] step=7500, skipped=95, lr=[2.5905615163735286e-07, 2.5905615163735286e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:22:16,946] [INFO] [timer.py:199:stop] epoch=0/micro_step=7500/global_step=7500, RunningAvgSamplesPerSec=105.57782550562153, CurrSamplesPerSec=104.88119826961066, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:22:17,039] [INFO] [logging.py:96:log_dist] [Rank 0] step=7500, skipped=128, lr=[1.4468421961145924e-07, 1.4468421961145924e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7499|ppo_ep: 1|act_loss: 0.00513458251953125|cri_loss: 0.0026531219482421875|unsuper_loss: 0.0
-average reward score: 5.171875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.80%) |Training time=0.47s (21.64%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7500|ppo_ep: 1|act_loss: -0.0026226043701171875|cri_loss: -0.0012683868408203125|unsuper_loss: 0.0
-average reward score: 5.91796875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.44%) |Training time=0.45s (20.88%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7501|ppo_ep: 1|act_loss: -0.0023021697998046875|cri_loss: 8.96453857421875e-05|unsuper_loss: 0.0
-average reward score: 5.4296875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.97%) |Training time=0.46s (21.29%) |Others=0.10 (4.74%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7502|ppo_ep: 1|act_loss: 0.0137939453125|cri_loss: 0.007843017578125|unsuper_loss: 0.0
-average reward score: 4.91015625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.03%) |Training time=0.44s (20.38%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7503|ppo_ep: 1|act_loss: -0.0096435546875|cri_loss: -0.00400543212890625|unsuper_loss: 0.0
-average reward score: 5.3671875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.85%) |Training time=0.44s (20.57%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7504|ppo_ep: 1|act_loss: -0.02020263671875|cri_loss: -0.00994873046875|unsuper_loss: 0.0
-average reward score: 5.62109375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.19%) |Training time=0.43s (20.13%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7505|ppo_ep: 1|act_loss: -0.0022716522216796875|cri_loss: -0.0006518363952636719|unsuper_loss: 0.0
-average reward score: 5.23046875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.55%) |Training time=0.43s (19.86%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7506|ppo_ep: 1|act_loss: 0.011077880859375|cri_loss: 0.00572967529296875|unsuper_loss: 0.0
-average reward score: 5.21484375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.83%) |Training time=0.42s (19.46%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7507|ppo_ep: 1|act_loss: 0.001827239990234375|cri_loss: 0.0010480880737304688|unsuper_loss: 0.0
-average reward score: 5.3359375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.81%) |Training time=0.42s (19.60%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7508|ppo_ep: 1|act_loss: 0.00127410888671875|cri_loss: 0.0009026527404785156|unsuper_loss: 0.0
-average reward score: 5.0625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.28%) |Training time=0.43s (20.06%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.56
-[2023-04-14 13:22:38,417] [INFO] [logging.py:96:log_dist] [Rank 0] step=7510, skipped=95, lr=[2.5308502942087883e-07, 2.5308502942087883e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:22:38,436] [INFO] [timer.py:199:stop] epoch=0/micro_step=7510/global_step=7510, RunningAvgSamplesPerSec=105.59227493403863, CurrSamplesPerSec=120.76162267067473, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:22:38,528] [INFO] [logging.py:96:log_dist] [Rank 0] step=7510, skipped=128, lr=[1.4147483138414269e-07, 1.4147483138414269e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7509|ppo_ep: 1|act_loss: 0.0016269683837890625|cri_loss: 0.0010166168212890625|unsuper_loss: 0.0
-average reward score: 5.0546875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.44%) |Training time=0.43s (19.97%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7510|ppo_ep: 1|act_loss: -0.020721435546875|cri_loss: -0.01023101806640625|unsuper_loss: 0.0
-average reward score: 5.625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.98%) |Training time=0.44s (20.35%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7511|ppo_ep: 1|act_loss: -0.04547119140625|cri_loss: -0.01995849609375|unsuper_loss: 0.0
-average reward score: 5.30078125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.43%) |Training time=0.43s (20.00%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7512|ppo_ep: 1|act_loss: -0.01806640625|cri_loss: -0.00852203369140625|unsuper_loss: 0.0
-average reward score: 5.9765625
--------------------------------------------------------------------------------------
-|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.80s (77.03%) |Training time=0.44s (18.67%) |Others=0.10 (4.30%)|CurSamplesPerSec=13.70 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7513|ppo_ep: 1|act_loss: 0.0228271484375|cri_loss: 0.01172637939453125|unsuper_loss: 0.0
-average reward score: 5.421875
--------------------------------------------------------------------------------------
-|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.71s (76.25%) |Training time=0.44s (19.37%) |Others=0.10 (4.38%)|CurSamplesPerSec=14.23 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7514|ppo_ep: 1|act_loss: -0.031982421875|cri_loss: -0.01556396484375|unsuper_loss: 0.0
-average reward score: 5.7265625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.88%) |Training time=0.44s (20.46%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.56
-[2023-04-14 13:22:51,702] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
-epoch: 0|step: 7515|ppo_ep: 1|act_loss: 0.0291748046875|cri_loss: 0.0157318115234375|unsuper_loss: 0.0
-average reward score: 5.1015625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.37%) |Training time=0.44s (20.51%) |Others=0.09 (4.11%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.56
-[2023-04-14 13:22:53,844] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
-epoch: 0|step: 7516|ppo_ep: 1|act_loss: -0.0148773193359375|cri_loss: -0.006542205810546875|unsuper_loss: 0.0
-average reward score: 5.33203125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.22%) |Training time=0.44s (20.61%) |Others=0.09 (4.17%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7517|ppo_ep: 1|act_loss: 0.0228118896484375|cri_loss: 0.01151275634765625|unsuper_loss: 0.0
-average reward score: 5.05859375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.59%) |Training time=0.45s (20.87%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7518|ppo_ep: 1|act_loss: 0.0023403167724609375|cri_loss: 0.0014944076538085938|unsuper_loss: 0.0
-average reward score: 5.0859375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.83%) |Training time=0.44s (20.50%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.56
-[2023-04-14 13:23:00,186] [INFO] [logging.py:96:log_dist] [Rank 0] step=7520, skipped=95, lr=[2.471816740134132e-07, 2.471816740134132e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:23:00,204] [INFO] [timer.py:199:stop] epoch=0/micro_step=7520/global_step=7520, RunningAvgSamplesPerSec=105.6047174355037, CurrSamplesPerSec=117.03275525160899, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:23:00,297] [INFO] [logging.py:96:log_dist] [Rank 0] step=7520, skipped=130, lr=[1.3893248903978695e-07, 1.3893248903978695e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7519|ppo_ep: 1|act_loss: -0.02703857421875|cri_loss: -0.01279449462890625|unsuper_loss: 0.0
-average reward score: 5.1484375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.10%) |Training time=0.44s (20.33%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7520|ppo_ep: 1|act_loss: 0.0146331787109375|cri_loss: 0.00946044921875|unsuper_loss: 0.0
-average reward score: 5.61328125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.85%) |Training time=0.44s (20.49%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7521|ppo_ep: 1|act_loss: 0.033538818359375|cri_loss: 0.0172271728515625|unsuper_loss: 0.0
-average reward score: 6.171875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.28%) |Training time=0.43s (20.13%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7522|ppo_ep: 1|act_loss: 0.00530242919921875|cri_loss: 0.0028667449951171875|unsuper_loss: 0.0
-average reward score: 5.40234375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.99%) |Training time=0.44s (20.35%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7523|ppo_ep: 1|act_loss: 0.003238677978515625|cri_loss: 0.0018138885498046875|unsuper_loss: 0.0
-average reward score: 5.19921875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.26%) |Training time=0.43s (20.07%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7524|ppo_ep: 1|act_loss: -0.0134124755859375|cri_loss: -0.006565093994140625|unsuper_loss: 0.0
-average reward score: 5.84375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.26%) |Training time=0.43s (20.06%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7525|ppo_ep: 1|act_loss: 0.0013933181762695312|cri_loss: 0.0007691383361816406|unsuper_loss: 0.0
-average reward score: 4.94140625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.72%) |Training time=0.45s (20.73%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7526|ppo_ep: 1|act_loss: 0.00327301025390625|cri_loss: 0.001712799072265625|unsuper_loss: 0.0
-average reward score: 5.6171875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.20%) |Training time=0.43s (20.13%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7527|ppo_ep: 1|act_loss: -0.00595855712890625|cri_loss: -0.0021209716796875|unsuper_loss: 0.0
-average reward score: 5.5625
--------------------------------------------------------------------------------------
-|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.78s (77.04%) |Training time=0.43s (18.67%) |Others=0.10 (4.30%)|CurSamplesPerSec=13.88 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7528|ppo_ep: 1|act_loss: 0.03192138671875|cri_loss: 0.0162353515625|unsuper_loss: 0.0
-average reward score: 5.6953125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.19%) |Training time=0.43s (20.11%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.56
-[2023-04-14 13:23:21,842] [INFO] [logging.py:96:log_dist] [Rank 0] step=7530, skipped=95, lr=[2.413461729169248e-07, 2.413461729169248e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:23:21,861] [INFO] [timer.py:199:stop] epoch=0/micro_step=7530/global_step=7530, RunningAvgSamplesPerSec=105.61827921516286, CurrSamplesPerSec=111.98603613450567, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:23:21,953] [INFO] [logging.py:96:log_dist] [Rank 0] step=7530, skipped=130, lr=[1.357860609740455e-07, 1.357860609740455e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7529|ppo_ep: 1|act_loss: -0.0020847320556640625|cri_loss: 0.0007724761962890625|unsuper_loss: 0.0
-average reward score: 5.8984375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.60%) |Training time=0.45s (20.83%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7530|ppo_ep: 1|act_loss: -0.008270263671875|cri_loss: -0.00394439697265625|unsuper_loss: 0.0
-average reward score: 4.8828125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.35%) |Training time=0.45s (21.00%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7531|ppo_ep: 1|act_loss: 0.037750244140625|cri_loss: 0.0194091796875|unsuper_loss: 0.0
-average reward score: 5.27734375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.88%) |Training time=0.44s (20.54%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7532|ppo_ep: 1|act_loss: 0.01190185546875|cri_loss: 0.00634765625|unsuper_loss: 0.0
-average reward score: 5.734375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.73%) |Training time=0.44s (20.61%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7533|ppo_ep: 1|act_loss: 0.0234375|cri_loss: 0.0126495361328125|unsuper_loss: 0.0
-average reward score: 5.609375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.95%) |Training time=0.44s (20.45%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7534|ppo_ep: 1|act_loss: -0.00909423828125|cri_loss: -0.004352569580078125|unsuper_loss: 0.0
-average reward score: 5.9609375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.94%) |Training time=0.44s (20.39%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7535|ppo_ep: 1|act_loss: 0.018157958984375|cri_loss: 0.0093536376953125|unsuper_loss: 0.0
-average reward score: 4.9921875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.31%) |Training time=0.43s (20.10%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7536|ppo_ep: 1|act_loss: 0.01336669921875|cri_loss: 0.007061004638671875|unsuper_loss: 0.0
-average reward score: 5.1484375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.58%) |Training time=0.45s (20.77%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7537|ppo_ep: 1|act_loss: -0.016876220703125|cri_loss: -0.00801849365234375|unsuper_loss: 0.0
-average reward score: 4.66796875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.74%) |Training time=0.44s (20.69%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7538|ppo_ep: 1|act_loss: -0.024139404296875|cri_loss: -0.0118560791015625|unsuper_loss: 0.0
-average reward score: 4.9765625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.63%) |Training time=0.44s (20.71%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.56
-[2023-04-14 13:23:43,304] [INFO] [logging.py:96:log_dist] [Rank 0] step=7540, skipped=95, lr=[2.355786126276159e-07, 2.355786126276159e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:23:43,322] [INFO] [timer.py:199:stop] epoch=0/micro_step=7540/global_step=7540, RunningAvgSamplesPerSec=105.62894102683586, CurrSamplesPerSec=112.07852142696694, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:23:43,414] [INFO] [logging.py:96:log_dist] [Rank 0] step=7540, skipped=130, lr=[1.3267467626223606e-07, 1.3267467626223606e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7539|ppo_ep: 1|act_loss: 0.0186767578125|cri_loss: 0.00942230224609375|unsuper_loss: 0.0
-average reward score: 5.96484375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.60%) |Training time=0.45s (20.85%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7540|ppo_ep: 1|act_loss: -0.011962890625|cri_loss: -0.005840301513671875|unsuper_loss: 0.0
-average reward score: 5.5546875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.59%) |Training time=0.44s (20.74%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7541|ppo_ep: 1|act_loss: -0.002826690673828125|cri_loss: -0.001079559326171875|unsuper_loss: 0.0
-average reward score: 5.16796875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.35%) |Training time=0.43s (20.07%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7542|ppo_ep: 1|act_loss: 0.0143280029296875|cri_loss: 0.00728607177734375|unsuper_loss: 0.0
-average reward score: 5.67578125
--------------------------------------------------------------------------------------
-|E2E latency=2.40s |Gather latency=0.00s (0.00%) |Generate time=1.81s (75.39%) |Training time=0.44s (18.29%) |Others=0.15 (6.33%)|CurSamplesPerSec=13.36 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7543|ppo_ep: 1|act_loss: -0.0010290145874023438|cri_loss: 0.00040149688720703125|unsuper_loss: 0.0
-average reward score: 5.13671875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.83%) |Training time=0.44s (20.59%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7544|ppo_ep: 1|act_loss: -0.026947021484375|cri_loss: -0.01317596435546875|unsuper_loss: 0.0
-average reward score: 4.73828125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.70%) |Training time=0.44s (20.63%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7545|ppo_ep: 1|act_loss: -0.00775146484375|cri_loss: -0.0036716461181640625|unsuper_loss: 0.0
-average reward score: 5.8046875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.00%) |Training time=0.44s (20.32%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7546|ppo_ep: 1|act_loss: -0.00421142578125|cri_loss: -0.0018768310546875|unsuper_loss: 0.0
-average reward score: 5.3515625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.58%) |Training time=0.45s (20.85%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7547|ppo_ep: 1|act_loss: 0.026153564453125|cri_loss: 0.01334381103515625|unsuper_loss: 0.0
-average reward score: 5.39453125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.13%) |Training time=0.43s (20.23%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7548|ppo_ep: 1|act_loss: -0.0076904296875|cri_loss: -0.003749847412109375|unsuper_loss: 0.0
-average reward score: 5.359375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.10%) |Training time=0.43s (20.20%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.56
-[2023-04-14 13:24:05,009] [INFO] [logging.py:96:log_dist] [Rank 0] step=7550, skipped=95, lr=[2.298790786346439e-07, 2.298790786346439e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:24:05,027] [INFO] [timer.py:199:stop] epoch=0/micro_step=7550/global_step=7550, RunningAvgSamplesPerSec=105.64110098341945, CurrSamplesPerSec=114.46428324718376, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:24:05,120] [INFO] [logging.py:96:log_dist] [Rank 0] step=7550, skipped=130, lr=[1.2959838102258537e-07, 1.2959838102258537e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7549|ppo_ep: 1|act_loss: -0.001949310302734375|cri_loss: -0.0006542205810546875|unsuper_loss: 0.0
-average reward score: 5.19921875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.79%) |Training time=0.44s (20.64%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7550|ppo_ep: 1|act_loss: -0.004299163818359375|cri_loss: -0.0019626617431640625|unsuper_loss: 0.0
-average reward score: 4.98828125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.95%) |Training time=0.44s (20.37%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7551|ppo_ep: 1|act_loss: 0.004322052001953125|cri_loss: 0.002300262451171875|unsuper_loss: 0.0
-average reward score: 5.3359375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.15%) |Training time=0.43s (20.26%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7552|ppo_ep: 1|act_loss: -0.00215911865234375|cri_loss: -0.0009260177612304688|unsuper_loss: 0.0
-average reward score: 5.7734375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.82%) |Training time=0.44s (20.52%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7553|ppo_ep: 1|act_loss: 0.02783203125|cri_loss: 0.01480865478515625|unsuper_loss: 0.0
-average reward score: 5.58984375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.74%) |Training time=0.44s (20.69%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7554|ppo_ep: 1|act_loss: 0.00394439697265625|cri_loss: 0.002223968505859375|unsuper_loss: 0.0
-average reward score: 5.671875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.09%) |Training time=0.43s (20.23%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7555|ppo_ep: 1|act_loss: -0.0018434524536132812|cri_loss: -0.0006570816040039062|unsuper_loss: 0.0
-average reward score: 5.90625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.36%) |Training time=0.43s (20.06%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7556|ppo_ep: 1|act_loss: 0.0039520263671875|cri_loss: 0.002269744873046875|unsuper_loss: 0.0
-average reward score: 5.03125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.48%) |Training time=0.45s (20.88%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7557|ppo_ep: 1|act_loss: 0.0239105224609375|cri_loss: 0.012115478515625|unsuper_loss: 0.0
-average reward score: 5.1484375
--------------------------------------------------------------------------------------
-|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.00%) |Training time=0.45s (20.26%) |Others=0.17 (7.75%)|CurSamplesPerSec=14.38 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7558|ppo_ep: 1|act_loss: -0.02239990234375|cri_loss: -0.01085662841796875|unsuper_loss: 0.0
-average reward score: 5.34375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.22%) |Training time=0.43s (20.09%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.56
-[2023-04-14 13:24:26,536] [INFO] [logging.py:96:log_dist] [Rank 0] step=7560, skipped=95, lr=[2.242476554188525e-07, 2.242476554188525e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:24:26,554] [INFO] [timer.py:199:stop] epoch=0/micro_step=7560/global_step=7560, RunningAvgSamplesPerSec=105.65329125870058, CurrSamplesPerSec=115.2066690872322, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:24:26,647] [INFO] [logging.py:96:log_dist] [Rank 0] step=7560, skipped=130, lr=[1.2655722085321064e-07, 1.2655722085321064e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7559|ppo_ep: 1|act_loss: -0.02569580078125|cri_loss: -0.012420654296875|unsuper_loss: 0.0
-average reward score: 6.05859375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.85%) |Training time=0.44s (20.57%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7560|ppo_ep: 1|act_loss: 0.004360198974609375|cri_loss: 0.0026397705078125|unsuper_loss: 0.0
-average reward score: 5.2578125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.61%) |Training time=0.44s (20.72%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7561|ppo_ep: 1|act_loss: -0.00481414794921875|cri_loss: -0.002254486083984375|unsuper_loss: 0.0
-average reward score: 5.64453125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.81%) |Training time=0.44s (20.51%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7562|ppo_ep: 1|act_loss: -0.0236053466796875|cri_loss: -0.01155853271484375|unsuper_loss: 0.0
-average reward score: 5.35546875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.64%) |Training time=0.45s (20.81%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7563|ppo_ep: 1|act_loss: 0.015167236328125|cri_loss: 0.007965087890625|unsuper_loss: 0.0
-average reward score: 5.40625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.81%) |Training time=0.44s (20.62%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7564|ppo_ep: 1|act_loss: -0.0111083984375|cri_loss: -0.00547027587890625|unsuper_loss: 0.0
-average reward score: 4.9609375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.76%) |Training time=0.44s (20.58%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7565|ppo_ep: 1|act_loss: 0.00646209716796875|cri_loss: 0.003376007080078125|unsuper_loss: 0.0
-average reward score: 5.390625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.82%) |Training time=0.44s (20.52%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7566|ppo_ep: 1|act_loss: -0.0036830902099609375|cri_loss: -0.001728057861328125|unsuper_loss: 0.0
-average reward score: 4.80859375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.72%) |Training time=0.44s (20.60%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7567|ppo_ep: 1|act_loss: -0.0219268798828125|cri_loss: -0.0107879638671875|unsuper_loss: 0.0
-average reward score: 5.24609375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.78%) |Training time=0.44s (20.56%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7568|ppo_ep: 1|act_loss: -0.01464080810546875|cri_loss: -0.006877899169921875|unsuper_loss: 0.0
-average reward score: 5.51171875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.68%) |Training time=0.44s (20.67%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.56
-[2023-04-14 13:24:47,993] [INFO] [logging.py:96:log_dist] [Rank 0] step=7570, skipped=95, lr=[2.186844264515187e-07, 2.186844264515187e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:24:48,012] [INFO] [timer.py:199:stop] epoch=0/micro_step=7570/global_step=7570, RunningAvgSamplesPerSec=105.66395711527322, CurrSamplesPerSec=114.59886133490893, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:24:48,104] [INFO] [logging.py:96:log_dist] [Rank 0] step=7570, skipped=130, lr=[1.235512408314418e-07, 1.235512408314418e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7569|ppo_ep: 1|act_loss: 0.05194091796875|cri_loss: 0.0263824462890625|unsuper_loss: 0.0
-average reward score: 5.3359375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.76%) |Training time=0.44s (20.58%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7570|ppo_ep: 1|act_loss: -0.0175018310546875|cri_loss: -0.00848388671875|unsuper_loss: 0.0
-average reward score: 5.18359375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.63%) |Training time=0.44s (20.71%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7571|ppo_ep: 1|act_loss: 0.0053863525390625|cri_loss: 0.0031070709228515625|unsuper_loss: 0.0
-average reward score: 5.046875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.95%) |Training time=0.44s (20.23%) |Others=0.10 (4.81%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7572|ppo_ep: 1|act_loss: -0.00743865966796875|cri_loss: -0.0034351348876953125|unsuper_loss: 0.0
-average reward score: 5.4453125
--------------------------------------------------------------------------------------
-|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.69s (72.01%) |Training time=0.56s (23.73%) |Others=0.10 (4.26%)|CurSamplesPerSec=13.63 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7573|ppo_ep: 1|act_loss: 0.01708984375|cri_loss: 0.0089111328125|unsuper_loss: 0.0
-average reward score: 5.33203125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.65%) |Training time=0.44s (20.68%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7574|ppo_ep: 1|act_loss: 0.00736236572265625|cri_loss: 0.00399017333984375|unsuper_loss: 0.0
-average reward score: 5.34375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.56%) |Training time=0.45s (20.78%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7575|ppo_ep: 1|act_loss: -0.009674072265625|cri_loss: -0.004669189453125|unsuper_loss: 0.0
-average reward score: 4.7734375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.71%) |Training time=0.44s (20.63%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7576|ppo_ep: 1|act_loss: 0.016326904296875|cri_loss: 0.00838470458984375|unsuper_loss: 0.0
-average reward score: 6.1953125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.91%) |Training time=0.44s (20.37%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7577|ppo_ep: 1|act_loss: 0.002529144287109375|cri_loss: 0.0013599395751953125|unsuper_loss: 0.0
-average reward score: 5.19140625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.07%) |Training time=0.44s (20.33%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7578|ppo_ep: 1|act_loss: 0.126953125|cri_loss: 0.07867431640625|unsuper_loss: 0.0
-average reward score: 5.875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.40%) |Training time=0.45s (20.97%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.56
-[2023-04-14 13:25:09,680] [INFO] [logging.py:96:log_dist] [Rank 0] step=7580, skipped=95, lr=[2.131894741931159e-07, 2.131894741931159e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:25:09,699] [INFO] [timer.py:199:stop] epoch=0/micro_step=7580/global_step=7580, RunningAvgSamplesPerSec=105.6733901747893, CurrSamplesPerSec=118.37709679268664, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:25:09,791] [INFO] [logging.py:96:log_dist] [Rank 0] step=7580, skipped=130, lr=[1.2058048551315455e-07, 1.2058048551315455e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7579|ppo_ep: 1|act_loss: -0.00325775146484375|cri_loss: -0.0013904571533203125|unsuper_loss: 0.0
-average reward score: 5.60546875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.10%) |Training time=0.43s (20.23%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7580|ppo_ep: 1|act_loss: -0.01525115966796875|cri_loss: -0.0068359375|unsuper_loss: 0.0
-average reward score: 5.8984375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.08%) |Training time=0.43s (20.24%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7581|ppo_ep: 1|act_loss: 0.06732177734375|cri_loss: 0.038665771484375|unsuper_loss: 0.0
-average reward score: 5.6640625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.27%) |Training time=0.43s (20.10%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7582|ppo_ep: 1|act_loss: 0.00707244873046875|cri_loss: 0.0036716461181640625|unsuper_loss: 0.0
-average reward score: 5.8515625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.95%) |Training time=0.44s (20.38%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7583|ppo_ep: 1|act_loss: 0.040618896484375|cri_loss: 0.0207366943359375|unsuper_loss: 0.0
-average reward score: 5.34375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.15%) |Training time=0.43s (20.17%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7584|ppo_ep: 1|act_loss: -0.0211181640625|cri_loss: -0.01018524169921875|unsuper_loss: 0.0
-average reward score: 4.8203125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.80%) |Training time=0.44s (20.52%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7585|ppo_ep: 1|act_loss: -0.0008382797241210938|cri_loss: -0.0003597736358642578|unsuper_loss: 0.0
-average reward score: 5.27734375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.13%) |Training time=0.43s (20.19%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7586|ppo_ep: 1|act_loss: 0.00284576416015625|cri_loss: 0.0018253326416015625|unsuper_loss: 0.0
-average reward score: 5.23046875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.99%) |Training time=0.44s (20.34%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7587|ppo_ep: 1|act_loss: -0.0142364501953125|cri_loss: -0.00701904296875|unsuper_loss: 0.0
-average reward score: 5.42578125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.19%) |Training time=0.43s (20.12%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7588|ppo_ep: 1|act_loss: -0.00010442733764648438|cri_loss: 0.0002875328063964844|unsuper_loss: 0.0
-average reward score: 5.3671875
--------------------------------------------------------------------------------------
-|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.77s (76.76%) |Training time=0.44s (18.90%) |Others=0.10 (4.33%)|CurSamplesPerSec=13.86 |AvgSamplesPerSec=14.56
-[2023-04-14 13:25:31,281] [INFO] [logging.py:96:log_dist] [Rank 0] step=7590, skipped=95, lr=[2.0776288009209243e-07, 2.0776288009209243e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:25:31,300] [INFO] [timer.py:199:stop] epoch=0/micro_step=7590/global_step=7590, RunningAvgSamplesPerSec=105.68754108352529, CurrSamplesPerSec=118.10116994467049, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:25:31,392] [INFO] [logging.py:96:log_dist] [Rank 0] step=7590, skipped=130, lr=[1.1764499893210879e-07, 1.1764499893210879e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7589|ppo_ep: 1|act_loss: -0.02447509765625|cri_loss: -0.01074981689453125|unsuper_loss: 0.0
-average reward score: 4.8828125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.08%) |Training time=0.43s (20.25%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7590|ppo_ep: 1|act_loss: -0.0019626617431640625|cri_loss: -0.0008349418640136719|unsuper_loss: 0.0
-average reward score: 5.88671875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.66%) |Training time=0.44s (20.70%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7591|ppo_ep: 1|act_loss: -0.0298004150390625|cri_loss: -0.01467132568359375|unsuper_loss: 0.0
-average reward score: 4.7890625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.93%) |Training time=0.44s (20.40%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7592|ppo_ep: 1|act_loss: -0.0089111328125|cri_loss: -0.0044097900390625|unsuper_loss: 0.0
-average reward score: 6.21875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.89%) |Training time=0.44s (20.42%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7593|ppo_ep: 1|act_loss: -0.00467681884765625|cri_loss: -0.001857757568359375|unsuper_loss: 0.0
-average reward score: 5.453125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.54%) |Training time=0.45s (20.79%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7594|ppo_ep: 1|act_loss: 0.01416015625|cri_loss: 0.00722503662109375|unsuper_loss: 0.0
-average reward score: 5.8359375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.56%) |Training time=0.45s (20.78%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7595|ppo_ep: 1|act_loss: -0.00719451904296875|cri_loss: -0.0034542083740234375|unsuper_loss: 0.0
-average reward score: 5.82421875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.22%) |Training time=0.43s (20.10%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7596|ppo_ep: 1|act_loss: -0.0211029052734375|cri_loss: -0.0099334716796875|unsuper_loss: 0.0
-average reward score: 4.90234375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.08%) |Training time=0.43s (20.23%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7597|ppo_ep: 1|act_loss: 0.003650665283203125|cri_loss: 0.002460479736328125|unsuper_loss: 0.0
-average reward score: 5.1875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.91%) |Training time=0.44s (20.41%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7598|ppo_ep: 1|act_loss: 0.00899505615234375|cri_loss: 0.0050506591796875|unsuper_loss: 0.0
-average reward score: 5.9453125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.03%) |Training time=0.43s (20.29%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.56
-[2023-04-14 13:25:52,727] [INFO] [logging.py:96:log_dist] [Rank 0] step=7600, skipped=95, lr=[2.024047245836643e-07, 2.024047245836643e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:25:52,746] [INFO] [timer.py:199:stop] epoch=0/micro_step=7600/global_step=7600, RunningAvgSamplesPerSec=105.69993310975197, CurrSamplesPerSec=117.69169550112328, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:25:52,838] [INFO] [logging.py:96:log_dist] [Rank 0] step=7600, skipped=130, lr=[1.1474482459929714e-07, 1.1474482459929714e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7599|ppo_ep: 1|act_loss: -0.0093536376953125|cri_loss: -0.0044097900390625|unsuper_loss: 0.0
-average reward score: 5.42578125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (75.02%) |Training time=0.43s (20.30%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7600|ppo_ep: 1|act_loss: -0.025604248046875|cri_loss: -0.0126190185546875|unsuper_loss: 0.0
-average reward score: 5.3828125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.76%) |Training time=0.44s (20.58%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7601|ppo_ep: 1|act_loss: 0.00301361083984375|cri_loss: 0.0016078948974609375|unsuper_loss: 0.0
-average reward score: 5.46875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.00%) |Training time=0.44s (20.33%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7602|ppo_ep: 1|act_loss: -0.020599365234375|cri_loss: -0.00994873046875|unsuper_loss: 0.0
-average reward score: 6.44140625
--------------------------------------------------------------------------------------
-|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.72s (73.39%) |Training time=0.45s (19.01%) |Others=0.18 (7.60%)|CurSamplesPerSec=13.65 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7603|ppo_ep: 1|act_loss: -0.0031585693359375|cri_loss: -0.001377105712890625|unsuper_loss: 0.0
-average reward score: 5.58984375
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.18%) |Training time=0.44s (19.92%) |Others=0.11 (4.89%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7604|ppo_ep: 1|act_loss: 0.00724029541015625|cri_loss: 0.0037631988525390625|unsuper_loss: 0.0
-average reward score: 5.109375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.55%) |Training time=0.45s (20.80%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7605|ppo_ep: 1|act_loss: -0.00554656982421875|cri_loss: -0.0021820068359375|unsuper_loss: 0.0
-average reward score: 5.44921875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.96%) |Training time=0.44s (20.36%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7606|ppo_ep: 1|act_loss: -0.0665283203125|cri_loss: -0.032135009765625|unsuper_loss: 0.0
-average reward score: 5.16015625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.99%) |Training time=0.44s (20.33%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7607|ppo_ep: 1|act_loss: -0.0015411376953125|cri_loss: -0.0003371238708496094|unsuper_loss: 0.0
-average reward score: 5.3515625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.79%) |Training time=0.44s (20.62%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7608|ppo_ep: 1|act_loss: -0.01561737060546875|cri_loss: -0.00743865966796875|unsuper_loss: 0.0
-average reward score: 5.3671875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.21%) |Training time=0.43s (19.99%) |Others=0.10 (4.80%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.56
-[2023-04-14 13:26:14,473] [INFO] [logging.py:96:log_dist] [Rank 0] step=7610, skipped=95, lr=[1.9711508708862154e-07, 1.9711508708862154e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:26:14,491] [INFO] [timer.py:199:stop] epoch=0/micro_step=7610/global_step=7610, RunningAvgSamplesPerSec=105.71107588458801, CurrSamplesPerSec=105.46318940792834, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:26:14,583] [INFO] [logging.py:96:log_dist] [Rank 0] step=7610, skipped=130, lr=[1.1188000550230005e-07, 1.1188000550230005e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7609|ppo_ep: 1|act_loss: -0.0077056884765625|cri_loss: -0.0035419464111328125|unsuper_loss: 0.0
-average reward score: 5.48046875
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.22%) |Training time=0.47s (21.22%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.57 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7610|ppo_ep: 1|act_loss: 0.0023365020751953125|cri_loss: 0.0012693405151367188|unsuper_loss: 0.0
-average reward score: 5.2265625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.99%) |Training time=0.46s (21.39%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7611|ppo_ep: 1|act_loss: 0.020172119140625|cri_loss: 0.010772705078125|unsuper_loss: 0.0
-average reward score: 5.0078125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.22%) |Training time=0.46s (21.15%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7612|ppo_ep: 1|act_loss: -0.016632080078125|cri_loss: -0.0080718994140625|unsuper_loss: 0.0
-average reward score: 5.25390625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.29%) |Training time=0.46s (21.08%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.56
-[2023-04-14 13:26:23,123] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 8192, but hysteresis is 2. Reducing hysteresis to 1
-epoch: 0|step: 7613|ppo_ep: 1|act_loss: -0.006011962890625|cri_loss: -0.0027866363525390625|unsuper_loss: 0.0
-average reward score: 5.5078125
--------------------------------------------------------------------------------------
-|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.60s (75.06%) |Training time=0.43s (20.27%) |Others=0.10 (4.67%)|CurSamplesPerSec=15.01 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7614|ppo_ep: 1|act_loss: -0.00437164306640625|cri_loss: -0.001934051513671875|unsuper_loss: 0.0
-average reward score: 5.13671875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.94%) |Training time=0.46s (21.45%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7615|ppo_ep: 1|act_loss: -0.00489044189453125|cri_loss: -0.0023822784423828125|unsuper_loss: 0.0
-average reward score: 4.67578125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.19%) |Training time=0.46s (21.26%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7616|ppo_ep: 1|act_loss: -0.01690673828125|cri_loss: -0.00826263427734375|unsuper_loss: 0.0
-average reward score: 5.4140625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.33%) |Training time=0.46s (21.02%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.56
-[2023-04-14 13:26:32,062] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
-epoch: 0|step: 7617|ppo_ep: 1|act_loss: 0.01526641845703125|cri_loss: 0.0093231201171875|unsuper_loss: 0.0
-average reward score: 5.3515625
--------------------------------------------------------------------------------------
-|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.77s (75.44%) |Training time=0.49s (20.76%) |Others=0.09 (3.81%)|CurSamplesPerSec=13.61 |AvgSamplesPerSec=14.56
-[2023-04-14 13:26:34,217] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
-epoch: 0|step: 7618|ppo_ep: 1|act_loss: 0.0286865234375|cri_loss: 0.0150604248046875|unsuper_loss: 0.0
-average reward score: 5.578125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.31%) |Training time=0.46s (21.55%) |Others=0.09 (4.15%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.56
-[2023-04-14 13:26:36,272] [INFO] [logging.py:96:log_dist] [Rank 0] step=7620, skipped=96, lr=[1.9241306106741956e-07, 1.9241306106741956e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:26:36,290] [INFO] [timer.py:199:stop] epoch=0/micro_step=7620/global_step=7620, RunningAvgSamplesPerSec=105.71308353833173, CurrSamplesPerSec=105.99807301686585, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:26:36,383] [INFO] [logging.py:96:log_dist] [Rank 0] step=7620, skipped=132, lr=[1.0961363454760482e-07, 1.0961363454760482e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7619|ppo_ep: 1|act_loss: 0.0058441162109375|cri_loss: 0.00321197509765625|unsuper_loss: 0.0
-average reward score: 4.8046875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.96%) |Training time=0.46s (21.42%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7620|ppo_ep: 1|act_loss: -0.006824493408203125|cri_loss: -0.00334930419921875|unsuper_loss: 0.0
-average reward score: 5.609375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.89%) |Training time=0.47s (21.49%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7621|ppo_ep: 1|act_loss: -0.006504058837890625|cri_loss: -0.0030193328857421875|unsuper_loss: 0.0
-average reward score: 5.19921875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.91%) |Training time=0.47s (21.47%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7622|ppo_ep: 1|act_loss: 0.010223388671875|cri_loss: 0.005458831787109375|unsuper_loss: 0.0
-average reward score: 5.01953125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.04%) |Training time=0.46s (21.33%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7623|ppo_ep: 1|act_loss: 0.007648468017578125|cri_loss: 0.003936767578125|unsuper_loss: 0.0
-average reward score: 5.265625
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.46%) |Training time=0.48s (21.92%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7624|ppo_ep: 1|act_loss: 0.0036411285400390625|cri_loss: 0.002719879150390625|unsuper_loss: 0.0
-average reward score: 5.54296875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.55%) |Training time=0.48s (21.86%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7625|ppo_ep: 1|act_loss: -0.0020275115966796875|cri_loss: -0.0009050369262695312|unsuper_loss: 0.0
-average reward score: 5.45703125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.42%) |Training time=0.45s (20.93%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7626|ppo_ep: 1|act_loss: 0.0220794677734375|cri_loss: 0.01171112060546875|unsuper_loss: 0.0
-average reward score: 5.48828125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.99%) |Training time=0.46s (21.39%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7627|ppo_ep: 1|act_loss: 0.004680633544921875|cri_loss: 0.0029010772705078125|unsuper_loss: 0.0
-average reward score: 5.28515625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.02%) |Training time=0.46s (21.36%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7628|ppo_ep: 1|act_loss: 0.018707275390625|cri_loss: 0.00984954833984375|unsuper_loss: 0.0
-average reward score: 4.9140625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.93%) |Training time=0.46s (21.45%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.56
-[2023-04-14 13:26:57,936] [INFO] [logging.py:96:log_dist] [Rank 0] step=7630, skipped=96, lr=[1.8725382296377066e-07, 1.8725382296377066e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:26:57,955] [INFO] [timer.py:199:stop] epoch=0/micro_step=7630/global_step=7630, RunningAvgSamplesPerSec=105.71314051584926, CurrSamplesPerSec=106.23861904871212, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:26:58,047] [INFO] [logging.py:96:log_dist] [Rank 0] step=7630, skipped=132, lr=[1.0681256153060565e-07, 1.0681256153060565e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7629|ppo_ep: 1|act_loss: -0.0301513671875|cri_loss: -0.0148773193359375|unsuper_loss: 0.0
-average reward score: 5.94140625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.95%) |Training time=0.46s (21.45%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7630|ppo_ep: 1|act_loss: -0.017547607421875|cri_loss: -0.008636474609375|unsuper_loss: 0.0
-average reward score: 5.5234375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.27%) |Training time=0.46s (21.12%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7631|ppo_ep: 1|act_loss: 0.0010089874267578125|cri_loss: 0.0006146430969238281|unsuper_loss: 0.0
-average reward score: 5.78125
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.46%) |Training time=0.46s (20.99%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7632|ppo_ep: 1|act_loss: -0.004638671875|cri_loss: -0.002239227294921875|unsuper_loss: 0.0
-average reward score: 5.6640625
--------------------------------------------------------------------------------------
-|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.70s (75.24%) |Training time=0.46s (20.33%) |Others=0.10 (4.43%)|CurSamplesPerSec=14.14 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7633|ppo_ep: 1|act_loss: 0.0055999755859375|cri_loss: 0.0029544830322265625|unsuper_loss: 0.0
-average reward score: 5.4921875
--------------------------------------------------------------------------------------
-|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.77s (75.68%) |Training time=0.47s (20.03%) |Others=0.10 (4.29%)|CurSamplesPerSec=13.67 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7634|ppo_ep: 1|act_loss: 0.028350830078125|cri_loss: 0.014739990234375|unsuper_loss: 0.0
-average reward score: 5.52734375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.74%) |Training time=0.47s (21.65%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7635|ppo_ep: 1|act_loss: 0.00738525390625|cri_loss: 0.00392913818359375|unsuper_loss: 0.0
-average reward score: 4.8828125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.13%) |Training time=0.46s (21.11%) |Others=0.10 (4.76%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7636|ppo_ep: 1|act_loss: 0.0267791748046875|cri_loss: 0.01430511474609375|unsuper_loss: 0.0
-average reward score: 5.1015625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.11%) |Training time=0.46s (21.25%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7637|ppo_ep: 1|act_loss: 0.01326751708984375|cri_loss: 0.00701904296875|unsuper_loss: 0.0
-average reward score: 5.26171875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.25%) |Training time=0.46s (21.15%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7638|ppo_ep: 1|act_loss: 0.0040130615234375|cri_loss: 0.0021495819091796875|unsuper_loss: 0.0
-average reward score: 5.578125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.44%) |Training time=0.46s (20.94%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.56
-[2023-04-14 13:27:19,954] [INFO] [logging.py:96:log_dist] [Rank 0] step=7640, skipped=96, lr=[1.8216332744641935e-07, 1.8216332744641935e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:27:19,973] [INFO] [timer.py:199:stop] epoch=0/micro_step=7640/global_step=7640, RunningAvgSamplesPerSec=105.71413966238659, CurrSamplesPerSec=101.97505208246275, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:27:20,065] [INFO] [logging.py:96:log_dist] [Rank 0] step=7640, skipped=132, lr=[1.0404696132469445e-07, 1.0404696132469445e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7639|ppo_ep: 1|act_loss: 0.00672149658203125|cri_loss: 0.00396728515625|unsuper_loss: 0.0
-average reward score: 6.265625
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.54%) |Training time=0.48s (21.90%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7640|ppo_ep: 1|act_loss: 0.00762939453125|cri_loss: 0.00408935546875|unsuper_loss: 0.0
-average reward score: 5.82421875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.76%) |Training time=0.46s (21.58%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7641|ppo_ep: 1|act_loss: -0.010955810546875|cri_loss: -0.00516510009765625|unsuper_loss: 0.0
-average reward score: 5.5234375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.36%) |Training time=0.48s (22.01%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7642|ppo_ep: 1|act_loss: -0.00540924072265625|cri_loss: -0.002593994140625|unsuper_loss: 0.0
-average reward score: 5.421875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.87%) |Training time=0.46s (21.47%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7643|ppo_ep: 1|act_loss: 0.006183624267578125|cri_loss: 0.0033664703369140625|unsuper_loss: 0.0
-average reward score: 5.4453125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.33%) |Training time=0.48s (22.05%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7644|ppo_ep: 1|act_loss: 0.01395416259765625|cri_loss: 0.00766754150390625|unsuper_loss: 0.0
-average reward score: 5.77734375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.98%) |Training time=0.46s (21.36%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7645|ppo_ep: 1|act_loss: -0.030029296875|cri_loss: -0.01447296142578125|unsuper_loss: 0.0
-average reward score: 5.91015625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.77%) |Training time=0.47s (21.59%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7646|ppo_ep: 1|act_loss: -0.01416015625|cri_loss: -0.0068206787109375|unsuper_loss: 0.0
-average reward score: 5.796875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.50%) |Training time=0.47s (21.88%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7647|ppo_ep: 1|act_loss: 0.0216522216796875|cri_loss: 0.01099395751953125|unsuper_loss: 0.0
-average reward score: 5.23828125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.44%) |Training time=0.48s (21.94%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7648|ppo_ep: 1|act_loss: 0.013641357421875|cri_loss: 0.0073089599609375|unsuper_loss: 0.0
-average reward score: 5.34375
--------------------------------------------------------------------------------------
-|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.76s (74.50%) |Training time=0.50s (21.24%) |Others=0.10 (4.26%)|CurSamplesPerSec=13.57 |AvgSamplesPerSec=14.56
-[2023-04-14 13:27:41,752] [INFO] [logging.py:96:log_dist] [Rank 0] step=7650, skipped=96, lr=[1.7714164996878907e-07, 1.7714164996878907e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:27:41,770] [INFO] [timer.py:199:stop] epoch=0/micro_step=7650/global_step=7650, RunningAvgSamplesPerSec=105.71081829530702, CurrSamplesPerSec=105.03040784290403, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:27:41,863] [INFO] [logging.py:96:log_dist] [Rank 0] step=7650, skipped=132, lr=[1.0131687492273862e-07, 1.0131687492273862e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7649|ppo_ep: 1|act_loss: 0.0198211669921875|cri_loss: 0.01026153564453125|unsuper_loss: 0.0
-average reward score: 5.4375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.72%) |Training time=0.47s (21.67%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7650|ppo_ep: 1|act_loss: -0.01372528076171875|cri_loss: -0.006732940673828125|unsuper_loss: 0.0
-average reward score: 5.77734375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.54%) |Training time=0.47s (21.83%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.56
-[2023-04-14 13:27:46,070] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 8192, reducing to 4096
-epoch: 0|step: 7651|ppo_ep: 1|act_loss: 0.006175994873046875|cri_loss: 0.0032939910888671875|unsuper_loss: 0.0
-average reward score: 5.40625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.55%) |Training time=0.44s (20.78%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7652|ppo_ep: 1|act_loss: -0.00801849365234375|cri_loss: -0.003887176513671875|unsuper_loss: 0.0
-average reward score: 5.17578125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.72%) |Training time=0.47s (21.65%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7653|ppo_ep: 1|act_loss: -0.01218414306640625|cri_loss: -0.0047760009765625|unsuper_loss: 0.0
-average reward score: 5.953125
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.18%) |Training time=0.48s (22.15%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7654|ppo_ep: 1|act_loss: -0.01444244384765625|cri_loss: -0.007080078125|unsuper_loss: 0.0
-average reward score: 5.1875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.07%) |Training time=0.46s (21.27%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7655|ppo_ep: 1|act_loss: 0.0143585205078125|cri_loss: 0.007495880126953125|unsuper_loss: 0.0
-average reward score: 5.27734375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.07%) |Training time=0.49s (22.41%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7656|ppo_ep: 1|act_loss: -0.0041656494140625|cri_loss: -0.001190185546875|unsuper_loss: 0.0
-average reward score: 5.484375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.56%) |Training time=0.47s (21.80%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7657|ppo_ep: 1|act_loss: 0.039520263671875|cri_loss: 0.022735595703125|unsuper_loss: 0.0
-average reward score: 6.01171875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.70%) |Training time=0.47s (21.74%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7658|ppo_ep: 1|act_loss: -0.0272674560546875|cri_loss: -0.01178741455078125|unsuper_loss: 0.0
-average reward score: 5.625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.78%) |Training time=0.47s (21.68%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.56
-[2023-04-14 13:28:03,367] [INFO] [logging.py:96:log_dist] [Rank 0] step=7660, skipped=97, lr=[1.7268104120316125e-07, 1.7268104120316125e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:28:03,385] [INFO] [timer.py:199:stop] epoch=0/micro_step=7660/global_step=7660, RunningAvgSamplesPerSec=105.70880371115669, CurrSamplesPerSec=103.66849002303269, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:28:03,480] [INFO] [logging.py:96:log_dist] [Rank 0] step=7660, skipped=132, lr=[9.862234279120419e-08, 9.862234279120419e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7659|ppo_ep: 1|act_loss: 0.022857666015625|cri_loss: 0.0119781494140625|unsuper_loss: 0.0
-average reward score: 4.9453125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.56%) |Training time=0.47s (21.81%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7660|ppo_ep: 1|act_loss: -0.0127716064453125|cri_loss: -0.00572967529296875|unsuper_loss: 0.0
-average reward score: 5.04296875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.72%) |Training time=0.46s (21.39%) |Others=0.11 (4.89%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7661|ppo_ep: 1|act_loss: -0.017333984375|cri_loss: -0.00823974609375|unsuper_loss: 0.0
-average reward score: 4.890625
--------------------------------------------------------------------------------------
-|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.66s (72.76%) |Training time=0.51s (22.55%) |Others=0.11 (4.69%)|CurSamplesPerSec=14.05 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7662|ppo_ep: 1|act_loss: 0.043792724609375|cri_loss: 0.0233612060546875|unsuper_loss: 0.0
-average reward score: 5.37109375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.59%) |Training time=0.47s (21.85%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7663|ppo_ep: 1|act_loss: -0.024261474609375|cri_loss: -0.0117645263671875|unsuper_loss: 0.0
-average reward score: 5.40625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.60%) |Training time=0.47s (21.87%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7664|ppo_ep: 1|act_loss: 0.0703125|cri_loss: 0.03778076171875|unsuper_loss: 0.0
-average reward score: 5.359375
--------------------------------------------------------------------------------------
-|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.59s (71.49%) |Training time=0.53s (24.00%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.40 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7665|ppo_ep: 1|act_loss: 0.0843505859375|cri_loss: 0.0477294921875|unsuper_loss: 0.0
-average reward score: 4.77734375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.83%) |Training time=0.47s (21.62%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7666|ppo_ep: 1|act_loss: -0.00772857666015625|cri_loss: -0.0037326812744140625|unsuper_loss: 0.0
-average reward score: 5.58203125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.30%) |Training time=0.48s (22.03%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7667|ppo_ep: 1|act_loss: 0.020172119140625|cri_loss: 0.0110626220703125|unsuper_loss: 0.0
-average reward score: 5.43359375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.70%) |Training time=0.47s (21.76%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7668|ppo_ep: 1|act_loss: 0.0711669921875|cri_loss: 0.037109375|unsuper_loss: 0.0
-average reward score: 5.53515625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.49%) |Training time=0.48s (21.88%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.56
-[2023-04-14 13:28:25,190] [INFO] [logging.py:96:log_dist] [Rank 0] step=7670, skipped=97, lr=[1.677903222209867e-07, 1.677903222209867e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:28:25,208] [INFO] [timer.py:199:stop] epoch=0/micro_step=7670/global_step=7670, RunningAvgSamplesPerSec=105.70321438555533, CurrSamplesPerSec=105.26079819809787, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:28:25,301] [INFO] [logging.py:96:log_dist] [Rank 0] step=7670, skipped=132, lr=[9.596340486955818e-08, 9.596340486955818e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7669|ppo_ep: 1|act_loss: -0.00418853759765625|cri_loss: -0.001926422119140625|unsuper_loss: 0.0
-average reward score: 5.2421875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.97%) |Training time=0.47s (21.51%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7670|ppo_ep: 1|act_loss: -0.0084381103515625|cri_loss: -0.003810882568359375|unsuper_loss: 0.0
-average reward score: 5.91796875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.55%) |Training time=0.47s (21.84%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7671|ppo_ep: 1|act_loss: -0.00292205810546875|cri_loss: -0.001384735107421875|unsuper_loss: 0.0
-average reward score: 5.5390625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.74%) |Training time=0.47s (21.71%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7672|ppo_ep: 1|act_loss: -0.005619049072265625|cri_loss: -0.0026702880859375|unsuper_loss: 0.0
-average reward score: 5.44140625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.71%) |Training time=0.47s (21.66%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7673|ppo_ep: 1|act_loss: -0.0075225830078125|cri_loss: -0.003704071044921875|unsuper_loss: 0.0
-average reward score: 5.58203125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.55%) |Training time=0.47s (21.88%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7674|ppo_ep: 1|act_loss: -0.0134735107421875|cri_loss: -0.006656646728515625|unsuper_loss: 0.0
-average reward score: 4.83984375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.75%) |Training time=0.47s (21.62%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7675|ppo_ep: 1|act_loss: 0.0252227783203125|cri_loss: 0.01290130615234375|unsuper_loss: 0.0
-average reward score: 5.35546875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.26%) |Training time=0.46s (21.20%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7676|ppo_ep: 1|act_loss: -0.00504302978515625|cri_loss: -0.0024242401123046875|unsuper_loss: 0.0
-average reward score: 6.1875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.47%) |Training time=0.45s (20.89%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7677|ppo_ep: 1|act_loss: -0.0011739730834960938|cri_loss: -0.0005507469177246094|unsuper_loss: 0.0
-average reward score: 5.24609375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.27%) |Training time=0.46s (21.18%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7678|ppo_ep: 1|act_loss: -0.03155517578125|cri_loss: -0.0152740478515625|unsuper_loss: 0.0
-average reward score: 5.546875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.47%) |Training time=0.45s (20.87%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.56
-[2023-04-14 13:28:46,830] [INFO] [logging.py:96:log_dist] [Rank 0] step=7680, skipped=97, lr=[1.629686343211429e-07, 1.629686343211429e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:28:46,848] [INFO] [timer.py:199:stop] epoch=0/micro_step=7680/global_step=7680, RunningAvgSamplesPerSec=105.70482233763329, CurrSamplesPerSec=112.06785304851931, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:28:46,941] [INFO] [logging.py:96:log_dist] [Rank 0] step=7680, skipped=132, lr=[9.334010056967408e-08, 9.334010056967408e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7679|ppo_ep: 1|act_loss: -0.0015592575073242188|cri_loss: -0.0006017684936523438|unsuper_loss: 0.0
-average reward score: 5.12890625
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.34%) |Training time=0.49s (22.17%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7680|ppo_ep: 1|act_loss: 0.0016498565673828125|cri_loss: 0.0011415481567382812|unsuper_loss: 0.0
-average reward score: 5.609375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.42%) |Training time=0.45s (20.93%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7681|ppo_ep: 1|act_loss: 0.012451171875|cri_loss: 0.006500244140625|unsuper_loss: 0.0
-average reward score: 6.28125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.25%) |Training time=0.46s (21.21%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7682|ppo_ep: 1|act_loss: 0.0462646484375|cri_loss: 0.0240325927734375|unsuper_loss: 0.0
-average reward score: 6.33203125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.40%) |Training time=0.45s (20.95%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7683|ppo_ep: 1|act_loss: -0.0667724609375|cri_loss: -0.01611328125|unsuper_loss: 0.0
-average reward score: 5.12890625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.46%) |Training time=0.45s (20.97%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7684|ppo_ep: 1|act_loss: -0.0005736351013183594|cri_loss: -0.00011801719665527344|unsuper_loss: 0.0
-average reward score: 5.16796875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.85%) |Training time=0.47s (21.55%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7685|ppo_ep: 1|act_loss: 0.015594482421875|cri_loss: 0.0088348388671875|unsuper_loss: 0.0
-average reward score: 6.34765625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.08%) |Training time=0.46s (21.39%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7686|ppo_ep: 1|act_loss: -0.006443023681640625|cri_loss: -0.0030460357666015625|unsuper_loss: 0.0
-average reward score: 5.4921875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.61%) |Training time=0.47s (21.66%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7687|ppo_ep: 1|act_loss: 0.0017538070678710938|cri_loss: 0.0012903213500976562|unsuper_loss: 0.0
-average reward score: 5.6875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.76%) |Training time=0.47s (21.69%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7688|ppo_ep: 1|act_loss: -0.0147247314453125|cri_loss: -0.0071258544921875|unsuper_loss: 0.0
-average reward score: 5.1796875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.82%) |Training time=0.47s (21.55%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.56
-[2023-04-14 13:29:08,495] [INFO] [logging.py:96:log_dist] [Rank 0] step=7690, skipped=97, lr=[1.5821604897267761e-07, 1.5821604897267761e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:29:08,514] [INFO] [timer.py:199:stop] epoch=0/micro_step=7690/global_step=7690, RunningAvgSamplesPerSec=105.70594568464799, CurrSamplesPerSec=104.29189793176543, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:29:08,609] [INFO] [logging.py:96:log_dist] [Rank 0] step=7690, skipped=132, lr=[9.075246877525034e-08, 9.075246877525034e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7689|ppo_ep: 1|act_loss: -0.00147247314453125|cri_loss: 0.00119781494140625|unsuper_loss: 0.0
-average reward score: 5.33984375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.80%) |Training time=0.47s (21.60%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7690|ppo_ep: 1|act_loss: 0.002208709716796875|cri_loss: 0.0012598037719726562|unsuper_loss: 0.0
-average reward score: 5.5625
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.63%) |Training time=0.47s (21.52%) |Others=0.11 (4.85%)|CurSamplesPerSec=14.54 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7691|ppo_ep: 1|act_loss: 0.0294189453125|cri_loss: 0.014984130859375|unsuper_loss: 0.0
-average reward score: 5.1640625
--------------------------------------------------------------------------------------
-|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.71s (73.93%) |Training time=0.50s (21.81%) |Others=0.10 (4.26%)|CurSamplesPerSec=13.83 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7692|ppo_ep: 1|act_loss: 0.0054931640625|cri_loss: 0.0031280517578125|unsuper_loss: 0.0
-average reward score: 5.2890625
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.76%) |Training time=0.48s (21.69%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.54 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7693|ppo_ep: 1|act_loss: 0.051361083984375|cri_loss: 0.02728271484375|unsuper_loss: 0.0
-average reward score: 5.859375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.00%) |Training time=0.47s (21.47%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7694|ppo_ep: 1|act_loss: 0.024505615234375|cri_loss: 0.0128173828125|unsuper_loss: 0.0
-average reward score: 5.4296875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.84%) |Training time=0.47s (21.53%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7695|ppo_ep: 1|act_loss: -0.010711669921875|cri_loss: -0.0051727294921875|unsuper_loss: 0.0
-average reward score: 5.23046875
--------------------------------------------------------------------------------------
-|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.77s (75.10%) |Training time=0.49s (20.74%) |Others=0.10 (4.16%)|CurSamplesPerSec=13.55 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7696|ppo_ep: 1|act_loss: -0.0024051666259765625|cri_loss: -0.0011453628540039062|unsuper_loss: 0.0
-average reward score: 5.12890625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.94%) |Training time=0.47s (21.44%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7697|ppo_ep: 1|act_loss: 0.009765625|cri_loss: 0.004962921142578125|unsuper_loss: 0.0
-average reward score: 4.58203125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.14%) |Training time=0.46s (21.29%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7698|ppo_ep: 1|act_loss: 0.045196533203125|cri_loss: 0.02325439453125|unsuper_loss: 0.0
-average reward score: 5.2890625
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.31%) |Training time=0.46s (21.06%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.56
-[2023-04-14 13:29:30,655] [INFO] [logging.py:96:log_dist] [Rank 0] step=7700, skipped=97, lr=[1.5353263662036897e-07, 1.5353263662036897e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:29:30,673] [INFO] [timer.py:199:stop] epoch=0/micro_step=7700/global_step=7700, RunningAvgSamplesPerSec=105.7013104552143, CurrSamplesPerSec=98.56926587346759, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:29:30,766] [INFO] [logging.py:96:log_dist] [Rank 0] step=7700, skipped=132, lr=[8.820054784123288e-08, 8.820054784123288e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7699|ppo_ep: 1|act_loss: 0.003551483154296875|cri_loss: 0.0018930435180664062|unsuper_loss: 0.0
-average reward score: 5.65234375
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.31%) |Training time=0.49s (22.24%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7700|ppo_ep: 1|act_loss: -0.022552490234375|cri_loss: -0.009979248046875|unsuper_loss: 0.0
-average reward score: 5.22265625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.00%) |Training time=0.46s (21.38%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7701|ppo_ep: 1|act_loss: 0.0167694091796875|cri_loss: 0.00882720947265625|unsuper_loss: 0.0
-average reward score: 5.36328125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.75%) |Training time=0.47s (21.64%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7702|ppo_ep: 1|act_loss: -0.024078369140625|cri_loss: -0.01169586181640625|unsuper_loss: 0.0
-average reward score: 5.09375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.40%) |Training time=0.48s (21.99%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7703|ppo_ep: 1|act_loss: -0.004787445068359375|cri_loss: -0.00232696533203125|unsuper_loss: 0.0
-average reward score: 5.28125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.13%) |Training time=0.49s (22.36%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7704|ppo_ep: 1|act_loss: -0.046112060546875|cri_loss: -0.019561767578125|unsuper_loss: 0.0
-average reward score: 6.22265625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.56%) |Training time=0.47s (21.80%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7705|ppo_ep: 1|act_loss: -0.0125885009765625|cri_loss: -0.00618743896484375|unsuper_loss: 0.0
-average reward score: 5.00390625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.47%) |Training time=0.48s (22.00%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7706|ppo_ep: 1|act_loss: -0.0083465576171875|cri_loss: -0.0037059783935546875|unsuper_loss: 0.0
-average reward score: 5.76953125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.12%) |Training time=0.48s (22.27%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7707|ppo_ep: 1|act_loss: -0.0010509490966796875|cri_loss: -0.0004482269287109375|unsuper_loss: 0.0
-average reward score: 5.61328125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.52%) |Training time=0.48s (21.95%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7708|ppo_ep: 1|act_loss: -0.00623321533203125|cri_loss: -0.002918243408203125|unsuper_loss: 0.0
-average reward score: 5.53125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.03%) |Training time=0.49s (22.34%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.56
-[2023-04-14 13:29:52,369] [INFO] [logging.py:96:log_dist] [Rank 0] step=7710, skipped=97, lr=[1.489184666836874e-07, 1.489184666836874e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:29:52,387] [INFO] [timer.py:199:stop] epoch=0/micro_step=7710/global_step=7710, RunningAvgSamplesPerSec=105.69619158975686, CurrSamplesPerSec=102.90253227929671, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:29:52,479] [INFO] [logging.py:96:log_dist] [Rank 0] step=7710, skipped=132, lr=[8.568437559324572e-08, 8.568437559324572e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7709|ppo_ep: 1|act_loss: 0.001888275146484375|cri_loss: 0.00121307373046875|unsuper_loss: 0.0
-average reward score: 5.328125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.66%) |Training time=0.47s (21.82%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7710|ppo_ep: 1|act_loss: 0.0034389495849609375|cri_loss: 0.0018596649169921875|unsuper_loss: 0.0
-average reward score: 5.4765625
--------------------------------------------------------------------------------------
-|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.76s (74.81%) |Training time=0.49s (20.76%) |Others=0.10 (4.43%)|CurSamplesPerSec=13.57 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7711|ppo_ep: 1|act_loss: 0.0123748779296875|cri_loss: 0.006412506103515625|unsuper_loss: 0.0
-average reward score: 4.93359375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.85%) |Training time=0.47s (21.61%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7712|ppo_ep: 1|act_loss: 0.006988525390625|cri_loss: 0.00377655029296875|unsuper_loss: 0.0
-average reward score: 5.87890625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.48%) |Training time=0.47s (21.90%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7713|ppo_ep: 1|act_loss: -0.0089569091796875|cri_loss: -0.00347900390625|unsuper_loss: 0.0
-average reward score: 5.6484375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.80%) |Training time=0.47s (21.64%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7714|ppo_ep: 1|act_loss: 0.007404327392578125|cri_loss: 0.00394439697265625|unsuper_loss: 0.0
-average reward score: 5.22265625
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.35%) |Training time=0.48s (22.01%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7715|ppo_ep: 1|act_loss: 0.020233154296875|cri_loss: 0.0104522705078125|unsuper_loss: 0.0
-average reward score: 5.3515625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.74%) |Training time=0.47s (21.72%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7716|ppo_ep: 1|act_loss: 0.0013427734375|cri_loss: 0.000820159912109375|unsuper_loss: 0.0
-average reward score: 5.015625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.50%) |Training time=0.47s (21.87%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7717|ppo_ep: 1|act_loss: -0.0102996826171875|cri_loss: -0.0048675537109375|unsuper_loss: 0.0
-average reward score: 5.0078125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.96%) |Training time=0.49s (22.51%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7718|ppo_ep: 1|act_loss: 0.0143280029296875|cri_loss: 0.00757598876953125|unsuper_loss: 0.0
-average reward score: 5.59375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.98%) |Training time=0.48s (22.38%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.56
-[2023-04-14 13:30:14,266] [INFO] [logging.py:96:log_dist] [Rank 0] step=7720, skipped=97, lr=[1.4437360755576166e-07, 1.4437360755576166e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:30:14,285] [INFO] [timer.py:199:stop] epoch=0/micro_step=7720/global_step=7720, RunningAvgSamplesPerSec=105.68932583761074, CurrSamplesPerSec=93.3791178439091, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:30:14,369] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
-[2023-04-14 13:30:14,369] [INFO] [logging.py:96:log_dist] [Rank 0] step=7720, skipped=133, lr=[8.345041653250202e-08, 8.345041653250202e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7719|ppo_ep: 1|act_loss: 0.0103302001953125|cri_loss: 0.005672454833984375|unsuper_loss: 0.0
-average reward score: 5.03515625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.82%) |Training time=0.50s (23.17%) |Others=0.09 (4.02%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.56
-[2023-04-14 13:30:16,708] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
-epoch: 0|step: 7720|ppo_ep: 1|act_loss: 0.002162933349609375|cri_loss: 0.00196075439453125|unsuper_loss: 0.0
-average reward score: 5.2109375
--------------------------------------------------------------------------------------
-|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.69s (72.27%) |Training time=0.56s (23.90%) |Others=0.09 (3.83%)|CurSamplesPerSec=13.68 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7721|ppo_ep: 1|act_loss: -0.01486968994140625|cri_loss: -0.0072174072265625|unsuper_loss: 0.0
-average reward score: 5.69140625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.93%) |Training time=0.49s (22.55%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7722|ppo_ep: 1|act_loss: -0.00432586669921875|cri_loss: -0.0019664764404296875|unsuper_loss: 0.0
-average reward score: 5.30859375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.66%) |Training time=0.49s (22.74%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7723|ppo_ep: 1|act_loss: 0.0094451904296875|cri_loss: 0.005096435546875|unsuper_loss: 0.0
-average reward score: 5.671875
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.84%) |Training time=0.47s (21.67%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7724|ppo_ep: 1|act_loss: -2.2649765014648438e-05|cri_loss: 0.0001900196075439453|unsuper_loss: 0.0
-average reward score: 4.9609375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.38%) |Training time=0.48s (22.01%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7725|ppo_ep: 1|act_loss: 0.022003173828125|cri_loss: 0.011322021484375|unsuper_loss: 0.0
-average reward score: 5.52734375
--------------------------------------------------------------------------------------
-|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.77s (74.95%) |Training time=0.49s (20.88%) |Others=0.10 (4.16%)|CurSamplesPerSec=13.57 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7726|ppo_ep: 1|act_loss: -0.0091705322265625|cri_loss: -0.0045166015625|unsuper_loss: 0.0
-average reward score: 5.671875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.31%) |Training time=0.46s (21.06%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7727|ppo_ep: 1|act_loss: 0.0120849609375|cri_loss: 0.0061798095703125|unsuper_loss: 0.0
-average reward score: 5.22265625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.73%) |Training time=0.45s (20.72%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7728|ppo_ep: 1|act_loss: -0.005695343017578125|cri_loss: -0.0025634765625|unsuper_loss: 0.0
-average reward score: 5.38671875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.17%) |Training time=0.46s (21.19%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.56
-[2023-04-14 13:30:36,328] [INFO] [logging.py:96:log_dist] [Rank 0] step=7730, skipped=97, lr=[1.398981266023686e-07, 1.398981266023686e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:30:36,346] [INFO] [timer.py:199:stop] epoch=0/micro_step=7730/global_step=7730, RunningAvgSamplesPerSec=105.68184628578439, CurrSamplesPerSec=101.17140408416168, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:30:36,439] [INFO] [logging.py:96:log_dist] [Rank 0] step=7730, skipped=134, lr=[8.124547094506025e-08, 8.124547094506025e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7729|ppo_ep: 1|act_loss: 0.00047397613525390625|cri_loss: 0.00033283233642578125|unsuper_loss: 0.0
-average reward score: 5.609375
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.67%) |Training time=0.48s (21.85%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7730|ppo_ep: 1|act_loss: 0.013671875|cri_loss: 0.006999969482421875|unsuper_loss: 0.0
-average reward score: 5.87109375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.29%) |Training time=0.45s (20.99%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7731|ppo_ep: 1|act_loss: -0.02764892578125|cri_loss: -0.01317596435546875|unsuper_loss: 0.0
-average reward score: 5.47265625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.19%) |Training time=0.46s (21.26%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7732|ppo_ep: 1|act_loss: -0.032623291015625|cri_loss: -0.0156707763671875|unsuper_loss: 0.0
-average reward score: 5.41796875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.18%) |Training time=0.46s (21.20%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7733|ppo_ep: 1|act_loss: -0.005161285400390625|cri_loss: -0.002452850341796875|unsuper_loss: 0.0
-average reward score: 5.03515625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.67%) |Training time=0.45s (20.77%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7734|ppo_ep: 1|act_loss: 0.048309326171875|cri_loss: 0.0247039794921875|unsuper_loss: 0.0
-average reward score: 5.578125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.44%) |Training time=0.45s (20.94%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7735|ppo_ep: 1|act_loss: -0.01873779296875|cri_loss: -0.0092926025390625|unsuper_loss: 0.0
-average reward score: 5.265625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.31%) |Training time=0.46s (21.14%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7736|ppo_ep: 1|act_loss: -0.00949859619140625|cri_loss: -0.0045318603515625|unsuper_loss: 0.0
-average reward score: 6.01171875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.20%) |Training time=0.46s (21.18%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7737|ppo_ep: 1|act_loss: -0.021820068359375|cri_loss: -0.004364013671875|unsuper_loss: 0.0
-average reward score: 5.234375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.50%) |Training time=0.45s (20.94%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7738|ppo_ep: 1|act_loss: -0.026641845703125|cri_loss: -0.013214111328125|unsuper_loss: 0.0
-average reward score: 5.26953125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.18%) |Training time=0.46s (21.21%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.56
-[2023-04-14 13:30:57,937] [INFO] [logging.py:96:log_dist] [Rank 0] step=7740, skipped=97, lr=[1.354920901609319e-07, 1.354920901609319e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:30:57,955] [INFO] [timer.py:199:stop] epoch=0/micro_step=7740/global_step=7740, RunningAvgSamplesPerSec=105.68622715622114, CurrSamplesPerSec=109.06858467666899, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:30:58,048] [INFO] [logging.py:96:log_dist] [Rank 0] step=7740, skipped=134, lr=[7.882959173781374e-08, 7.882959173781374e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7739|ppo_ep: 1|act_loss: -0.001194000244140625|cri_loss: -0.0004506111145019531|unsuper_loss: 0.0
-average reward score: 5.8203125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.33%) |Training time=0.46s (21.12%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7740|ppo_ep: 1|act_loss: 0.00063323974609375|cri_loss: 0.0004687309265136719|unsuper_loss: 0.0
-average reward score: 6.19140625
--------------------------------------------------------------------------------------
-|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.78s (75.61%) |Training time=0.47s (20.11%) |Others=0.10 (4.27%)|CurSamplesPerSec=13.62 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7741|ppo_ep: 1|act_loss: 0.02520751953125|cri_loss: 0.012786865234375|unsuper_loss: 0.0
-average reward score: 5.8515625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.28%) |Training time=0.46s (21.19%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7742|ppo_ep: 1|act_loss: -0.0069122314453125|cri_loss: -0.0032215118408203125|unsuper_loss: 0.0
-average reward score: 5.71875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.31%) |Training time=0.46s (21.07%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7743|ppo_ep: 1|act_loss: -0.00384521484375|cri_loss: -0.0016021728515625|unsuper_loss: 0.0
-average reward score: 4.95703125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.24%) |Training time=0.46s (21.21%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7744|ppo_ep: 1|act_loss: 0.006366729736328125|cri_loss: 0.0032444000244140625|unsuper_loss: 0.0
-average reward score: 5.375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.45%) |Training time=0.45s (20.83%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7745|ppo_ep: 1|act_loss: 0.0296478271484375|cri_loss: 0.0156097412109375|unsuper_loss: 0.0
-average reward score: 5.4140625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.11%) |Training time=0.46s (21.34%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7746|ppo_ep: 1|act_loss: -0.00768280029296875|cri_loss: -0.0029754638671875|unsuper_loss: 0.0
-average reward score: 5.71875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.07%) |Training time=0.46s (21.31%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7747|ppo_ep: 1|act_loss: 0.0009546279907226562|cri_loss: 0.002101898193359375|unsuper_loss: 0.0
-average reward score: 4.83203125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.46%) |Training time=0.46s (21.03%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7748|ppo_ep: 1|act_loss: 0.00020587444305419922|cri_loss: 0.00020742416381835938|unsuper_loss: 0.0
-average reward score: 5.21875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.90%) |Training time=0.47s (21.52%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.56
-[2023-04-14 13:31:19,832] [INFO] [logging.py:96:log_dist] [Rank 0] step=7750, skipped=97, lr=[1.3115556353954087e-07, 1.3115556353954087e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:31:19,851] [INFO] [timer.py:199:stop] epoch=0/micro_step=7750/global_step=7750, RunningAvgSamplesPerSec=105.68781927307126, CurrSamplesPerSec=104.16773291257819, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:31:19,943] [INFO] [logging.py:96:log_dist] [Rank 0] step=7750, skipped=134, lr=[7.644960011677305e-08, 7.644960011677305e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7749|ppo_ep: 1|act_loss: 0.00521087646484375|cri_loss: 0.0029754638671875|unsuper_loss: 0.0
-average reward score: 5.125
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.25%) |Training time=0.47s (21.31%) |Others=0.10 (4.45%)|CurSamplesPerSec=14.52 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7750|ppo_ep: 1|act_loss: 0.004547119140625|cri_loss: 0.0027065277099609375|unsuper_loss: 0.0
-average reward score: 5.1015625
--------------------------------------------------------------------------------------
-|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.71s (75.28%) |Training time=0.46s (20.31%) |Others=0.10 (4.41%)|CurSamplesPerSec=14.11 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7751|ppo_ep: 1|act_loss: -0.00215911865234375|cri_loss: -0.00099945068359375|unsuper_loss: 0.0
-average reward score: 5.48828125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.16%) |Training time=0.46s (21.32%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7752|ppo_ep: 1|act_loss: 0.0113067626953125|cri_loss: 0.0062255859375|unsuper_loss: 0.0
-average reward score: 5.2890625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.09%) |Training time=0.46s (21.28%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7753|ppo_ep: 1|act_loss: -0.0093536376953125|cri_loss: -0.004302978515625|unsuper_loss: 0.0
-average reward score: 5.34375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.37%) |Training time=0.46s (21.10%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7754|ppo_ep: 1|act_loss: 0.0090179443359375|cri_loss: 0.004871368408203125|unsuper_loss: 0.0
-average reward score: 6.0390625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.28%) |Training time=0.46s (21.09%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7755|ppo_ep: 1|act_loss: 0.05841064453125|cri_loss: 0.031768798828125|unsuper_loss: 0.0
-average reward score: 5.1953125
--------------------------------------------------------------------------------------
-|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.78s (75.84%) |Training time=0.47s (19.97%) |Others=0.10 (4.19%)|CurSamplesPerSec=13.65 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7756|ppo_ep: 1|act_loss: -0.0202178955078125|cri_loss: -0.0099334716796875|unsuper_loss: 0.0
-average reward score: 5.22265625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.11%) |Training time=0.46s (21.27%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7757|ppo_ep: 1|act_loss: 0.00994873046875|cri_loss: 0.00514984130859375|unsuper_loss: 0.0
-average reward score: 5.3125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.32%) |Training time=0.46s (21.14%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7758|ppo_ep: 1|act_loss: 0.0001895427703857422|cri_loss: 0.00017499923706054688|unsuper_loss: 0.0
-average reward score: 5.12890625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.77%) |Training time=0.47s (21.61%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.56
-[2023-04-14 13:31:41,798] [INFO] [logging.py:96:log_dist] [Rank 0] step=7760, skipped=97, lr=[1.2688861101598217e-07, 1.2688861101598217e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:31:41,816] [INFO] [timer.py:199:stop] epoch=0/micro_step=7760/global_step=7760, RunningAvgSamplesPerSec=105.68886827324046, CurrSamplesPerSec=102.76134876744779, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:31:41,909] [INFO] [logging.py:96:log_dist] [Rank 0] step=7760, skipped=134, lr=[7.410553135915572e-08, 7.410553135915572e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7759|ppo_ep: 1|act_loss: -0.013427734375|cri_loss: -0.006565093994140625|unsuper_loss: 0.0
-average reward score: 5.68359375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.77%) |Training time=0.47s (21.71%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7760|ppo_ep: 1|act_loss: 0.020599365234375|cri_loss: 0.0105438232421875|unsuper_loss: 0.0
-average reward score: 5.6796875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.02%) |Training time=0.46s (21.36%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7761|ppo_ep: 1|act_loss: -0.002109527587890625|cri_loss: -0.0009775161743164062|unsuper_loss: 0.0
-average reward score: 5.94921875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.09%) |Training time=0.46s (21.20%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7762|ppo_ep: 1|act_loss: -0.01495361328125|cri_loss: -0.007282257080078125|unsuper_loss: 0.0
-average reward score: 6.00390625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.19%) |Training time=0.46s (21.16%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7763|ppo_ep: 1|act_loss: 0.0152587890625|cri_loss: 0.00789642333984375|unsuper_loss: 0.0
-average reward score: 5.45703125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.22%) |Training time=0.46s (21.24%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7764|ppo_ep: 1|act_loss: 0.0494384765625|cri_loss: 0.02557373046875|unsuper_loss: 0.0
-average reward score: 5.34375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.22%) |Training time=0.46s (21.14%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7765|ppo_ep: 1|act_loss: -0.00846099853515625|cri_loss: -0.0038127899169921875|unsuper_loss: 0.0
-average reward score: 5.59375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.26%) |Training time=0.46s (21.19%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7766|ppo_ep: 1|act_loss: -0.014495849609375|cri_loss: -0.00702667236328125|unsuper_loss: 0.0
-average reward score: 4.88671875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.32%) |Training time=0.45s (21.04%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7767|ppo_ep: 1|act_loss: 0.02105712890625|cri_loss: 0.01104736328125|unsuper_loss: 0.0
-average reward score: 5.6484375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.15%) |Training time=0.46s (21.30%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7768|ppo_ep: 1|act_loss: -0.0157470703125|cri_loss: -0.0063934326171875|unsuper_loss: 0.0
-average reward score: 5.47265625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.12%) |Training time=0.46s (21.25%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.56
-[2023-04-14 13:32:03,410] [INFO] [logging.py:96:log_dist] [Rank 0] step=7770, skipped=97, lr=[1.2269129583678402e-07, 1.2269129583678402e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:32:03,429] [INFO] [timer.py:199:stop] epoch=0/micro_step=7770/global_step=7770, RunningAvgSamplesPerSec=105.69178947512732, CurrSamplesPerSec=107.38045341847963, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:32:03,521] [INFO] [logging.py:96:log_dist] [Rank 0] step=7770, skipped=134, lr=[7.17974202097152e-08, 7.17974202097152e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7769|ppo_ep: 1|act_loss: -0.012664794921875|cri_loss: -0.006145477294921875|unsuper_loss: 0.0
-average reward score: 5.06640625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.07%) |Training time=0.46s (21.31%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7770|ppo_ep: 1|act_loss: 0.01064300537109375|cri_loss: 0.0056610107421875|unsuper_loss: 0.0
-average reward score: 5.171875
--------------------------------------------------------------------------------------
-|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.60s (70.19%) |Training time=0.58s (25.50%) |Others=0.10 (4.31%)|CurSamplesPerSec=14.04 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7771|ppo_ep: 1|act_loss: -0.0009503364562988281|cri_loss: -0.00011730194091796875|unsuper_loss: 0.0
-average reward score: 5.25
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.07%) |Training time=0.46s (21.39%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7772|ppo_ep: 1|act_loss: -0.03314208984375|cri_loss: -0.0156707763671875|unsuper_loss: 0.0
-average reward score: 5.0546875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.10%) |Training time=0.46s (21.27%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7773|ppo_ep: 1|act_loss: -0.012664794921875|cri_loss: -0.006103515625|unsuper_loss: 0.0
-average reward score: 4.87890625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.13%) |Training time=0.46s (21.31%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7774|ppo_ep: 1|act_loss: 0.0161590576171875|cri_loss: 0.00826263427734375|unsuper_loss: 0.0
-average reward score: 5.40234375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.94%) |Training time=0.47s (21.42%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7775|ppo_ep: 1|act_loss: -0.0257110595703125|cri_loss: -0.0124664306640625|unsuper_loss: 0.0
-average reward score: 4.8984375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.43%) |Training time=0.46s (21.06%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7776|ppo_ep: 1|act_loss: -0.032073974609375|cri_loss: -0.014984130859375|unsuper_loss: 0.0
-average reward score: 5.5703125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.03%) |Training time=0.46s (21.29%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7777|ppo_ep: 1|act_loss: -0.016937255859375|cri_loss: -0.007793426513671875|unsuper_loss: 0.0
-average reward score: 5.93359375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.11%) |Training time=0.46s (21.33%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7778|ppo_ep: 1|act_loss: -0.0005383491516113281|cri_loss: -0.00024020671844482422|unsuper_loss: 0.0
-average reward score: 5.74609375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.08%) |Training time=0.46s (21.32%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.56
-[2023-04-14 13:32:25,299] [INFO] [logging.py:96:log_dist] [Rank 0] step=7780, skipped=97, lr=[1.1856368021628366e-07, 1.1856368021628366e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:32:25,318] [INFO] [timer.py:199:stop] epoch=0/micro_step=7780/global_step=7780, RunningAvgSamplesPerSec=105.68915657133162, CurrSamplesPerSec=104.81370925924769, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:32:25,410] [INFO] [logging.py:96:log_dist] [Rank 0] step=7780, skipped=134, lr=[6.952530088022713e-08, 6.952530088022713e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7779|ppo_ep: 1|act_loss: 0.00714874267578125|cri_loss: 0.0036907196044921875|unsuper_loss: 0.0
-average reward score: 5.87109375
--------------------------------------------------------------------------------------
-|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.72s (75.23%) |Training time=0.47s (20.48%) |Others=0.10 (4.29%)|CurSamplesPerSec=14.01 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7780|ppo_ep: 1|act_loss: -0.004230499267578125|cri_loss: -0.0018758773803710938|unsuper_loss: 0.0
-average reward score: 5.7734375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.24%) |Training time=0.46s (21.13%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7781|ppo_ep: 1|act_loss: 0.04168701171875|cri_loss: 0.0229034423828125|unsuper_loss: 0.0
-average reward score: 5.515625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.52%) |Training time=0.45s (20.92%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7782|ppo_ep: 1|act_loss: -0.016357421875|cri_loss: -0.00785064697265625|unsuper_loss: 0.0
-average reward score: 6.0078125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.38%) |Training time=0.45s (20.97%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7783|ppo_ep: 1|act_loss: 0.010711669921875|cri_loss: 0.005565643310546875|unsuper_loss: 0.0
-average reward score: 5.5625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.44%) |Training time=0.45s (20.91%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7784|ppo_ep: 1|act_loss: -0.024505615234375|cri_loss: -0.0116729736328125|unsuper_loss: 0.0
-average reward score: 5.15625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.36%) |Training time=0.45s (21.00%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7785|ppo_ep: 1|act_loss: 0.01143646240234375|cri_loss: 0.00632476806640625|unsuper_loss: 0.0
-average reward score: 5.28125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.44%) |Training time=0.45s (20.91%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7786|ppo_ep: 1|act_loss: -0.0153656005859375|cri_loss: -0.007503509521484375|unsuper_loss: 0.0
-average reward score: 5.53515625
--------------------------------------------------------------------------------------
-|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.77s (76.46%) |Training time=0.44s (19.20%) |Others=0.10 (4.34%)|CurSamplesPerSec=13.82 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7787|ppo_ep: 1|act_loss: -0.03143310546875|cri_loss: -0.014678955078125|unsuper_loss: 0.0
-average reward score: 5.1328125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.37%) |Training time=0.45s (21.07%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7788|ppo_ep: 1|act_loss: -0.008697509765625|cri_loss: -0.0040130615234375|unsuper_loss: 0.0
-average reward score: 5.13671875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.45%) |Training time=0.45s (20.91%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.56
-[2023-04-14 13:32:47,019] [INFO] [logging.py:96:log_dist] [Rank 0] step=7790, skipped=97, lr=[1.1450582533570157e-07, 1.1450582533570157e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:32:47,038] [INFO] [timer.py:199:stop] epoch=0/micro_step=7790/global_step=7790, RunningAvgSamplesPerSec=105.69526710763964, CurrSamplesPerSec=110.04501902986237, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:32:47,131] [INFO] [logging.py:96:log_dist] [Rank 0] step=7790, skipped=134, lr=[6.72892070489814e-08, 6.72892070489814e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7789|ppo_ep: 1|act_loss: 0.032135009765625|cri_loss: 0.016845703125|unsuper_loss: 0.0
-average reward score: 5.25390625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.39%) |Training time=0.45s (20.95%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7790|ppo_ep: 1|act_loss: 0.01026153564453125|cri_loss: 0.005901336669921875|unsuper_loss: 0.0
-average reward score: 5.9765625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.20%) |Training time=0.46s (21.17%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.56
-[2023-04-14 13:32:51,323] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 8192, but hysteresis is 2. Reducing hysteresis to 1
-epoch: 0|step: 7791|ppo_ep: 1|act_loss: 0.0299835205078125|cri_loss: 0.01529693603515625|unsuper_loss: 0.0
-average reward score: 6.15625
--------------------------------------------------------------------------------------
-|E2E latency=2.12s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.65%) |Training time=0.42s (19.73%) |Others=0.10 (4.62%)|CurSamplesPerSec=15.07 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7792|ppo_ep: 1|act_loss: 0.0181121826171875|cri_loss: 0.00939178466796875|unsuper_loss: 0.0
-average reward score: 5.65625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.91%) |Training time=0.44s (20.41%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7793|ppo_ep: 1|act_loss: 0.05157470703125|cri_loss: 0.028289794921875|unsuper_loss: 0.0
-average reward score: 5.875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.64%) |Training time=0.45s (20.79%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7794|ppo_ep: 1|act_loss: 0.0053863525390625|cri_loss: 0.00293731689453125|unsuper_loss: 0.0
-average reward score: 5.12109375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.55%) |Training time=0.45s (20.79%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7795|ppo_ep: 1|act_loss: 0.0040130615234375|cri_loss: 0.0021514892578125|unsuper_loss: 0.0
-average reward score: 5.46484375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.76%) |Training time=0.44s (20.67%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7796|ppo_ep: 1|act_loss: -0.007541656494140625|cri_loss: -0.0037384033203125|unsuper_loss: 0.0
-average reward score: 5.26171875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.53%) |Training time=0.45s (20.82%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7797|ppo_ep: 1|act_loss: 0.0087738037109375|cri_loss: 0.00452423095703125|unsuper_loss: 0.0
-average reward score: 6.40625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.63%) |Training time=0.45s (20.80%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7798|ppo_ep: 1|act_loss: -0.00363922119140625|cri_loss: -0.0017547607421875|unsuper_loss: 0.0
-average reward score: 5.765625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.44%) |Training time=0.45s (20.90%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.56
-[2023-04-14 13:33:08,492] [INFO] [logging.py:96:log_dist] [Rank 0] step=7800, skipped=98, lr=[1.1091345110884795e-07, 1.1091345110884795e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:33:08,510] [INFO] [timer.py:199:stop] epoch=0/micro_step=7800/global_step=7800, RunningAvgSamplesPerSec=105.7045714232828, CurrSamplesPerSec=112.16469150844927, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:33:08,603] [INFO] [logging.py:96:log_dist] [Rank 0] step=7800, skipped=134, lr=[6.508917186028196e-08, 6.508917186028196e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7799|ppo_ep: 1|act_loss: 0.03814697265625|cri_loss: 0.020233154296875|unsuper_loss: 0.0
-average reward score: 5.65625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.50%) |Training time=0.45s (20.85%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7800|ppo_ep: 1|act_loss: -0.0081634521484375|cri_loss: -0.003772735595703125|unsuper_loss: 0.0
-average reward score: 4.70703125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.48%) |Training time=0.45s (20.82%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7801|ppo_ep: 1|act_loss: -0.009796142578125|cri_loss: -0.0047454833984375|unsuper_loss: 0.0
-average reward score: 5.51171875
--------------------------------------------------------------------------------------
-|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.78s (76.42%) |Training time=0.45s (19.35%) |Others=0.10 (4.22%)|CurSamplesPerSec=13.73 |AvgSamplesPerSec=14.56
-[2023-04-14 13:33:15,111] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 8192, reducing to 4096
-epoch: 0|step: 7802|ppo_ep: 1|act_loss: -0.0110321044921875|cri_loss: -0.005035400390625|unsuper_loss: 0.0
-average reward score: 5.4609375
--------------------------------------------------------------------------------------
-|E2E latency=2.12s |Gather latency=0.00s (0.00%) |Generate time=1.60s (75.41%) |Training time=0.42s (19.85%) |Others=0.10 (4.74%)|CurSamplesPerSec=15.08 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7803|ppo_ep: 1|act_loss: 0.02301025390625|cri_loss: 0.01174163818359375|unsuper_loss: 0.0
-average reward score: 5.55078125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.80%) |Training time=0.45s (20.66%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7804|ppo_ep: 1|act_loss: 0.0006136894226074219|cri_loss: 0.00036263465881347656|unsuper_loss: 0.0
-average reward score: 5.7890625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.15%) |Training time=0.46s (21.19%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7805|ppo_ep: 1|act_loss: -0.0016498565673828125|cri_loss: -0.0007781982421875|unsuper_loss: 0.0
-average reward score: 6.2890625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.71%) |Training time=0.44s (20.64%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7806|ppo_ep: 1|act_loss: 0.015472412109375|cri_loss: 0.00795745849609375|unsuper_loss: 0.0
-average reward score: 5.4609375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.51%) |Training time=0.45s (20.83%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7807|ppo_ep: 1|act_loss: 0.0377197265625|cri_loss: 0.021759033203125|unsuper_loss: 0.0
-average reward score: 5.2890625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.23%) |Training time=0.46s (21.22%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7808|ppo_ep: 1|act_loss: -0.02886962890625|cri_loss: -0.014007568359375|unsuper_loss: 0.0
-average reward score: 5.26171875
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.04%) |Training time=0.47s (21.37%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.56
-[2023-04-14 13:33:30,325] [INFO] [logging.py:96:log_dist] [Rank 0] step=7810, skipped=99, lr=[1.073776749444405e-07, 1.073776749444405e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:33:30,344] [INFO] [timer.py:199:stop] epoch=0/micro_step=7810/global_step=7810, RunningAvgSamplesPerSec=105.71154895925527, CurrSamplesPerSec=110.02490234735241, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:33:30,436] [INFO] [logging.py:96:log_dist] [Rank 0] step=7810, skipped=134, lr=[6.292522792395812e-08, 6.292522792395812e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7809|ppo_ep: 1|act_loss: -0.0090484619140625|cri_loss: -0.004367828369140625|unsuper_loss: 0.0
-average reward score: 5.5625
--------------------------------------------------------------------------------------
-|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.71s (75.65%) |Training time=0.45s (20.02%) |Others=0.10 (4.33%)|CurSamplesPerSec=14.13 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7810|ppo_ep: 1|act_loss: 0.044677734375|cri_loss: 0.022857666015625|unsuper_loss: 0.0
-average reward score: 5.76953125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.45%) |Training time=0.45s (20.90%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7811|ppo_ep: 1|act_loss: -0.0088348388671875|cri_loss: -0.00435638427734375|unsuper_loss: 0.0
-average reward score: 5.28515625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.79%) |Training time=0.44s (20.62%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7812|ppo_ep: 1|act_loss: -0.028411865234375|cri_loss: -0.01348114013671875|unsuper_loss: 0.0
-average reward score: 5.10546875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.80%) |Training time=0.44s (20.54%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7813|ppo_ep: 1|act_loss: -0.03485107421875|cri_loss: -0.0166015625|unsuper_loss: 0.0
-average reward score: 5.78125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.51%) |Training time=0.45s (20.84%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7814|ppo_ep: 1|act_loss: 0.0007061958312988281|cri_loss: 0.0004711151123046875|unsuper_loss: 0.0
-average reward score: 5.078125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.52%) |Training time=0.45s (20.83%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7815|ppo_ep: 1|act_loss: -0.08453369140625|cri_loss: -0.03948974609375|unsuper_loss: 0.0
-average reward score: 5.54296875
--------------------------------------------------------------------------------------
-|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.61s (72.42%) |Training time=0.46s (20.95%) |Others=0.15 (6.64%)|CurSamplesPerSec=14.43 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7816|ppo_ep: 1|act_loss: 0.001010894775390625|cri_loss: 0.0006361007690429688|unsuper_loss: 0.0
-average reward score: 4.92578125
--------------------------------------------------------------------------------------
-|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.67s (75.08%) |Training time=0.45s (20.05%) |Others=0.11 (4.87%)|CurSamplesPerSec=14.36 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7817|ppo_ep: 1|act_loss: -0.04296875|cri_loss: -0.021209716796875|unsuper_loss: 0.0
-average reward score: 4.84375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.58%) |Training time=0.45s (20.80%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7818|ppo_ep: 1|act_loss: -0.0019664764404296875|cri_loss: -0.0007276535034179688|unsuper_loss: 0.0
-average reward score: 5.46875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.47%) |Training time=0.45s (20.88%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.56
-[2023-04-14 13:33:51,976] [INFO] [logging.py:96:log_dist] [Rank 0] step=7820, skipped=99, lr=[1.0351546681472116e-07, 1.0351546681472116e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:33:51,994] [INFO] [timer.py:199:stop] epoch=0/micro_step=7820/global_step=7820, RunningAvgSamplesPerSec=105.7191999963779, CurrSamplesPerSec=114.77339353437402, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:33:52,087] [INFO] [logging.py:96:log_dist] [Rank 0] step=7820, skipped=134, lr=[6.079740731487898e-08, 6.079740731487898e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7819|ppo_ep: 1|act_loss: 0.0115509033203125|cri_loss: 0.00637054443359375|unsuper_loss: 0.0
-average reward score: 5.8359375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.89%) |Training time=0.44s (20.54%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7820|ppo_ep: 1|act_loss: -0.0177001953125|cri_loss: -0.00807952880859375|unsuper_loss: 0.0
-average reward score: 5.40234375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.54%) |Training time=0.42s (19.75%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.56
-[2023-04-14 13:33:56,375] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
-epoch: 0|step: 7821|ppo_ep: 1|act_loss: 0.01320648193359375|cri_loss: 0.0069122314453125|unsuper_loss: 0.0
-average reward score: 6.01953125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (75.07%) |Training time=0.44s (20.82%) |Others=0.09 (4.10%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.56
-[2023-04-14 13:33:58,514] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
-epoch: 0|step: 7822|ppo_ep: 1|act_loss: -0.004489898681640625|cri_loss: -0.001972198486328125|unsuper_loss: 0.0
-average reward score: 6.29296875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.81%) |Training time=0.45s (20.98%) |Others=0.09 (4.21%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7823|ppo_ep: 1|act_loss: -0.0143280029296875|cri_loss: -0.00704193115234375|unsuper_loss: 0.0
-average reward score: 5.59375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.34%) |Training time=0.45s (21.02%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7824|ppo_ep: 1|act_loss: -0.003658294677734375|cri_loss: -0.0014791488647460938|unsuper_loss: 0.0
-average reward score: 5.6796875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.35%) |Training time=0.45s (20.99%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7825|ppo_ep: 1|act_loss: 0.0084686279296875|cri_loss: 0.0049591064453125|unsuper_loss: 0.0
-average reward score: 5.63671875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.50%) |Training time=0.45s (20.83%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7826|ppo_ep: 1|act_loss: -0.00666046142578125|cri_loss: -0.003215789794921875|unsuper_loss: 0.0
-average reward score: 5.00390625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.30%) |Training time=0.45s (21.07%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7827|ppo_ep: 1|act_loss: 0.016510009765625|cri_loss: 0.0093536376953125|unsuper_loss: 0.0
-average reward score: 4.7734375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.37%) |Training time=0.45s (20.96%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7828|ppo_ep: 1|act_loss: 0.022125244140625|cri_loss: 0.01151275634765625|unsuper_loss: 0.0
-average reward score: 5.15625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.44%) |Training time=0.45s (20.89%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.56
-[2023-04-14 13:34:13,448] [INFO] [logging.py:96:log_dist] [Rank 0] step=7830, skipped=99, lr=[9.972324247574424e-08, 9.972324247574424e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:34:13,466] [INFO] [timer.py:199:stop] epoch=0/micro_step=7830/global_step=7830, RunningAvgSamplesPerSec=105.7271991751751, CurrSamplesPerSec=111.44625006331337, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:34:13,559] [INFO] [logging.py:96:log_dist] [Rank 0] step=7830, skipped=136, lr=[5.912118083573476e-08, 5.912118083573476e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7829|ppo_ep: 1|act_loss: 0.01406097412109375|cri_loss: 0.007244110107421875|unsuper_loss: 0.0
-average reward score: 5.25390625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.46%) |Training time=0.45s (20.98%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7830|ppo_ep: 1|act_loss: 0.0462646484375|cri_loss: 0.0237579345703125|unsuper_loss: 0.0
-average reward score: 6.25
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.41%) |Training time=0.45s (20.94%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7831|ppo_ep: 1|act_loss: 0.0146026611328125|cri_loss: 0.00739288330078125|unsuper_loss: 0.0
-average reward score: 5.12109375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.66%) |Training time=0.44s (20.66%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7832|ppo_ep: 1|act_loss: 0.0281982421875|cri_loss: 0.01439666748046875|unsuper_loss: 0.0
-average reward score: 5.42578125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.82%) |Training time=0.44s (20.51%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7833|ppo_ep: 1|act_loss: 0.0078125|cri_loss: 0.00453948974609375|unsuper_loss: 0.0
-average reward score: 5.5390625
--------------------------------------------------------------------------------------
-|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.76s (76.40%) |Training time=0.45s (19.34%) |Others=0.10 (4.25%)|CurSamplesPerSec=13.86 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7834|ppo_ep: 1|act_loss: 0.00756072998046875|cri_loss: 0.003940582275390625|unsuper_loss: 0.0
-average reward score: 5.359375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.83%) |Training time=0.44s (20.39%) |Others=0.10 (4.79%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7835|ppo_ep: 1|act_loss: 0.00428009033203125|cri_loss: 0.002353668212890625|unsuper_loss: 0.0
-average reward score: 5.17578125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.33%) |Training time=0.43s (20.07%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7836|ppo_ep: 1|act_loss: 0.00107574462890625|cri_loss: 0.0007843971252441406|unsuper_loss: 0.0
-average reward score: 4.5078125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.18%) |Training time=0.46s (21.15%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7837|ppo_ep: 1|act_loss: 0.01763916015625|cri_loss: 0.009185791015625|unsuper_loss: 0.0
-average reward score: 6.05859375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.71%) |Training time=0.48s (21.78%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7838|ppo_ep: 1|act_loss: -0.01302337646484375|cri_loss: -0.006412506103515625|unsuper_loss: 0.0
-average reward score: 5.3125
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.09%) |Training time=0.47s (21.35%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.50 |AvgSamplesPerSec=14.56
-[2023-04-14 13:34:35,280] [INFO] [logging.py:96:log_dist] [Rank 0] step=7840, skipped=99, lr=[9.60010581374241e-08, 9.60010581374241e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:34:35,298] [INFO] [timer.py:199:stop] epoch=0/micro_step=7840/global_step=7840, RunningAvgSamplesPerSec=105.7335546704453, CurrSamplesPerSec=107.28166269409262, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:34:35,392] [INFO] [logging.py:96:log_dist] [Rank 0] step=7840, skipped=136, lr=[5.705846133496118e-08, 5.705846133496118e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7839|ppo_ep: 1|act_loss: -0.00060272216796875|cri_loss: 0.000186920166015625|unsuper_loss: 0.0
-average reward score: 5.5859375
--------------------------------------------------------------------------------------
-|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.70s (75.21%) |Training time=0.46s (20.33%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.12 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7840|ppo_ep: 1|act_loss: 0.01113128662109375|cri_loss: 0.00600433349609375|unsuper_loss: 0.0
-average reward score: 5.60546875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.38%) |Training time=0.48s (22.03%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.56
-[2023-04-14 13:34:39,625] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 4096, reducing to 2048
-epoch: 0|step: 7841|ppo_ep: 1|act_loss: 0.04718017578125|cri_loss: 0.026702880859375|unsuper_loss: 0.0
-average reward score: 5.7109375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.88%) |Training time=0.44s (20.44%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7842|ppo_ep: 1|act_loss: -0.0233612060546875|cri_loss: -0.0114898681640625|unsuper_loss: 0.0
-average reward score: 5.92578125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.98%) |Training time=0.46s (21.38%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7843|ppo_ep: 1|act_loss: -0.00299072265625|cri_loss: -0.001346588134765625|unsuper_loss: 0.0
-average reward score: 5.484375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.72%) |Training time=0.47s (21.67%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7844|ppo_ep: 1|act_loss: -0.0024204254150390625|cri_loss: -0.0009531974792480469|unsuper_loss: 0.0
-average reward score: 5.0078125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.67%) |Training time=0.47s (21.72%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7845|ppo_ep: 1|act_loss: 0.016998291015625|cri_loss: 0.0087432861328125|unsuper_loss: 0.0
-average reward score: 5.81640625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.86%) |Training time=0.47s (21.62%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7846|ppo_ep: 1|act_loss: 0.0045928955078125|cri_loss: 0.0025310516357421875|unsuper_loss: 0.0
-average reward score: 5.671875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.59%) |Training time=0.47s (21.45%) |Others=0.11 (4.96%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7847|ppo_ep: 1|act_loss: 0.007110595703125|cri_loss: 0.003620147705078125|unsuper_loss: 0.0
-average reward score: 5.28125
--------------------------------------------------------------------------------------
-|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.76s (74.53%) |Training time=0.50s (21.23%) |Others=0.10 (4.24%)|CurSamplesPerSec=13.52 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7848|ppo_ep: 1|act_loss: -0.0013885498046875|cri_loss: -0.0004911422729492188|unsuper_loss: 0.0
-average reward score: 4.671875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.12%) |Training time=0.46s (21.34%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.56
-[2023-04-14 13:34:57,189] [INFO] [logging.py:96:log_dist] [Rank 0] step=7850, skipped=100, lr=[9.271102205441317e-08, 9.271102205441317e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:34:57,208] [INFO] [timer.py:199:stop] epoch=0/micro_step=7850/global_step=7850, RunningAvgSamplesPerSec=105.73152737795587, CurrSamplesPerSec=106.00033328041914, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:34:57,301] [INFO] [logging.py:96:log_dist] [Rank 0] step=7850, skipped=136, lr=[5.5031952121065715e-08, 5.5031952121065715e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7849|ppo_ep: 1|act_loss: 0.003459930419921875|cri_loss: 0.001934051513671875|unsuper_loss: 0.0
-average reward score: 5.44921875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.02%) |Training time=0.47s (21.43%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7850|ppo_ep: 1|act_loss: 0.007312774658203125|cri_loss: 0.003902435302734375|unsuper_loss: 0.0
-average reward score: 5.87890625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.53%) |Training time=0.48s (21.84%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7851|ppo_ep: 1|act_loss: 0.00341033935546875|cri_loss: 0.002071380615234375|unsuper_loss: 0.0
-average reward score: 5.3671875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.10%) |Training time=0.46s (21.35%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7852|ppo_ep: 1|act_loss: -0.00308990478515625|cri_loss: 0.0007801055908203125|unsuper_loss: 0.0
-average reward score: 5.4765625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.55%) |Training time=0.48s (21.83%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7853|ppo_ep: 1|act_loss: 0.020660400390625|cri_loss: 0.010772705078125|unsuper_loss: 0.0
-average reward score: 5.33203125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.72%) |Training time=0.47s (21.67%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7854|ppo_ep: 1|act_loss: 0.030853271484375|cri_loss: 0.0167236328125|unsuper_loss: 0.0
-average reward score: 5.0703125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.80%) |Training time=0.47s (21.59%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7855|ppo_ep: 1|act_loss: 0.0005464553833007812|cri_loss: 0.0004324913024902344|unsuper_loss: 0.0
-average reward score: 5.0390625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.88%) |Training time=0.47s (21.60%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7856|ppo_ep: 1|act_loss: -0.03692626953125|cri_loss: -0.0179595947265625|unsuper_loss: 0.0
-average reward score: 4.7734375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.73%) |Training time=0.47s (21.67%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7857|ppo_ep: 1|act_loss: -0.0096435546875|cri_loss: -0.0046844482421875|unsuper_loss: 0.0
-average reward score: 5.5859375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.92%) |Training time=0.47s (21.55%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7858|ppo_ep: 1|act_loss: -0.0118255615234375|cri_loss: -0.005863189697265625|unsuper_loss: 0.0
-average reward score: 5.328125
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.86%) |Training time=0.47s (21.57%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.57 |AvgSamplesPerSec=14.56
-[2023-04-14 13:35:18,956] [INFO] [logging.py:96:log_dist] [Rank 0] step=7860, skipped=100, lr=[8.912206485681388e-08, 8.912206485681388e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:35:18,975] [INFO] [timer.py:199:stop] epoch=0/micro_step=7860/global_step=7860, RunningAvgSamplesPerSec=105.72896164280641, CurrSamplesPerSec=104.51759121109141, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:35:19,067] [INFO] [logging.py:96:log_dist] [Rank 0] step=7860, skipped=136, lr=[5.304168323180425e-08, 5.304168323180425e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7859|ppo_ep: 1|act_loss: 0.0003643035888671875|cri_loss: 0.00044035911560058594|unsuper_loss: 0.0
-average reward score: 5.453125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.89%) |Training time=0.47s (21.60%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7860|ppo_ep: 1|act_loss: -0.00658416748046875|cri_loss: -0.0031833648681640625|unsuper_loss: 0.0
-average reward score: 5.69140625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.90%) |Training time=0.47s (21.48%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7861|ppo_ep: 1|act_loss: 0.002376556396484375|cri_loss: 0.001617431640625|unsuper_loss: 0.0
-average reward score: 5.74609375
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.01%) |Training time=0.47s (21.51%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7862|ppo_ep: 1|act_loss: 0.0129241943359375|cri_loss: 0.006740570068359375|unsuper_loss: 0.0
-average reward score: 5.5859375
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.42%) |Training time=0.47s (21.65%) |Others=0.11 (4.93%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7863|ppo_ep: 1|act_loss: 0.0234375|cri_loss: 0.01204681396484375|unsuper_loss: 0.0
-average reward score: 5.44140625
--------------------------------------------------------------------------------------
-|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.75s (74.61%) |Training time=0.48s (20.60%) |Others=0.11 (4.79%)|CurSamplesPerSec=13.66 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7864|ppo_ep: 1|act_loss: -0.00978851318359375|cri_loss: -0.0033740997314453125|unsuper_loss: 0.0
-average reward score: 5.36328125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.05%) |Training time=0.46s (21.33%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7865|ppo_ep: 1|act_loss: 0.0022487640380859375|cri_loss: 0.0019283294677734375|unsuper_loss: 0.0
-average reward score: 5.625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.18%) |Training time=0.46s (21.24%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7866|ppo_ep: 1|act_loss: 0.0089569091796875|cri_loss: 0.00506591796875|unsuper_loss: 0.0
-average reward score: 5.484375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.21%) |Training time=0.46s (21.16%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7867|ppo_ep: 1|act_loss: 0.0088348388671875|cri_loss: 0.004520416259765625|unsuper_loss: 0.0
-average reward score: 5.51171875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.49%) |Training time=0.46s (20.98%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7868|ppo_ep: 1|act_loss: -0.03363037109375|cri_loss: -0.0160980224609375|unsuper_loss: 0.0
-average reward score: 5.90234375
--------------------------------------------------------------------------------------
-|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.71s (74.73%) |Training time=0.48s (20.89%) |Others=0.10 (4.39%)|CurSamplesPerSec=14.00 |AvgSamplesPerSec=14.56
-[2023-04-14 13:35:40,958] [INFO] [logging.py:96:log_dist] [Rank 0] step=7870, skipped=100, lr=[8.560330479489633e-08, 8.560330479489633e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:35:40,977] [INFO] [timer.py:199:stop] epoch=0/micro_step=7870/global_step=7870, RunningAvgSamplesPerSec=105.72843790630478, CurrSamplesPerSec=109.30619119053087, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:35:41,069] [INFO] [logging.py:96:log_dist] [Rank 0] step=7870, skipped=136, lr=[5.108768416776266e-08, 5.108768416776266e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7869|ppo_ep: 1|act_loss: 0.004215240478515625|cri_loss: 0.002651214599609375|unsuper_loss: 0.0
-average reward score: 5.265625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.33%) |Training time=0.46s (21.13%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7870|ppo_ep: 1|act_loss: 0.0010814666748046875|cri_loss: 0.0006008148193359375|unsuper_loss: 0.0
-average reward score: 5.28125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.24%) |Training time=0.46s (21.13%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7871|ppo_ep: 1|act_loss: 0.0111083984375|cri_loss: 0.00591278076171875|unsuper_loss: 0.0
-average reward score: 5.2578125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.36%) |Training time=0.45s (21.08%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7872|ppo_ep: 1|act_loss: -0.00525665283203125|cri_loss: -0.0023651123046875|unsuper_loss: 0.0
-average reward score: 5.3984375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.22%) |Training time=0.46s (21.14%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7873|ppo_ep: 1|act_loss: 0.024139404296875|cri_loss: 0.0124359130859375|unsuper_loss: 0.0
-average reward score: 4.65234375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.24%) |Training time=0.46s (21.22%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7874|ppo_ep: 1|act_loss: 0.008209228515625|cri_loss: 0.00452423095703125|unsuper_loss: 0.0
-average reward score: 5.24609375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.61%) |Training time=0.47s (21.79%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7875|ppo_ep: 1|act_loss: -0.016693115234375|cri_loss: -0.00795745849609375|unsuper_loss: 0.0
-average reward score: 5.4453125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.11%) |Training time=0.46s (21.36%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7876|ppo_ep: 1|act_loss: 0.00591278076171875|cri_loss: 0.003322601318359375|unsuper_loss: 0.0
-average reward score: 6.03515625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.89%) |Training time=0.47s (21.49%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7877|ppo_ep: 1|act_loss: -0.0061492919921875|cri_loss: -0.00229644775390625|unsuper_loss: 0.0
-average reward score: 5.84375
--------------------------------------------------------------------------------------
-|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.78s (75.92%) |Training time=0.47s (19.87%) |Others=0.10 (4.21%)|CurSamplesPerSec=13.67 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7878|ppo_ep: 1|act_loss: -0.009002685546875|cri_loss: -0.0041046142578125|unsuper_loss: 0.0
-average reward score: 5.7734375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.19%) |Training time=0.46s (21.26%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.56
-[2023-04-14 13:36:02,778] [INFO] [logging.py:96:log_dist] [Rank 0] step=7880, skipped=100, lr=[8.215479402517389e-08, 8.215479402517389e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:36:02,797] [INFO] [timer.py:199:stop] epoch=0/micro_step=7880/global_step=7880, RunningAvgSamplesPerSec=105.72969054760189, CurrSamplesPerSec=104.98801479963392, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:36:02,889] [INFO] [logging.py:96:log_dist] [Rank 0] step=7880, skipped=136, lr=[4.9169983891921294e-08, 4.9169983891921294e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7879|ppo_ep: 1|act_loss: -0.016082763671875|cri_loss: -0.00780487060546875|unsuper_loss: 0.0
-average reward score: 5.21875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.89%) |Training time=0.47s (21.59%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7880|ppo_ep: 1|act_loss: 0.02557373046875|cri_loss: 0.0131072998046875|unsuper_loss: 0.0
-average reward score: 5.421875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.83%) |Training time=0.47s (21.54%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7881|ppo_ep: 1|act_loss: 0.0496826171875|cri_loss: 0.025543212890625|unsuper_loss: 0.0
-average reward score: 5.15234375
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.47%) |Training time=0.48s (22.03%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7882|ppo_ep: 1|act_loss: 0.00325775146484375|cri_loss: 0.001895904541015625|unsuper_loss: 0.0
-average reward score: 5.42578125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.26%) |Training time=0.48s (22.16%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7883|ppo_ep: 1|act_loss: 0.013031005859375|cri_loss: 0.006710052490234375|unsuper_loss: 0.0
-average reward score: 5.0859375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.68%) |Training time=0.47s (21.80%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7884|ppo_ep: 1|act_loss: 0.00453948974609375|cri_loss: 0.0025043487548828125|unsuper_loss: 0.0
-average reward score: 5.1875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.36%) |Training time=0.48s (22.04%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7885|ppo_ep: 1|act_loss: -0.0011043548583984375|cri_loss: -0.0003352165222167969|unsuper_loss: 0.0
-average reward score: 5.703125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.70%) |Training time=0.47s (21.75%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7886|ppo_ep: 1|act_loss: 0.020843505859375|cri_loss: 0.01062774658203125|unsuper_loss: 0.0
-average reward score: 5.5234375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.52%) |Training time=0.48s (21.89%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7887|ppo_ep: 1|act_loss: 0.002925872802734375|cri_loss: 0.0016040802001953125|unsuper_loss: 0.0
-average reward score: 5.76953125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.83%) |Training time=0.47s (21.66%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7888|ppo_ep: 1|act_loss: 0.0274200439453125|cri_loss: 0.01441192626953125|unsuper_loss: 0.0
-average reward score: 5.78515625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.77%) |Training time=0.47s (21.63%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.56
-[2023-04-14 13:36:24,543] [INFO] [logging.py:96:log_dist] [Rank 0] step=7890, skipped=100, lr=[7.877658366289522e-08, 7.877658366289522e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:36:24,562] [INFO] [timer.py:199:stop] epoch=0/micro_step=7890/global_step=7890, RunningAvgSamplesPerSec=105.72515201238446, CurrSamplesPerSec=102.51301521834603, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:36:24,654] [INFO] [logging.py:96:log_dist] [Rank 0] step=7890, skipped=136, lr=[4.7288610829223645e-08, 4.7288610829223645e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7889|ppo_ep: 1|act_loss: -0.01506805419921875|cri_loss: -0.004730224609375|unsuper_loss: 0.0
-average reward score: 5.01953125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.69%) |Training time=0.47s (21.81%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7890|ppo_ep: 1|act_loss: 0.00989532470703125|cri_loss: 0.00505828857421875|unsuper_loss: 0.0
-average reward score: 5.7890625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.67%) |Training time=0.47s (21.73%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7891|ppo_ep: 1|act_loss: 0.031524658203125|cri_loss: 0.016021728515625|unsuper_loss: 0.0
-average reward score: 5.61328125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.73%) |Training time=0.47s (21.76%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7892|ppo_ep: 1|act_loss: 0.004886627197265625|cri_loss: 0.0025768280029296875|unsuper_loss: 0.0
-average reward score: 5.5546875
--------------------------------------------------------------------------------------
-|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.60s (71.64%) |Training time=0.49s (21.75%) |Others=0.15 (6.61%)|CurSamplesPerSec=14.32 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7893|ppo_ep: 1|act_loss: 0.0214385986328125|cri_loss: 0.0110321044921875|unsuper_loss: 0.0
-average reward score: 6.4765625
--------------------------------------------------------------------------------------
-|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.66s (74.48%) |Training time=0.47s (21.09%) |Others=0.10 (4.43%)|CurSamplesPerSec=14.36 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7894|ppo_ep: 1|act_loss: -0.004444122314453125|cri_loss: -0.0020580291748046875|unsuper_loss: 0.0
-average reward score: 5.390625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.65%) |Training time=0.47s (21.75%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7895|ppo_ep: 1|act_loss: 0.03582763671875|cri_loss: 0.018310546875|unsuper_loss: 0.0
-average reward score: 5.14453125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.60%) |Training time=0.48s (21.85%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7896|ppo_ep: 1|act_loss: 0.0079345703125|cri_loss: 0.005886077880859375|unsuper_loss: 0.0
-average reward score: 5.27734375
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.59%) |Training time=0.48s (21.83%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7897|ppo_ep: 1|act_loss: 0.0352783203125|cri_loss: 0.0183258056640625|unsuper_loss: 0.0
-average reward score: 6.36328125
--------------------------------------------------------------------------------------
-|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.64s (72.27%) |Training time=0.51s (22.65%) |Others=0.12 (5.09%)|CurSamplesPerSec=14.12 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7898|ppo_ep: 1|act_loss: 0.043914794921875|cri_loss: 0.023712158203125|unsuper_loss: 0.0
-average reward score: 5.28515625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.99%) |Training time=0.46s (21.38%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.56
-[2023-04-14 13:36:46,507] [INFO] [logging.py:96:log_dist] [Rank 0] step=7900, skipped=100, lr=[7.54687237812874e-08, 7.54687237812874e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:36:46,526] [INFO] [timer.py:199:stop] epoch=0/micro_step=7900/global_step=7900, RunningAvgSamplesPerSec=105.7195812062558, CurrSamplesPerSec=103.41567842742634, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:36:46,618] [INFO] [logging.py:96:log_dist] [Rank 0] step=7900, skipped=136, lr=[4.544359286615785e-08, 4.544359286615785e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7899|ppo_ep: 1|act_loss: -0.022247314453125|cri_loss: -0.0109405517578125|unsuper_loss: 0.0
-average reward score: 5.23046875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.79%) |Training time=0.47s (21.71%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7900|ppo_ep: 1|act_loss: 0.0120697021484375|cri_loss: 0.00836944580078125|unsuper_loss: 0.0
-average reward score: 5.29296875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.86%) |Training time=0.47s (21.53%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7901|ppo_ep: 1|act_loss: -0.007129669189453125|cri_loss: -0.003482818603515625|unsuper_loss: 0.0
-average reward score: 5.30078125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.90%) |Training time=0.47s (21.52%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7902|ppo_ep: 1|act_loss: 0.0109100341796875|cri_loss: 0.005550384521484375|unsuper_loss: 0.0
-average reward score: 5.29296875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.05%) |Training time=0.46s (21.34%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7903|ppo_ep: 1|act_loss: 0.01129913330078125|cri_loss: 0.005908966064453125|unsuper_loss: 0.0
-average reward score: 5.2421875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.73%) |Training time=0.47s (21.67%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7904|ppo_ep: 1|act_loss: 0.0010099411010742188|cri_loss: 0.0005588531494140625|unsuper_loss: 0.0
-average reward score: 5.76171875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.73%) |Training time=0.47s (21.66%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7905|ppo_ep: 1|act_loss: 0.04443359375|cri_loss: 0.0255889892578125|unsuper_loss: 0.0
-average reward score: 4.86328125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.87%) |Training time=0.47s (21.60%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7906|ppo_ep: 1|act_loss: 0.009063720703125|cri_loss: 0.004764556884765625|unsuper_loss: 0.0
-average reward score: 5.765625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.71%) |Training time=0.47s (21.68%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7907|ppo_ep: 1|act_loss: -0.0076751708984375|cri_loss: -0.0036468505859375|unsuper_loss: 0.0
-average reward score: 5.1328125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.98%) |Training time=0.46s (21.32%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7908|ppo_ep: 1|act_loss: -0.018890380859375|cri_loss: -0.00931549072265625|unsuper_loss: 0.0
-average reward score: 5.33984375
--------------------------------------------------------------------------------------
-|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.75s (74.18%) |Training time=0.50s (21.11%) |Others=0.11 (4.71%)|CurSamplesPerSec=13.54 |AvgSamplesPerSec=14.56
-[2023-04-14 13:37:08,435] [INFO] [logging.py:96:log_dist] [Rank 0] step=7910, skipped=100, lr=[7.223126341081292e-08, 7.223126341081292e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:37:08,454] [INFO] [timer.py:199:stop] epoch=0/micro_step=7910/global_step=7910, RunningAvgSamplesPerSec=105.7167886290013, CurrSamplesPerSec=103.1705201890031, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:37:08,547] [INFO] [logging.py:96:log_dist] [Rank 0] step=7910, skipped=136, lr=[4.3634957350340844e-08, 4.3634957350340844e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7909|ppo_ep: 1|act_loss: -0.00423431396484375|cri_loss: -0.0020046234130859375|unsuper_loss: 0.0
-average reward score: 5.0703125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.74%) |Training time=0.47s (21.72%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7910|ppo_ep: 1|act_loss: 0.030242919921875|cri_loss: 0.015960693359375|unsuper_loss: 0.0
-average reward score: 5.3046875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.73%) |Training time=0.47s (21.67%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7911|ppo_ep: 1|act_loss: 0.00960540771484375|cri_loss: 0.0049896240234375|unsuper_loss: 0.0
-average reward score: 5.6015625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.01%) |Training time=0.47s (21.47%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7912|ppo_ep: 1|act_loss: 0.034393310546875|cri_loss: 0.017669677734375|unsuper_loss: 0.0
-average reward score: 5.2890625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.01%) |Training time=0.46s (21.34%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7913|ppo_ep: 1|act_loss: -0.016571044921875|cri_loss: -0.0081634521484375|unsuper_loss: 0.0
-average reward score: 5.73828125
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.05%) |Training time=0.47s (21.38%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7914|ppo_ep: 1|act_loss: -0.0011844635009765625|cri_loss: -0.00016069412231445312|unsuper_loss: 0.0
-average reward score: 4.765625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.97%) |Training time=0.46s (21.40%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7915|ppo_ep: 1|act_loss: -0.01568603515625|cri_loss: -0.007717132568359375|unsuper_loss: 0.0
-average reward score: 5.6015625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.94%) |Training time=0.46s (21.44%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7916|ppo_ep: 1|act_loss: 0.031219482421875|cri_loss: 0.0164794921875|unsuper_loss: 0.0
-average reward score: 5.03125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.75%) |Training time=0.47s (21.65%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7917|ppo_ep: 1|act_loss: 0.0123443603515625|cri_loss: 0.006290435791015625|unsuper_loss: 0.0
-average reward score: 5.88671875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.73%) |Training time=0.47s (21.63%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7918|ppo_ep: 1|act_loss: 0.0018014907836914062|cri_loss: 0.001071929931640625|unsuper_loss: 0.0
-average reward score: 5.1015625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.87%) |Training time=0.47s (21.51%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.56
-[2023-04-14 13:37:30,149] [INFO] [logging.py:96:log_dist] [Rank 0] step=7920, skipped=100, lr=[6.906425053844542e-08, 6.906425053844542e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:37:30,168] [INFO] [timer.py:199:stop] epoch=0/micro_step=7920/global_step=7920, RunningAvgSamplesPerSec=105.7158984428223, CurrSamplesPerSec=105.03155851879676, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:37:30,260] [INFO] [logging.py:96:log_dist] [Rank 0] step=7920, skipped=136, lr=[4.186273109011374e-08, 4.186273109011374e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7919|ppo_ep: 1|act_loss: 0.0194854736328125|cri_loss: 0.0098876953125|unsuper_loss: 0.0
-average reward score: 4.953125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.88%) |Training time=0.47s (21.59%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7920|ppo_ep: 1|act_loss: -0.015533447265625|cri_loss: -0.007617950439453125|unsuper_loss: 0.0
-average reward score: 5.390625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.85%) |Training time=0.47s (21.52%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7921|ppo_ep: 1|act_loss: 0.0017957687377929688|cri_loss: 0.0020465850830078125|unsuper_loss: 0.0
-average reward score: 5.64453125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.81%) |Training time=0.47s (21.57%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7922|ppo_ep: 1|act_loss: -0.0041046142578125|cri_loss: -0.0018215179443359375|unsuper_loss: 0.0
-average reward score: 5.73828125
--------------------------------------------------------------------------------------
-|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.60s (70.66%) |Training time=0.47s (20.87%) |Others=0.19 (8.47%)|CurSamplesPerSec=14.14 |AvgSamplesPerSec=14.56
-[2023-04-14 13:37:39,018] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
-epoch: 0|step: 7923|ppo_ep: 1|act_loss: 0.0170135498046875|cri_loss: 0.00887298583984375|unsuper_loss: 0.0
-average reward score: 4.66796875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.27%) |Training time=0.47s (21.58%) |Others=0.09 (4.16%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.56
-[2023-04-14 13:37:41,177] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
-epoch: 0|step: 7924|ppo_ep: 1|act_loss: -0.002185821533203125|cri_loss: -0.001056671142578125|unsuper_loss: 0.0
-average reward score: 5.3359375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.23%) |Training time=0.46s (21.51%) |Others=0.09 (4.26%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7925|ppo_ep: 1|act_loss: 0.0026702880859375|cri_loss: 0.0013885498046875|unsuper_loss: 0.0
-average reward score: 5.00390625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.34%) |Training time=0.48s (22.10%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7926|ppo_ep: 1|act_loss: -0.0080108642578125|cri_loss: -0.0037822723388671875|unsuper_loss: 0.0
-average reward score: 5.0390625
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.17%) |Training time=0.47s (21.22%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7927|ppo_ep: 1|act_loss: -0.0149993896484375|cri_loss: -0.007312774658203125|unsuper_loss: 0.0
-average reward score: 4.66796875
--------------------------------------------------------------------------------------
-|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.70s (75.38%) |Training time=0.46s (20.27%) |Others=0.10 (4.35%)|CurSamplesPerSec=14.17 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7928|ppo_ep: 1|act_loss: -0.014862060546875|cri_loss: -0.006984710693359375|unsuper_loss: 0.0
-average reward score: 4.99609375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.38%) |Training time=0.45s (20.95%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.56
-[2023-04-14 13:37:51,969] [INFO] [logging.py:96:log_dist] [Rank 0] step=7930, skipped=100, lr=[6.596773210695512e-08, 6.596773210695512e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:37:51,987] [INFO] [timer.py:199:stop] epoch=0/micro_step=7930/global_step=7930, RunningAvgSamplesPerSec=105.71659977392, CurrSamplesPerSec=109.8774545198531, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:37:52,080] [INFO] [logging.py:96:log_dist] [Rank 0] step=7930, skipped=138, lr=[4.047118241664511e-08, 4.047118241664511e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7929|ppo_ep: 1|act_loss: 0.017242431640625|cri_loss: 0.009002685546875|unsuper_loss: 0.0
-average reward score: 5.3515625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.17%) |Training time=0.45s (21.17%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7930|ppo_ep: 1|act_loss: -0.021514892578125|cri_loss: -0.010650634765625|unsuper_loss: 0.0
-average reward score: 5.29296875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.36%) |Training time=0.45s (20.96%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7931|ppo_ep: 1|act_loss: 0.025634765625|cri_loss: 0.0132598876953125|unsuper_loss: 0.0
-average reward score: 6.0390625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.51%) |Training time=0.45s (20.91%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7932|ppo_ep: 1|act_loss: 0.01055145263671875|cri_loss: 0.005443572998046875|unsuper_loss: 0.0
-average reward score: 5.9140625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.24%) |Training time=0.45s (21.09%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7933|ppo_ep: 1|act_loss: -0.01123046875|cri_loss: -0.005527496337890625|unsuper_loss: 0.0
-average reward score: 6.484375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.25%) |Training time=0.45s (21.09%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7934|ppo_ep: 1|act_loss: 0.0125274658203125|cri_loss: 0.00664520263671875|unsuper_loss: 0.0
-average reward score: 5.55859375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.53%) |Training time=0.47s (21.84%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7935|ppo_ep: 1|act_loss: 0.00568389892578125|cri_loss: 0.003032684326171875|unsuper_loss: 0.0
-average reward score: 6.0234375
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.03%) |Training time=0.49s (22.20%) |Others=0.10 (4.77%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7936|ppo_ep: 1|act_loss: -0.01641845703125|cri_loss: -0.00799560546875|unsuper_loss: 0.0
-average reward score: 4.42578125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.99%) |Training time=0.46s (21.27%) |Others=0.10 (4.74%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7937|ppo_ep: 1|act_loss: 0.0003528594970703125|cri_loss: 0.00023508071899414062|unsuper_loss: 0.0
-average reward score: 5.4375
--------------------------------------------------------------------------------------
-|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.61s (69.68%) |Training time=0.60s (26.04%) |Others=0.10 (4.28%)|CurSamplesPerSec=13.81 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7938|ppo_ep: 1|act_loss: 0.043304443359375|cri_loss: 0.026031494140625|unsuper_loss: 0.0
-average reward score: 5.140625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.02%) |Training time=0.46s (21.32%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.56
-[2023-04-14 13:38:13,739] [INFO] [logging.py:96:log_dist] [Rank 0] step=7940, skipped=100, lr=[6.294175401421511e-08, 6.294175401421511e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:38:13,757] [INFO] [timer.py:199:stop] epoch=0/micro_step=7940/global_step=7940, RunningAvgSamplesPerSec=105.71327649972606, CurrSamplesPerSec=107.65157898873818, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:38:13,849] [INFO] [logging.py:96:log_dist] [Rank 0] step=7940, skipped=138, lr=[3.876455865061551e-08, 3.876455865061551e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7939|ppo_ep: 1|act_loss: -0.003467559814453125|cri_loss: -0.0015869140625|unsuper_loss: 0.0
-average reward score: 5.92578125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.23%) |Training time=0.46s (21.24%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7940|ppo_ep: 1|act_loss: -0.00472259521484375|cri_loss: -0.0023040771484375|unsuper_loss: 0.0
-average reward score: 5.80859375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.28%) |Training time=0.43s (20.00%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7941|ppo_ep: 1|act_loss: 0.0321044921875|cri_loss: 0.01654052734375|unsuper_loss: 0.0
-average reward score: 5.54296875
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.09%) |Training time=0.44s (20.34%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7942|ppo_ep: 1|act_loss: 0.016448974609375|cri_loss: 0.008331298828125|unsuper_loss: 0.0
-average reward score: 5.58984375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.56%) |Training time=0.47s (21.78%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7943|ppo_ep: 1|act_loss: -0.0034580230712890625|cri_loss: -0.00157928466796875|unsuper_loss: 0.0
-average reward score: 5.515625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.61%) |Training time=0.47s (21.87%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7944|ppo_ep: 1|act_loss: -0.0009264945983886719|cri_loss: -2.09808349609375e-05|unsuper_loss: 0.0
-average reward score: 4.9375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.53%) |Training time=0.47s (21.83%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7945|ppo_ep: 1|act_loss: 0.024078369140625|cri_loss: 0.01306915283203125|unsuper_loss: 0.0
-average reward score: 5.13671875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.47%) |Training time=0.48s (22.01%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7946|ppo_ep: 1|act_loss: 0.002613067626953125|cri_loss: 0.00144195556640625|unsuper_loss: 0.0
-average reward score: 5.34375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.57%) |Training time=0.47s (21.77%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7947|ppo_ep: 1|act_loss: 0.00836944580078125|cri_loss: 0.004421234130859375|unsuper_loss: 0.0
-average reward score: 4.83203125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.46%) |Training time=0.47s (21.88%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7948|ppo_ep: 1|act_loss: -0.0103759765625|cri_loss: -0.005062103271484375|unsuper_loss: 0.0
-average reward score: 5.44140625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.67%) |Training time=0.47s (21.68%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.56
-[2023-04-14 13:38:35,426] [INFO] [logging.py:96:log_dist] [Rank 0] step=7950, skipped=100, lr=[5.998636111252047e-08, 5.998636111252047e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:38:35,444] [INFO] [timer.py:199:stop] epoch=0/micro_step=7950/global_step=7950, RunningAvgSamplesPerSec=105.71354108782916, CurrSamplesPerSec=107.05438952760315, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:38:35,537] [INFO] [logging.py:96:log_dist] [Rank 0] step=7950, skipped=138, lr=[3.709441633123367e-08, 3.709441633123367e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7949|ppo_ep: 1|act_loss: -0.00852203369140625|cri_loss: -0.004192352294921875|unsuper_loss: 0.0
-average reward score: 5.1484375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.99%) |Training time=0.47s (21.49%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7950|ppo_ep: 1|act_loss: -0.00669097900390625|cri_loss: -0.0032520294189453125|unsuper_loss: 0.0
-average reward score: 5.88671875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.76%) |Training time=0.47s (21.61%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7951|ppo_ep: 1|act_loss: -0.005435943603515625|cri_loss: -0.0026531219482421875|unsuper_loss: 0.0
-average reward score: 5.87890625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.03%) |Training time=0.46s (21.42%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7952|ppo_ep: 1|act_loss: 0.0321044921875|cri_loss: 0.016448974609375|unsuper_loss: 0.0
-average reward score: 6.01953125
--------------------------------------------------------------------------------------
-|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.76s (74.66%) |Training time=0.50s (21.06%) |Others=0.10 (4.28%)|CurSamplesPerSec=13.58 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7953|ppo_ep: 1|act_loss: 0.006015777587890625|cri_loss: 0.0031280517578125|unsuper_loss: 0.0
-average reward score: 4.9921875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.42%) |Training time=0.48s (22.05%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7954|ppo_ep: 1|act_loss: -0.0130157470703125|cri_loss: -0.00617218017578125|unsuper_loss: 0.0
-average reward score: 5.6484375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.17%) |Training time=0.48s (22.21%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7955|ppo_ep: 1|act_loss: -0.00555419921875|cri_loss: -0.0027217864990234375|unsuper_loss: 0.0
-average reward score: 5.0703125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.23%) |Training time=0.48s (22.00%) |Others=0.10 (4.77%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7956|ppo_ep: 1|act_loss: -0.01708984375|cri_loss: -0.00836944580078125|unsuper_loss: 0.0
-average reward score: 5.19140625
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.63s (73.67%) |Training time=0.48s (21.74%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.49 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7957|ppo_ep: 1|act_loss: -0.010009765625|cri_loss: -0.004848480224609375|unsuper_loss: 0.0
-average reward score: 5.65625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.54%) |Training time=0.45s (20.79%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7958|ppo_ep: 1|act_loss: 0.0113372802734375|cri_loss: 0.006534576416015625|unsuper_loss: 0.0
-average reward score: 5.64453125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.64%) |Training time=0.44s (20.67%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.56
-[2023-04-14 13:38:57,265] [INFO] [logging.py:96:log_dist] [Rank 0] step=7960, skipped=100, lr=[5.710159720792351e-08, 5.710159720792351e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:38:57,284] [INFO] [timer.py:199:stop] epoch=0/micro_step=7960/global_step=7960, RunningAvgSamplesPerSec=105.71254016421052, CurrSamplesPerSec=113.03297482527259, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:38:57,376] [INFO] [logging.py:96:log_dist] [Rank 0] step=7960, skipped=138, lr=[3.5460780214038593e-08, 3.5460780214038593e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7959|ppo_ep: 1|act_loss: 0.0247802734375|cri_loss: 0.01277923583984375|unsuper_loss: 0.0
-average reward score: 5.609375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.63%) |Training time=0.45s (20.81%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7960|ppo_ep: 1|act_loss: -0.0133819580078125|cri_loss: -0.00598907470703125|unsuper_loss: 0.0
-average reward score: 5.796875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.47%) |Training time=0.45s (20.87%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7961|ppo_ep: 1|act_loss: -0.01129913330078125|cri_loss: -0.0054931640625|unsuper_loss: 0.0
-average reward score: 5.17578125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.56%) |Training time=0.44s (20.75%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.56
-epoch: 0|step: 7962|ppo_ep: 1|act_loss: -0.004425048828125|cri_loss: -0.0021190643310546875|unsuper_loss: 0.0
-average reward score: 5.2109375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.61%) |Training time=0.44s (20.73%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.57
-epoch: 0|step: 7963|ppo_ep: 1|act_loss: -0.0086669921875|cri_loss: -0.003204345703125|unsuper_loss: 0.0
-average reward score: 5.953125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.60%) |Training time=0.44s (20.80%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.57
-epoch: 0|step: 7964|ppo_ep: 1|act_loss: 0.00540924072265625|cri_loss: 0.002834320068359375|unsuper_loss: 0.0
-average reward score: 5.34375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.44%) |Training time=0.45s (20.88%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.57
-epoch: 0|step: 7965|ppo_ep: 1|act_loss: 0.0267333984375|cri_loss: 0.01537322998046875|unsuper_loss: 0.0
-average reward score: 5.1953125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.68%) |Training time=0.44s (20.73%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.57
-epoch: 0|step: 7966|ppo_ep: 1|act_loss: 0.00862884521484375|cri_loss: 0.005084991455078125|unsuper_loss: 0.0
-average reward score: 6.19921875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.93%) |Training time=0.44s (20.38%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.57
-epoch: 0|step: 7967|ppo_ep: 1|act_loss: -0.0021800994873046875|cri_loss: -0.0008406639099121094|unsuper_loss: 0.0
-average reward score: 5.03515625
--------------------------------------------------------------------------------------
-|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.77s (76.62%) |Training time=0.44s (19.02%) |Others=0.10 (4.35%)|CurSamplesPerSec=13.85 |AvgSamplesPerSec=14.57
-epoch: 0|step: 7968|ppo_ep: 1|act_loss: -0.0058746337890625|cri_loss: -0.0027408599853515625|unsuper_loss: 0.0
-average reward score: 6.0234375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.97%) |Training time=0.44s (20.41%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.57
-[2023-04-14 13:39:18,820] [INFO] [logging.py:96:log_dist] [Rank 0] step=7970, skipped=100, lr=[5.4287505059585056e-08, 5.4287505059585056e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:39:18,838] [INFO] [timer.py:199:stop] epoch=0/micro_step=7970/global_step=7970, RunningAvgSamplesPerSec=105.72253965619953, CurrSamplesPerSec=114.09618176810422, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:39:18,931] [INFO] [logging.py:96:log_dist] [Rank 0] step=7970, skipped=138, lr=[3.38636745134574e-08, 3.38636745134574e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7969|ppo_ep: 1|act_loss: -0.0057830810546875|cri_loss: -0.0027828216552734375|unsuper_loss: 0.0
-average reward score: 5.28125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.68%) |Training time=0.44s (20.74%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.57
-epoch: 0|step: 7970|ppo_ep: 1|act_loss: 0.0011510848999023438|cri_loss: 0.0006318092346191406|unsuper_loss: 0.0
-average reward score: 4.9609375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.70%) |Training time=0.44s (20.60%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.57
-epoch: 0|step: 7971|ppo_ep: 1|act_loss: 0.027252197265625|cri_loss: 0.01389312744140625|unsuper_loss: 0.0
-average reward score: 4.7421875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.55%) |Training time=0.45s (20.86%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.57
-epoch: 0|step: 7972|ppo_ep: 1|act_loss: -0.0053253173828125|cri_loss: -0.00258636474609375|unsuper_loss: 0.0
-average reward score: 5.6875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.86%) |Training time=0.44s (20.41%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.57
-epoch: 0|step: 7973|ppo_ep: 1|act_loss: -0.009918212890625|cri_loss: -0.004913330078125|unsuper_loss: 0.0
-average reward score: 5.375
--------------------------------------------------------------------------------------
-|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.27%) |Training time=0.43s (20.12%) |Others=0.10 (4.61%)|CurSamplesPerSec=15.00 |AvgSamplesPerSec=14.57
-epoch: 0|step: 7974|ppo_ep: 1|act_loss: 0.059112548828125|cri_loss: 0.0310211181640625|unsuper_loss: 0.0
-average reward score: 5.05078125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.58%) |Training time=0.44s (20.75%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.57
-epoch: 0|step: 7975|ppo_ep: 1|act_loss: 0.00844573974609375|cri_loss: 0.004795074462890625|unsuper_loss: 0.0
-average reward score: 5.50390625
--------------------------------------------------------------------------------------
-|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.86%) |Training time=0.44s (20.55%) |Others=0.10 (4.59%)|CurSamplesPerSec=15.00 |AvgSamplesPerSec=14.57
-epoch: 0|step: 7976|ppo_ep: 1|act_loss: -0.0298004150390625|cri_loss: -0.01467132568359375|unsuper_loss: 0.0
-average reward score: 5.1953125
--------------------------------------------------------------------------------------
-|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.82%) |Training time=0.44s (20.47%) |Others=0.10 (4.71%)|CurSamplesPerSec=15.01 |AvgSamplesPerSec=14.57
-epoch: 0|step: 7977|ppo_ep: 1|act_loss: 0.008209228515625|cri_loss: 0.00433349609375|unsuper_loss: 0.0
-average reward score: 5.1953125
--------------------------------------------------------------------------------------
-|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.86%) |Training time=0.44s (20.53%) |Others=0.10 (4.61%)|CurSamplesPerSec=15.00 |AvgSamplesPerSec=14.57
-epoch: 0|step: 7978|ppo_ep: 1|act_loss: -0.0465087890625|cri_loss: -0.0218505859375|unsuper_loss: 0.0
-average reward score: 5.5
--------------------------------------------------------------------------------------
-|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.80%) |Training time=0.44s (20.52%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.57
-[2023-04-14 13:39:40,188] [INFO] [logging.py:96:log_dist] [Rank 0] step=7980, skipped=100, lr=[5.154412637913806e-08, 5.154412637913806e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:39:40,206] [INFO] [timer.py:199:stop] epoch=0/micro_step=7980/global_step=7980, RunningAvgSamplesPerSec=105.7339108712191, CurrSamplesPerSec=115.24871199527388, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:39:40,299] [INFO] [logging.py:96:log_dist] [Rank 0] step=7980, skipped=138, lr=[3.230312290245008e-08, 3.230312290245008e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7979|ppo_ep: 1|act_loss: 0.0009937286376953125|cri_loss: 0.0007677078247070312|unsuper_loss: 0.0
-average reward score: 5.5
--------------------------------------------------------------------------------------
-|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.76%) |Training time=0.44s (20.65%) |Others=0.10 (4.59%)|CurSamplesPerSec=15.01 |AvgSamplesPerSec=14.57
-epoch: 0|step: 7980|ppo_ep: 1|act_loss: -0.01158905029296875|cri_loss: -0.005451202392578125|unsuper_loss: 0.0
-average reward score: 5.140625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.73%) |Training time=0.44s (20.58%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.57
-epoch: 0|step: 7981|ppo_ep: 1|act_loss: -0.03515625|cri_loss: -0.01605224609375|unsuper_loss: 0.0
-average reward score: 5.3671875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.45%) |Training time=0.45s (20.98%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.57
-epoch: 0|step: 7982|ppo_ep: 1|act_loss: 0.022247314453125|cri_loss: 0.01145172119140625|unsuper_loss: 0.0
-average reward score: 5.3359375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.63%) |Training time=0.44s (20.68%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.57
-epoch: 0|step: 7983|ppo_ep: 1|act_loss: -0.014556884765625|cri_loss: -0.007183074951171875|unsuper_loss: 0.0
-average reward score: 5.1328125
--------------------------------------------------------------------------------------
-|E2E latency=2.30s |Gather latency=0.00s (0.00%) |Generate time=1.69s (73.36%) |Training time=0.51s (22.29%) |Others=0.10 (4.35%)|CurSamplesPerSec=13.90 |AvgSamplesPerSec=14.57
-epoch: 0|step: 7984|ppo_ep: 1|act_loss: -0.015045166015625|cri_loss: -0.0073394775390625|unsuper_loss: 0.0
-average reward score: 5.13671875
--------------------------------------------------------------------------------------
-|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.76%) |Training time=0.44s (20.57%) |Others=0.10 (4.67%)|CurSamplesPerSec=15.01 |AvgSamplesPerSec=14.57
-epoch: 0|step: 7985|ppo_ep: 1|act_loss: 0.075439453125|cri_loss: 0.042724609375|unsuper_loss: 0.0
-average reward score: 5.328125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.75%) |Training time=0.44s (20.47%) |Others=0.10 (4.77%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.57
-epoch: 0|step: 7986|ppo_ep: 1|act_loss: 0.00634002685546875|cri_loss: 0.003490447998046875|unsuper_loss: 0.0
-average reward score: 5.75390625
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.31%) |Training time=0.47s (21.12%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.53 |AvgSamplesPerSec=14.57
-epoch: 0|step: 7987|ppo_ep: 1|act_loss: -0.03533935546875|cri_loss: -0.0172882080078125|unsuper_loss: 0.0
-average reward score: 4.9921875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.97%) |Training time=0.44s (20.34%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.57
-epoch: 0|step: 7988|ppo_ep: 1|act_loss: -0.023040771484375|cri_loss: -0.01068115234375|unsuper_loss: 0.0
-average reward score: 5.91015625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.34%) |Training time=0.45s (20.95%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.57
-[2023-04-14 13:40:01,805] [INFO] [logging.py:96:log_dist] [Rank 0] step=7990, skipped=100, lr=[4.887150183007368e-08, 4.887150183007368e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:40:01,823] [INFO] [timer.py:199:stop] epoch=0/micro_step=7990/global_step=7990, RunningAvgSamplesPerSec=105.74197867872607, CurrSamplesPerSec=112.59064195032909, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:40:01,916] [INFO] [logging.py:96:log_dist] [Rank 0] step=7990, skipped=138, lr=[3.077914851215585e-08, 3.077914851215585e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7989|ppo_ep: 1|act_loss: -0.03289794921875|cri_loss: -0.016021728515625|unsuper_loss: 0.0
-average reward score: 4.76171875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.52%) |Training time=0.45s (20.90%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.57
-epoch: 0|step: 7990|ppo_ep: 1|act_loss: -0.03704833984375|cri_loss: -0.016448974609375|unsuper_loss: 0.0
-average reward score: 5.578125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.88%) |Training time=0.44s (20.43%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.57
-epoch: 0|step: 7991|ppo_ep: 1|act_loss: 0.00550079345703125|cri_loss: 0.003116607666015625|unsuper_loss: 0.0
-average reward score: 5.703125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.54%) |Training time=0.45s (20.87%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.57
-epoch: 0|step: 7992|ppo_ep: 1|act_loss: -0.0137481689453125|cri_loss: -0.006755828857421875|unsuper_loss: 0.0
-average reward score: 5.78125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.64%) |Training time=0.44s (20.66%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.57
-epoch: 0|step: 7993|ppo_ep: 1|act_loss: -0.005077362060546875|cri_loss: -0.00238800048828125|unsuper_loss: 0.0
-average reward score: 5.66015625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.78%) |Training time=0.44s (20.63%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.57
-epoch: 0|step: 7994|ppo_ep: 1|act_loss: -0.0119781494140625|cri_loss: -0.005706787109375|unsuper_loss: 0.0
-average reward score: 6.09765625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.62%) |Training time=0.44s (20.70%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.57
-epoch: 0|step: 7995|ppo_ep: 1|act_loss: 0.0263214111328125|cri_loss: 0.01381683349609375|unsuper_loss: 0.0
-average reward score: 6.078125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.54%) |Training time=0.45s (20.87%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.57
-epoch: 0|step: 7996|ppo_ep: 1|act_loss: -0.0144805908203125|cri_loss: -0.007122039794921875|unsuper_loss: 0.0
-average reward score: 4.9921875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.81%) |Training time=0.44s (20.48%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.57
-epoch: 0|step: 7997|ppo_ep: 1|act_loss: -0.007595062255859375|cri_loss: -0.003307342529296875|unsuper_loss: 0.0
-average reward score: 5.2265625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.64%) |Training time=0.44s (20.69%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.57
-epoch: 0|step: 7998|ppo_ep: 1|act_loss: 0.001560211181640625|cri_loss: 0.0010843276977539062|unsuper_loss: 0.0
-average reward score: 5.5625
--------------------------------------------------------------------------------------
-|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.77s (76.38%) |Training time=0.45s (19.30%) |Others=0.10 (4.32%)|CurSamplesPerSec=13.80 |AvgSamplesPerSec=14.57
-[2023-04-14 13:40:23,385] [INFO] [logging.py:96:log_dist] [Rank 0] step=8000, skipped=100, lr=[4.6269671027135476e-08, 4.6269671027135476e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:40:23,403] [INFO] [timer.py:199:stop] epoch=0/micro_step=8000/global_step=8000, RunningAvgSamplesPerSec=105.75146589136898, CurrSamplesPerSec=110.35337026640816, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:40:23,496] [INFO] [logging.py:96:log_dist] [Rank 0] step=8000, skipped=138, lr=[2.9291773931551525e-08, 2.9291773931551525e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 7999|ppo_ep: 1|act_loss: 0.01483154296875|cri_loss: 0.007595062255859375|unsuper_loss: 0.0
-average reward score: 6.6484375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.47%) |Training time=0.45s (21.00%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8000|ppo_ep: 1|act_loss: -0.00762176513671875|cri_loss: -0.00371551513671875|unsuper_loss: 0.0
-average reward score: 5.51953125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.78%) |Training time=0.44s (20.52%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8001|ppo_ep: 1|act_loss: -0.013580322265625|cri_loss: -0.006587982177734375|unsuper_loss: 0.0
-average reward score: 5.49609375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.74%) |Training time=0.44s (20.66%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8002|ppo_ep: 1|act_loss: -0.0023956298828125|cri_loss: -0.001026153564453125|unsuper_loss: 0.0
-average reward score: 5.421875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.99%) |Training time=0.46s (21.35%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8003|ppo_ep: 1|act_loss: -0.00656890869140625|cri_loss: -0.0019073486328125|unsuper_loss: 0.0
-average reward score: 5.62109375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.36%) |Training time=0.48s (22.09%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8004|ppo_ep: 1|act_loss: -0.00583648681640625|cri_loss: -0.002777099609375|unsuper_loss: 0.0
-average reward score: 5.42578125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.40%) |Training time=0.48s (21.97%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8005|ppo_ep: 1|act_loss: -0.00774383544921875|cri_loss: -0.00351715087890625|unsuper_loss: 0.0
-average reward score: 5.71875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.28%) |Training time=0.48s (22.19%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8006|ppo_ep: 1|act_loss: 0.00832366943359375|cri_loss: 0.00452423095703125|unsuper_loss: 0.0
-average reward score: 4.890625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.34%) |Training time=0.48s (22.02%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8007|ppo_ep: 1|act_loss: 0.0088043212890625|cri_loss: 0.0045166015625|unsuper_loss: 0.0
-average reward score: 5.5390625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.24%) |Training time=0.48s (22.23%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8008|ppo_ep: 1|act_loss: -0.0028076171875|cri_loss: -0.00128936767578125|unsuper_loss: 0.0
-average reward score: 6.09375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.40%) |Training time=0.48s (21.95%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.57
-[2023-04-14 13:40:45,038] [INFO] [logging.py:96:log_dist] [Rank 0] step=8010, skipped=100, lr=[4.373867253573118e-08, 4.373867253573118e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:40:45,056] [INFO] [timer.py:199:stop] epoch=0/micro_step=8010/global_step=8010, RunningAvgSamplesPerSec=105.74922781707347, CurrSamplesPerSec=100.61894819858148, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:40:45,149] [INFO] [logging.py:96:log_dist] [Rank 0] step=8010, skipped=138, lr=[2.7841021207116736e-08, 2.7841021207116736e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 8009|ppo_ep: 1|act_loss: -0.0247039794921875|cri_loss: -0.01165771484375|unsuper_loss: 0.0
-average reward score: 5.078125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.40%) |Training time=0.48s (22.11%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8010|ppo_ep: 1|act_loss: -0.025115966796875|cri_loss: -0.01239776611328125|unsuper_loss: 0.0
-average reward score: 5.34765625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.95%) |Training time=0.49s (22.45%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8011|ppo_ep: 1|act_loss: 0.02227783203125|cri_loss: 0.01142120361328125|unsuper_loss: 0.0
-average reward score: 6.1171875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.42%) |Training time=0.48s (22.05%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8012|ppo_ep: 1|act_loss: -0.01776123046875|cri_loss: -0.00848388671875|unsuper_loss: 0.0
-average reward score: 5.2734375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.26%) |Training time=0.48s (22.13%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8013|ppo_ep: 1|act_loss: -0.00836944580078125|cri_loss: -0.00402069091796875|unsuper_loss: 0.0
-average reward score: 5.55859375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.36%) |Training time=0.48s (21.99%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8014|ppo_ep: 1|act_loss: -0.02996826171875|cri_loss: -0.01462554931640625|unsuper_loss: 0.0
-average reward score: 4.5703125
--------------------------------------------------------------------------------------
-|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.12%) |Training time=0.48s (20.53%) |Others=0.10 (4.35%)|CurSamplesPerSec=13.65 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8015|ppo_ep: 1|act_loss: 0.020477294921875|cri_loss: 0.0107574462890625|unsuper_loss: 0.0
-average reward score: 5.6328125
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.93%) |Training time=0.47s (21.54%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8016|ppo_ep: 1|act_loss: -0.00269317626953125|cri_loss: -0.0012426376342773438|unsuper_loss: 0.0
-average reward score: 5.9140625
--------------------------------------------------------------------------------------
-|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.01%) |Training time=0.49s (21.61%) |Others=0.10 (4.38%)|CurSamplesPerSec=14.00 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8017|ppo_ep: 1|act_loss: 0.0018358230590820312|cri_loss: 0.0013628005981445312|unsuper_loss: 0.0
-average reward score: 5.3984375
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.06%) |Training time=0.49s (22.44%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8018|ppo_ep: 1|act_loss: -0.00238037109375|cri_loss: -0.0007963180541992188|unsuper_loss: 0.0
-average reward score: 5.1796875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.33%) |Training time=0.48s (22.07%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.57
-[2023-04-14 13:41:07,086] [INFO] [logging.py:96:log_dist] [Rank 0] step=8020, skipped=100, lr=[4.127854387136491e-08, 4.127854387136491e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:41:07,104] [INFO] [timer.py:199:stop] epoch=0/micro_step=8020/global_step=8020, RunningAvgSamplesPerSec=105.74197847665799, CurrSamplesPerSec=100.35315457578353, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:41:07,197] [INFO] [logging.py:96:log_dist] [Rank 0] step=8020, skipped=138, lr=[2.6426911842506175e-08, 2.6426911842506175e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 8019|ppo_ep: 1|act_loss: -0.011383056640625|cri_loss: -0.00542449951171875|unsuper_loss: 0.0
-average reward score: 5.5859375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.37%) |Training time=0.48s (22.14%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8020|ppo_ep: 1|act_loss: 0.0060577392578125|cri_loss: 0.003360748291015625|unsuper_loss: 0.0
-average reward score: 5.27734375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.39%) |Training time=0.48s (22.00%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8021|ppo_ep: 1|act_loss: -0.01690673828125|cri_loss: -0.0083465576171875|unsuper_loss: 0.0
-average reward score: 5.0859375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.43%) |Training time=0.48s (21.95%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8022|ppo_ep: 1|act_loss: -0.04150390625|cri_loss: -0.02032470703125|unsuper_loss: 0.0
-average reward score: 6.12109375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.46%) |Training time=0.48s (21.92%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8023|ppo_ep: 1|act_loss: -0.00159454345703125|cri_loss: -0.000701904296875|unsuper_loss: 0.0
-average reward score: 5.609375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.94%) |Training time=0.49s (22.41%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8024|ppo_ep: 1|act_loss: -0.0018596649169921875|cri_loss: -0.0006575584411621094|unsuper_loss: 0.0
-average reward score: 5.93359375
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.03%) |Training time=0.49s (22.40%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.57
-[2023-04-14 13:41:20,232] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
-epoch: 0|step: 8025|ppo_ep: 1|act_loss: 0.010040283203125|cri_loss: 0.00547027587890625|unsuper_loss: 0.0
-average reward score: 5.359375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.49%) |Training time=0.48s (22.45%) |Others=0.09 (4.06%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.57
-[2023-04-14 13:41:22,396] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
-epoch: 0|step: 8026|ppo_ep: 1|act_loss: -0.029541015625|cri_loss: -0.0144195556640625|unsuper_loss: 0.0
-average reward score: 5.2109375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.08%) |Training time=0.49s (22.80%) |Others=0.09 (4.12%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8027|ppo_ep: 1|act_loss: 0.00446319580078125|cri_loss: 0.0026836395263671875|unsuper_loss: 0.0
-average reward score: 5.82421875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.80%) |Training time=0.49s (22.60%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8028|ppo_ep: 1|act_loss: -0.0071868896484375|cri_loss: -0.0032958984375|unsuper_loss: 0.0
-average reward score: 5.45703125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.11%) |Training time=0.48s (22.27%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.57
-[2023-04-14 13:41:28,805] [INFO] [logging.py:96:log_dist] [Rank 0] step=8030, skipped=100, lr=[3.888932149907788e-08, 3.888932149907788e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:41:28,824] [INFO] [timer.py:199:stop] epoch=0/micro_step=8030/global_step=8030, RunningAvgSamplesPerSec=105.73302555462291, CurrSamplesPerSec=96.34630702129675, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:41:28,916] [INFO] [logging.py:96:log_dist] [Rank 0] step=8030, skipped=140, lr=[2.5322021673613308e-08, 2.5322021673613308e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 8029|ppo_ep: 1|act_loss: 0.0107421875|cri_loss: 0.00566864013671875|unsuper_loss: 0.0
-average reward score: 5.796875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.78%) |Training time=0.49s (22.73%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8030|ppo_ep: 1|act_loss: 0.00165557861328125|cri_loss: 0.0010852813720703125|unsuper_loss: 0.0
-average reward score: 5.5234375
--------------------------------------------------------------------------------------
-|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.75s (74.68%) |Training time=0.49s (21.04%) |Others=0.10 (4.28%)|CurSamplesPerSec=13.69 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8031|ppo_ep: 1|act_loss: -0.0274658203125|cri_loss: -0.01277923583984375|unsuper_loss: 0.0
-average reward score: 5.328125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.05%) |Training time=0.49s (22.32%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8032|ppo_ep: 1|act_loss: -0.019622802734375|cri_loss: -0.009552001953125|unsuper_loss: 0.0
-average reward score: 5.890625
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.72%) |Training time=0.50s (22.70%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8033|ppo_ep: 1|act_loss: 0.005889892578125|cri_loss: 0.0031986236572265625|unsuper_loss: 0.0
-average reward score: 5.37109375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.81%) |Training time=0.49s (22.57%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8034|ppo_ep: 1|act_loss: 0.02691650390625|cri_loss: 0.0142822265625|unsuper_loss: 0.0
-average reward score: 5.5703125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.94%) |Training time=0.49s (22.44%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8035|ppo_ep: 1|act_loss: -0.005855560302734375|cri_loss: -0.0028533935546875|unsuper_loss: 0.0
-average reward score: 5.58203125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.98%) |Training time=0.49s (22.49%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8036|ppo_ep: 1|act_loss: -0.0105743408203125|cri_loss: -0.0033416748046875|unsuper_loss: 0.0
-average reward score: 5.62109375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.61%) |Training time=0.50s (22.79%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8037|ppo_ep: 1|act_loss: -0.0026702880859375|cri_loss: -0.001216888427734375|unsuper_loss: 0.0
-average reward score: 5.34765625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.96%) |Training time=0.49s (22.41%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8038|ppo_ep: 1|act_loss: -0.0136566162109375|cri_loss: -0.006710052490234375|unsuper_loss: 0.0
-average reward score: 5.640625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.75%) |Training time=0.49s (22.63%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.57
-[2023-04-14 13:41:50,718] [INFO] [logging.py:96:log_dist] [Rank 0] step=8040, skipped=100, lr=[3.657104083291008e-08, 3.657104083291008e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:41:50,736] [INFO] [timer.py:199:stop] epoch=0/micro_step=8040/global_step=8040, RunningAvgSamplesPerSec=105.72194107019268, CurrSamplesPerSec=98.03218566099805, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:41:50,828] [INFO] [logging.py:96:log_dist] [Rank 0] step=8040, skipped=140, lr=[2.3973922811987295e-08, 2.3973922811987295e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 8039|ppo_ep: 1|act_loss: 0.041595458984375|cri_loss: 0.0220184326171875|unsuper_loss: 0.0
-average reward score: 4.953125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.84%) |Training time=0.49s (22.56%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8040|ppo_ep: 1|act_loss: -0.011566162109375|cri_loss: -0.0054473876953125|unsuper_loss: 0.0
-average reward score: 4.96875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.97%) |Training time=0.48s (22.39%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8041|ppo_ep: 1|act_loss: -0.003047943115234375|cri_loss: -0.0013294219970703125|unsuper_loss: 0.0
-average reward score: 5.26953125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.72%) |Training time=0.49s (22.68%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8042|ppo_ep: 1|act_loss: -0.0085906982421875|cri_loss: -0.0038299560546875|unsuper_loss: 0.0
-average reward score: 5.80859375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.76%) |Training time=0.49s (22.61%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8043|ppo_ep: 1|act_loss: 0.00794219970703125|cri_loss: 0.004283905029296875|unsuper_loss: 0.0
-average reward score: 5.86328125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.88%) |Training time=0.49s (22.58%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8044|ppo_ep: 1|act_loss: -0.002685546875|cri_loss: -0.000995635986328125|unsuper_loss: 0.0
-average reward score: 6.1015625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.66%) |Training time=0.50s (22.70%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8045|ppo_ep: 1|act_loss: -0.0005998611450195312|cri_loss: -0.00021529197692871094|unsuper_loss: 0.0
-average reward score: 5.3828125
--------------------------------------------------------------------------------------
-|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.28%) |Training time=0.51s (23.17%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.44 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8046|ppo_ep: 1|act_loss: 0.00278472900390625|cri_loss: 0.001537322998046875|unsuper_loss: 0.0
-average reward score: 5.6328125
--------------------------------------------------------------------------------------
-|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.75s (73.86%) |Training time=0.52s (21.89%) |Others=0.10 (4.25%)|CurSamplesPerSec=13.54 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8047|ppo_ep: 1|act_loss: 0.0053863525390625|cri_loss: 0.0028438568115234375|unsuper_loss: 0.0
-average reward score: 5.63671875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.77%) |Training time=0.49s (22.68%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8048|ppo_ep: 1|act_loss: 0.0013275146484375|cri_loss: 0.0008149147033691406|unsuper_loss: 0.0
-average reward score: 5.4375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.68%) |Training time=0.49s (22.74%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.57
-[2023-04-14 13:42:12,673] [INFO] [logging.py:96:log_dist] [Rank 0] step=8050, skipped=100, lr=[3.4323736235372594e-08, 3.4323736235372594e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:42:12,691] [INFO] [timer.py:199:stop] epoch=0/micro_step=8050/global_step=8050, RunningAvgSamplesPerSec=105.708434479618, CurrSamplesPerSec=97.58464677133455, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:42:12,784] [INFO] [logging.py:96:log_dist] [Rank 0] step=8050, skipped=140, lr=[2.266252462991253e-08, 2.266252462991253e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 8049|ppo_ep: 1|act_loss: 0.017578125|cri_loss: 0.0091705322265625|unsuper_loss: 0.0
-average reward score: 5.484375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.73%) |Training time=0.49s (22.59%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8050|ppo_ep: 1|act_loss: -0.001239776611328125|cri_loss: -0.0003724098205566406|unsuper_loss: 0.0
-average reward score: 5.4375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.04%) |Training time=0.48s (22.33%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8051|ppo_ep: 1|act_loss: 0.0269927978515625|cri_loss: 0.0146484375|unsuper_loss: 0.0
-average reward score: 4.96484375
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.20%) |Training time=0.50s (22.84%) |Others=0.11 (4.96%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8052|ppo_ep: 1|act_loss: -0.0083770751953125|cri_loss: -0.004146575927734375|unsuper_loss: 0.0
-average reward score: 5.4921875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.95%) |Training time=0.49s (22.41%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8053|ppo_ep: 1|act_loss: 0.001995086669921875|cri_loss: 0.0011777877807617188|unsuper_loss: 0.0
-average reward score: 5.80078125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.95%) |Training time=0.49s (22.43%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8054|ppo_ep: 1|act_loss: -0.0205535888671875|cri_loss: -0.00970458984375|unsuper_loss: 0.0
-average reward score: 5.7421875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.84%) |Training time=0.49s (22.56%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8055|ppo_ep: 1|act_loss: 0.00510406494140625|cri_loss: 0.0027561187744140625|unsuper_loss: 0.0
-average reward score: 5.0234375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.99%) |Training time=0.49s (22.45%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8056|ppo_ep: 1|act_loss: -0.01055908203125|cri_loss: -0.004421234130859375|unsuper_loss: 0.0
-average reward score: 5.359375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.79%) |Training time=0.49s (22.61%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8057|ppo_ep: 1|act_loss: 0.002025604248046875|cri_loss: 0.0011615753173828125|unsuper_loss: 0.0
-average reward score: 6.08984375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.05%) |Training time=0.49s (22.42%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8058|ppo_ep: 1|act_loss: 0.00799560546875|cri_loss: 0.00431060791015625|unsuper_loss: 0.0
-average reward score: 5.31640625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.71%) |Training time=0.49s (22.67%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.57
-[2023-04-14 13:42:34,383] [INFO] [logging.py:96:log_dist] [Rank 0] step=8060, skipped=100, lr=[3.21474410169414e-08, 3.21474410169414e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:42:34,401] [INFO] [timer.py:199:stop] epoch=0/micro_step=8060/global_step=8060, RunningAvgSamplesPerSec=105.69826892905085, CurrSamplesPerSec=98.67971046991659, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:42:34,494] [INFO] [logging.py:96:log_dist] [Rank 0] step=8060, skipped=140, lr=[2.1387846565474047e-08, 2.1387846565474047e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 8059|ppo_ep: 1|act_loss: 0.028778076171875|cri_loss: 0.014984130859375|unsuper_loss: 0.0
-average reward score: 5.05078125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.89%) |Training time=0.49s (22.49%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8060|ppo_ep: 1|act_loss: -0.0341796875|cri_loss: -0.0166473388671875|unsuper_loss: 0.0
-average reward score: 4.65625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.64%) |Training time=0.50s (22.76%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8061|ppo_ep: 1|act_loss: 0.0007905960083007812|cri_loss: 0.0005269050598144531|unsuper_loss: 0.0
-average reward score: 4.9375
--------------------------------------------------------------------------------------
-|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.75s (74.25%) |Training time=0.51s (21.57%) |Others=0.10 (4.18%)|CurSamplesPerSec=13.60 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8062|ppo_ep: 1|act_loss: 0.0484619140625|cri_loss: 0.028228759765625|unsuper_loss: 0.0
-average reward score: 5.69140625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.78%) |Training time=0.49s (22.61%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8063|ppo_ep: 1|act_loss: 0.00508880615234375|cri_loss: 0.0027179718017578125|unsuper_loss: 0.0
-average reward score: 5.53125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.90%) |Training time=0.49s (22.59%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8064|ppo_ep: 1|act_loss: -0.04681396484375|cri_loss: -0.0221099853515625|unsuper_loss: 0.0
-average reward score: 5.44921875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.57s (72.33%) |Training time=0.50s (23.00%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8065|ppo_ep: 1|act_loss: 0.001220703125|cri_loss: 0.001300811767578125|unsuper_loss: 0.0
-average reward score: 5.08203125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.44%) |Training time=0.50s (23.06%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8066|ppo_ep: 1|act_loss: -0.002658843994140625|cri_loss: -0.0011577606201171875|unsuper_loss: 0.0
-average reward score: 5.1796875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.52%) |Training time=0.50s (22.90%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8067|ppo_ep: 1|act_loss: 0.01605224609375|cri_loss: 0.0081939697265625|unsuper_loss: 0.0
-average reward score: 4.80859375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.76%) |Training time=0.49s (22.70%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8068|ppo_ep: 1|act_loss: -0.0056304931640625|cri_loss: -0.0023651123046875|unsuper_loss: 0.0
-average reward score: 5.29296875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.47%) |Training time=0.50s (22.94%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.57
-[2023-04-14 13:42:56,307] [INFO] [logging.py:96:log_dist] [Rank 0] step=8070, skipped=100, lr=[3.004218743556186e-08, 3.004218743556186e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:42:56,325] [INFO] [timer.py:199:stop] epoch=0/micro_step=8070/global_step=8070, RunningAvgSamplesPerSec=105.68438726030831, CurrSamplesPerSec=97.16916567905542, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:42:56,418] [INFO] [logging.py:96:log_dist] [Rank 0] step=8070, skipped=140, lr=[2.0149907512475585e-08, 2.0149907512475585e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 8069|ppo_ep: 1|act_loss: -0.00200653076171875|cri_loss: -0.0005998611450195312|unsuper_loss: 0.0
-average reward score: 5.7265625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.76%) |Training time=0.49s (22.72%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8070|ppo_ep: 1|act_loss: -0.019775390625|cri_loss: -0.00946807861328125|unsuper_loss: 0.0
-average reward score: 4.7109375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.28%) |Training time=0.50s (23.13%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8071|ppo_ep: 1|act_loss: 0.01132965087890625|cri_loss: 0.00701141357421875|unsuper_loss: 0.0
-average reward score: 5.6640625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.89%) |Training time=0.49s (22.59%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8072|ppo_ep: 1|act_loss: -0.01421356201171875|cri_loss: -0.00688934326171875|unsuper_loss: 0.0
-average reward score: 5.578125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.46%) |Training time=0.50s (22.98%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8073|ppo_ep: 1|act_loss: -0.00011110305786132812|cri_loss: 0.00040340423583984375|unsuper_loss: 0.0
-average reward score: 4.9453125
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.14%) |Training time=0.51s (23.36%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8074|ppo_ep: 1|act_loss: -0.022796630859375|cri_loss: -0.0111541748046875|unsuper_loss: 0.0
-average reward score: 5.24609375
--------------------------------------------------------------------------------------
-|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.61s (72.73%) |Training time=0.50s (22.76%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.44 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8075|ppo_ep: 1|act_loss: -0.002719879150390625|cri_loss: -0.0008921623229980469|unsuper_loss: 0.0
-average reward score: 4.9921875
--------------------------------------------------------------------------------------
-|E2E latency=2.50s |Gather latency=0.00s (0.00%) |Generate time=1.85s (73.98%) |Training time=0.55s (22.10%) |Others=0.10 (3.92%)|CurSamplesPerSec=12.79 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8076|ppo_ep: 1|act_loss: -0.0154876708984375|cri_loss: -0.0076904296875|unsuper_loss: 0.0
-average reward score: 5.69921875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.80%) |Training time=0.49s (22.59%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8077|ppo_ep: 1|act_loss: 0.019439697265625|cri_loss: 0.00988006591796875|unsuper_loss: 0.0
-average reward score: 5.3046875
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.21%) |Training time=0.51s (23.16%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8078|ppo_ep: 1|act_loss: 0.006072998046875|cri_loss: 0.003635406494140625|unsuper_loss: 0.0
-average reward score: 5.46875
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.59%) |Training time=0.50s (22.81%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.57
-[2023-04-14 13:43:18,464] [INFO] [logging.py:96:log_dist] [Rank 0] step=8080, skipped=100, lr=[2.8008006696170348e-08, 2.8008006696170348e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:43:18,483] [INFO] [timer.py:199:stop] epoch=0/micro_step=8080/global_step=8080, RunningAvgSamplesPerSec=105.6675462528966, CurrSamplesPerSec=98.04500416380557, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:43:18,575] [INFO] [logging.py:96:log_dist] [Rank 0] step=8080, skipped=140, lr=[1.8948725820160663e-08, 1.8948725820160663e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 8079|ppo_ep: 1|act_loss: -0.007293701171875|cri_loss: -0.003353118896484375|unsuper_loss: 0.0
-average reward score: 5.703125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.15%) |Training time=0.49s (22.37%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8080|ppo_ep: 1|act_loss: -0.01055908203125|cri_loss: -0.0049285888671875|unsuper_loss: 0.0
-average reward score: 5.83984375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.69%) |Training time=0.49s (22.70%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8081|ppo_ep: 1|act_loss: -0.005428314208984375|cri_loss: -0.00260162353515625|unsuper_loss: 0.0
-average reward score: 5.2734375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.13%) |Training time=0.48s (22.21%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8082|ppo_ep: 1|act_loss: 0.005420684814453125|cri_loss: 0.003185272216796875|unsuper_loss: 0.0
-average reward score: 5.0625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.62%) |Training time=0.50s (22.77%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8083|ppo_ep: 1|act_loss: 0.006107330322265625|cri_loss: 0.003162384033203125|unsuper_loss: 0.0
-average reward score: 5.3359375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.89%) |Training time=0.49s (22.46%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8084|ppo_ep: 1|act_loss: -0.0063629150390625|cri_loss: -0.003002166748046875|unsuper_loss: 0.0
-average reward score: 5.45703125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.14%) |Training time=0.48s (22.22%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8085|ppo_ep: 1|act_loss: -0.0124359130859375|cri_loss: -0.005931854248046875|unsuper_loss: 0.0
-average reward score: 5.265625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.02%) |Training time=0.48s (22.37%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8086|ppo_ep: 1|act_loss: -0.008056640625|cri_loss: -0.003505706787109375|unsuper_loss: 0.0
-average reward score: 5.55078125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.02%) |Training time=0.48s (22.34%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8087|ppo_ep: 1|act_loss: 0.002338409423828125|cri_loss: 0.0013360977172851562|unsuper_loss: 0.0
-average reward score: 4.47265625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.10%) |Training time=0.48s (22.26%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8088|ppo_ep: 1|act_loss: 0.05010986328125|cri_loss: 0.0265655517578125|unsuper_loss: 0.0
-average reward score: 5.2109375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.98%) |Training time=0.48s (22.37%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.57
-[2023-04-14 13:43:40,120] [INFO] [logging.py:96:log_dist] [Rank 0] step=8090, skipped=100, lr=[2.6044928950231964e-08, 2.6044928950231964e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:43:40,139] [INFO] [timer.py:199:stop] epoch=0/micro_step=8090/global_step=8090, RunningAvgSamplesPerSec=105.65891652113257, CurrSamplesPerSec=100.38415276719263, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:43:40,231] [INFO] [logging.py:96:log_dist] [Rank 0] step=8090, skipped=140, lr=[1.7784319292939166e-08, 1.7784319292939166e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 8089|ppo_ep: 1|act_loss: -0.00270843505859375|cri_loss: -0.0010089874267578125|unsuper_loss: 0.0
-average reward score: 5.5
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.16%) |Training time=0.48s (22.31%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8090|ppo_ep: 1|act_loss: 0.007781982421875|cri_loss: 0.0042724609375|unsuper_loss: 0.0
-average reward score: 6.19140625
--------------------------------------------------------------------------------------
-|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.13%) |Training time=0.48s (20.54%) |Others=0.10 (4.33%)|CurSamplesPerSec=13.77 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8091|ppo_ep: 1|act_loss: -0.00762939453125|cri_loss: -0.0021915435791015625|unsuper_loss: 0.0
-average reward score: 5.71484375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.94%) |Training time=0.48s (22.42%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8092|ppo_ep: 1|act_loss: 0.00388336181640625|cri_loss: 0.001983642578125|unsuper_loss: 0.0
-average reward score: 5.1875
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.66%) |Training time=0.49s (22.72%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8093|ppo_ep: 1|act_loss: 0.05047607421875|cri_loss: 0.0269012451171875|unsuper_loss: 0.0
-average reward score: 5.1171875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.06%) |Training time=0.48s (22.31%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8094|ppo_ep: 1|act_loss: 0.019500732421875|cri_loss: 0.0110321044921875|unsuper_loss: 0.0
-average reward score: 4.95703125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.86%) |Training time=0.49s (22.55%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8095|ppo_ep: 1|act_loss: -0.018035888671875|cri_loss: -0.0085296630859375|unsuper_loss: 0.0
-average reward score: 5.91015625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.12%) |Training time=0.48s (22.33%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8096|ppo_ep: 1|act_loss: 0.0235137939453125|cri_loss: 0.01200103759765625|unsuper_loss: 0.0
-average reward score: 5.66796875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.96%) |Training time=0.49s (22.41%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8097|ppo_ep: 1|act_loss: -0.021636962890625|cri_loss: -0.010711669921875|unsuper_loss: 0.0
-average reward score: 5.75390625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.81%) |Training time=0.49s (22.59%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8098|ppo_ep: 1|act_loss: -0.033905029296875|cri_loss: -0.015106201171875|unsuper_loss: 0.0
-average reward score: 5.734375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.64%) |Training time=0.49s (22.75%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.57
-[2023-04-14 13:44:01,948] [INFO] [logging.py:96:log_dist] [Rank 0] step=8100, skipped=100, lr=[2.4152983295295917e-08, 2.4152983295295917e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:44:01,966] [INFO] [timer.py:199:stop] epoch=0/micro_step=8100/global_step=8100, RunningAvgSamplesPerSec=105.64966874876379, CurrSamplesPerSec=98.67942026513467, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:44:02,060] [INFO] [logging.py:96:log_dist] [Rank 0] step=8100, skipped=140, lr=[1.6656705190125078e-08, 1.6656705190125078e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 8099|ppo_ep: 1|act_loss: -0.01568603515625|cri_loss: -0.007781982421875|unsuper_loss: 0.0
-average reward score: 5.01953125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.98%) |Training time=0.49s (22.46%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8100|ppo_ep: 1|act_loss: 0.00882720947265625|cri_loss: 0.004547119140625|unsuper_loss: 0.0
-average reward score: 5.125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.57s (72.87%) |Training time=0.49s (22.53%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8101|ppo_ep: 1|act_loss: -0.002315521240234375|cri_loss: -0.0010395050048828125|unsuper_loss: 0.0
-average reward score: 5.515625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.99%) |Training time=0.49s (22.47%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8102|ppo_ep: 1|act_loss: -0.025299072265625|cri_loss: -0.0122222900390625|unsuper_loss: 0.0
-average reward score: 5.66015625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.90%) |Training time=0.49s (22.47%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8103|ppo_ep: 1|act_loss: -0.0008783340454101562|cri_loss: -0.00017642974853515625|unsuper_loss: 0.0
-average reward score: 5.28125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.15%) |Training time=0.48s (22.24%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8104|ppo_ep: 1|act_loss: -0.03424072265625|cri_loss: -0.015899658203125|unsuper_loss: 0.0
-average reward score: 5.86328125
--------------------------------------------------------------------------------------
-|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.69s (73.86%) |Training time=0.50s (21.75%) |Others=0.10 (4.39%)|CurSamplesPerSec=14.01 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8105|ppo_ep: 1|act_loss: 0.029693603515625|cri_loss: 0.0153961181640625|unsuper_loss: 0.0
-average reward score: 5.34375
--------------------------------------------------------------------------------------
-|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.01%) |Training time=0.48s (20.76%) |Others=0.10 (4.23%)|CurSamplesPerSec=13.74 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8106|ppo_ep: 1|act_loss: 0.016998291015625|cri_loss: 0.00864410400390625|unsuper_loss: 0.0
-average reward score: 5.56640625
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.04%) |Training time=0.50s (22.85%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8107|ppo_ep: 1|act_loss: 0.0084686279296875|cri_loss: 0.00434112548828125|unsuper_loss: 0.0
-average reward score: 4.7890625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.73%) |Training time=0.50s (22.74%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8108|ppo_ep: 1|act_loss: -0.0159454345703125|cri_loss: -0.00785064697265625|unsuper_loss: 0.0
-average reward score: 5.76953125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.08%) |Training time=0.49s (22.31%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.57
-[2023-04-14 13:44:23,944] [INFO] [logging.py:96:log_dist] [Rank 0] step=8110, skipped=100, lr=[2.233219777456001e-08, 2.233219777456001e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:44:23,962] [INFO] [timer.py:199:stop] epoch=0/micro_step=8110/global_step=8110, RunningAvgSamplesPerSec=105.63997826562213, CurrSamplesPerSec=102.96742378386755, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:44:24,059] [INFO] [logging.py:96:log_dist] [Rank 0] step=8110, skipped=140, lr=[1.556590022567972e-08, 1.556590022567972e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 8109|ppo_ep: 1|act_loss: -0.01812744140625|cri_loss: -0.008941650390625|unsuper_loss: 0.0
-average reward score: 4.953125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.41%) |Training time=0.48s (21.83%) |Others=0.10 (4.76%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8110|ppo_ep: 1|act_loss: 0.00665283203125|cri_loss: 0.0033893585205078125|unsuper_loss: 0.0
-average reward score: 4.83984375
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.67s (76.23%) |Training time=0.42s (19.02%) |Others=0.10 (4.75%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8111|ppo_ep: 1|act_loss: -0.00710296630859375|cri_loss: -0.0033931732177734375|unsuper_loss: 0.0
-average reward score: 5.75
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.18%) |Training time=0.46s (21.15%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8112|ppo_ep: 1|act_loss: -0.0011281967163085938|cri_loss: -6.67572021484375e-05|unsuper_loss: 0.0
-average reward score: 5.9609375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.47%) |Training time=0.45s (20.86%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8113|ppo_ep: 1|act_loss: 0.0004973411560058594|cri_loss: 0.00041937828063964844|unsuper_loss: 0.0
-average reward score: 5.05859375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.39%) |Training time=0.45s (20.92%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8114|ppo_ep: 1|act_loss: 0.004322052001953125|cri_loss: 0.0022449493408203125|unsuper_loss: 0.0
-average reward score: 5.9609375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.50%) |Training time=0.45s (20.83%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8115|ppo_ep: 1|act_loss: -0.01165008544921875|cri_loss: -0.005748748779296875|unsuper_loss: 0.0
-average reward score: 5.3203125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.30%) |Training time=0.44s (20.13%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8116|ppo_ep: 1|act_loss: 0.04052734375|cri_loss: 0.0211029052734375|unsuper_loss: 0.0
-average reward score: 6.1953125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.07%) |Training time=0.46s (21.28%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8117|ppo_ep: 1|act_loss: 0.00750732421875|cri_loss: 0.00391387939453125|unsuper_loss: 0.0
-average reward score: 4.9765625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.12%) |Training time=0.46s (21.21%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8118|ppo_ep: 1|act_loss: 0.0135040283203125|cri_loss: 0.006977081298828125|unsuper_loss: 0.0
-average reward score: 4.80859375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.13%) |Training time=0.46s (21.22%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.57
-[2023-04-14 13:44:45,867] [INFO] [logging.py:96:log_dist] [Rank 0] step=8120, skipped=100, lr=[2.0582599376458696e-08, 2.0582599376458696e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:44:45,885] [INFO] [timer.py:199:stop] epoch=0/micro_step=8120/global_step=8120, RunningAvgSamplesPerSec=105.64460919016575, CurrSamplesPerSec=96.58647737641874, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:44:45,978] [INFO] [logging.py:96:log_dist] [Rank 0] step=8120, skipped=140, lr=[1.4511920567963911e-08, 1.4511920567963911e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 8119|ppo_ep: 1|act_loss: -0.017852783203125|cri_loss: -0.00876617431640625|unsuper_loss: 0.0
-average reward score: 6.0859375
--------------------------------------------------------------------------------------
-|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.77s (74.94%) |Training time=0.49s (20.90%) |Others=0.10 (4.16%)|CurSamplesPerSec=13.52 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8120|ppo_ep: 1|act_loss: 0.01031494140625|cri_loss: 0.00534820556640625|unsuper_loss: 0.0
-average reward score: 5.6796875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.02%) |Training time=0.46s (21.33%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8121|ppo_ep: 1|act_loss: -0.0057525634765625|cri_loss: -0.002651214599609375|unsuper_loss: 0.0
-average reward score: 4.7109375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.90%) |Training time=0.47s (21.57%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8122|ppo_ep: 1|act_loss: 0.0091400146484375|cri_loss: 0.0060882568359375|unsuper_loss: 0.0
-average reward score: 5.5703125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.90%) |Training time=0.47s (21.43%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8123|ppo_ep: 1|act_loss: -0.00714874267578125|cri_loss: -0.0032329559326171875|unsuper_loss: 0.0
-average reward score: 5.53125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.29%) |Training time=0.46s (21.15%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8124|ppo_ep: 1|act_loss: -0.0182342529296875|cri_loss: -0.00887298583984375|unsuper_loss: 0.0
-average reward score: 5.13671875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.60%) |Training time=0.47s (21.75%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8125|ppo_ep: 1|act_loss: -0.00010943412780761719|cri_loss: 7.2479248046875e-05|unsuper_loss: 0.0
-average reward score: 5.25
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.01%) |Training time=0.46s (21.43%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8126|ppo_ep: 1|act_loss: -0.00679779052734375|cri_loss: -0.0029354095458984375|unsuper_loss: 0.0
-average reward score: 5.609375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.79%) |Training time=0.47s (21.56%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.57
-[2023-04-14 13:45:03,323] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
-epoch: 0|step: 8127|ppo_ep: 1|act_loss: -0.011077880859375|cri_loss: -0.005207061767578125|unsuper_loss: 0.0
-average reward score: 5.70703125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.86%) |Training time=0.48s (22.07%) |Others=0.09 (4.07%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.57
-[2023-04-14 13:45:05,485] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
-epoch: 0|step: 8128|ppo_ep: 1|act_loss: -0.0008001327514648438|cri_loss: -0.0003199577331542969|unsuper_loss: 0.0
-average reward score: 5.44921875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.56%) |Training time=0.48s (22.28%) |Others=0.09 (4.15%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.57
-[2023-04-14 13:45:07,544] [INFO] [logging.py:96:log_dist] [Rank 0] step=8130, skipped=100, lr=[1.8904214034259727e-08, 1.8904214034259727e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:45:07,562] [INFO] [timer.py:199:stop] epoch=0/micro_step=8130/global_step=8130, RunningAvgSamplesPerSec=105.6427885351493, CurrSamplesPerSec=100.87728193430314, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:45:07,655] [INFO] [logging.py:96:log_dist] [Rank 0] step=8130, skipped=142, lr=[1.3695261579316776e-08, 1.3695261579316776e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 8129|ppo_ep: 1|act_loss: 0.004642486572265625|cri_loss: 0.002925872802734375|unsuper_loss: 0.0
-average reward score: 5.61328125
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.30%) |Training time=0.48s (22.10%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8130|ppo_ep: 1|act_loss: 0.00099945068359375|cri_loss: 0.0006532669067382812|unsuper_loss: 0.0
-average reward score: 6.16015625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.53%) |Training time=0.47s (21.84%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8131|ppo_ep: 1|act_loss: -0.02508544921875|cri_loss: -0.01206207275390625|unsuper_loss: 0.0
-average reward score: 5.6640625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.33%) |Training time=0.48s (22.14%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8132|ppo_ep: 1|act_loss: -0.0108642578125|cri_loss: -0.0050048828125|unsuper_loss: 0.0
-average reward score: 5.92578125
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.32%) |Training time=0.48s (22.07%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8133|ppo_ep: 1|act_loss: 0.007167816162109375|cri_loss: 0.003833770751953125|unsuper_loss: 0.0
-average reward score: 6.10546875
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.46%) |Training time=0.49s (22.09%) |Others=0.10 (4.45%)|CurSamplesPerSec=14.47 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8134|ppo_ep: 1|act_loss: -0.0309295654296875|cri_loss: -0.015045166015625|unsuper_loss: 0.0
-average reward score: 5.8046875
--------------------------------------------------------------------------------------
-|E2E latency=2.51s |Gather latency=0.00s (0.00%) |Generate time=1.87s (74.40%) |Training time=0.54s (21.60%) |Others=0.10 (4.00%)|CurSamplesPerSec=12.74 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8135|ppo_ep: 1|act_loss: 0.0028095245361328125|cri_loss: 0.0014448165893554688|unsuper_loss: 0.0
-average reward score: 5.09765625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.57%) |Training time=0.47s (21.90%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8136|ppo_ep: 1|act_loss: 0.01763916015625|cri_loss: 0.00968170166015625|unsuper_loss: 0.0
-average reward score: 5.53515625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.43%) |Training time=0.48s (21.94%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8137|ppo_ep: 1|act_loss: -0.00473785400390625|cri_loss: -0.0020809173583984375|unsuper_loss: 0.0
-average reward score: 5.53125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.57%) |Training time=0.47s (21.85%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8138|ppo_ep: 1|act_loss: -0.018798828125|cri_loss: -0.00905609130859375|unsuper_loss: 0.0
-average reward score: 5.4765625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.34%) |Training time=0.48s (22.02%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.57
-[2023-04-14 13:45:29,625] [INFO] [logging.py:96:log_dist] [Rank 0] step=8140, skipped=100, lr=[1.729706662568434e-08, 1.729706662568434e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:45:29,643] [INFO] [timer.py:199:stop] epoch=0/micro_step=8140/global_step=8140, RunningAvgSamplesPerSec=105.63444801584032, CurrSamplesPerSec=99.91842909073584, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:45:29,738] [INFO] [logging.py:96:log_dist] [Rank 0] step=8140, skipped=142, lr=[1.2707606475514667e-08, 1.2707606475514667e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 8139|ppo_ep: 1|act_loss: -0.0002751350402832031|cri_loss: 0.00021839141845703125|unsuper_loss: 0.0
-average reward score: 5.34765625
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.16%) |Training time=0.48s (22.23%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8140|ppo_ep: 1|act_loss: 0.0008211135864257812|cri_loss: 0.0006456375122070312|unsuper_loss: 0.0
-average reward score: 6.015625
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.93%) |Training time=0.49s (22.47%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8141|ppo_ep: 1|act_loss: -0.0084075927734375|cri_loss: -0.003559112548828125|unsuper_loss: 0.0
-average reward score: 5.5234375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.13%) |Training time=0.49s (22.37%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8142|ppo_ep: 1|act_loss: -0.009246826171875|cri_loss: -0.00395965576171875|unsuper_loss: 0.0
-average reward score: 5.5390625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.75%) |Training time=0.47s (21.60%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8143|ppo_ep: 1|act_loss: -0.019439697265625|cri_loss: -0.00914764404296875|unsuper_loss: 0.0
-average reward score: 5.92578125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.73%) |Training time=0.47s (21.72%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8144|ppo_ep: 1|act_loss: -0.02069091796875|cri_loss: -0.01020050048828125|unsuper_loss: 0.0
-average reward score: 6.4765625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.68%) |Training time=0.47s (21.69%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8145|ppo_ep: 1|act_loss: -0.01068115234375|cri_loss: -0.005199432373046875|unsuper_loss: 0.0
-average reward score: 5.25390625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.51%) |Training time=0.47s (21.94%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8146|ppo_ep: 1|act_loss: -0.0067138671875|cri_loss: -0.0033111572265625|unsuper_loss: 0.0
-average reward score: 5.0546875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.35%) |Training time=0.48s (21.87%) |Others=0.10 (4.77%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8147|ppo_ep: 1|act_loss: 0.0004782676696777344|cri_loss: 0.0006284713745117188|unsuper_loss: 0.0
-average reward score: 5.4765625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.58%) |Training time=0.47s (21.86%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8148|ppo_ep: 1|act_loss: -0.023040771484375|cri_loss: -0.01136016845703125|unsuper_loss: 0.0
-average reward score: 5.5234375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.75%) |Training time=0.47s (21.60%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.57
-[2023-04-14 13:45:51,466] [INFO] [logging.py:96:log_dist] [Rank 0] step=8150, skipped=100, lr=[1.5761180972532812e-08, 1.5761180972532812e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:45:51,484] [INFO] [timer.py:199:stop] epoch=0/micro_step=8150/global_step=8150, RunningAvgSamplesPerSec=105.63041590312197, CurrSamplesPerSec=103.20169652866737, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:45:51,577] [INFO] [logging.py:96:log_dist] [Rank 0] step=8150, skipped=142, lr=[1.1756819045251055e-08, 1.1756819045251055e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 8149|ppo_ep: 1|act_loss: -0.011749267578125|cri_loss: -0.005733489990234375|unsuper_loss: 0.0
-average reward score: 5.2890625
--------------------------------------------------------------------------------------
-|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.48%) |Training time=0.47s (20.33%) |Others=0.10 (4.19%)|CurSamplesPerSec=13.73 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8150|ppo_ep: 1|act_loss: -0.01499176025390625|cri_loss: -0.007411956787109375|unsuper_loss: 0.0
-average reward score: 5.0546875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.54%) |Training time=0.47s (21.83%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8151|ppo_ep: 1|act_loss: 0.0272369384765625|cri_loss: 0.0148773193359375|unsuper_loss: 0.0
-average reward score: 4.75
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.80%) |Training time=0.47s (21.65%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8152|ppo_ep: 1|act_loss: -0.00673675537109375|cri_loss: -0.0031871795654296875|unsuper_loss: 0.0
-average reward score: 5.1484375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.60%) |Training time=0.47s (21.77%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8153|ppo_ep: 1|act_loss: -0.01482391357421875|cri_loss: -0.00731658935546875|unsuper_loss: 0.0
-average reward score: 5.5078125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.78%) |Training time=0.47s (21.66%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8154|ppo_ep: 1|act_loss: -0.015289306640625|cri_loss: -0.00749969482421875|unsuper_loss: 0.0
-average reward score: 4.94921875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.63%) |Training time=0.47s (21.73%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.57
-[2023-04-14 13:46:04,438] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 16384, but hysteresis is 2. Reducing hysteresis to 1
-epoch: 0|step: 8155|ppo_ep: 1|act_loss: -0.00965118408203125|cri_loss: -0.004589080810546875|unsuper_loss: 0.0
-average reward score: 5.765625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.54%) |Training time=0.45s (20.90%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8156|ppo_ep: 1|act_loss: -0.015777587890625|cri_loss: -0.0076141357421875|unsuper_loss: 0.0
-average reward score: 5.296875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.58%) |Training time=0.47s (21.79%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8157|ppo_ep: 1|act_loss: 0.0056304931640625|cri_loss: 0.00444793701171875|unsuper_loss: 0.0
-average reward score: 5.64453125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.79%) |Training time=0.47s (21.67%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8158|ppo_ep: 1|act_loss: -0.0048980712890625|cri_loss: -0.0023345947265625|unsuper_loss: 0.0
-average reward score: 6.34375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.66%) |Training time=0.47s (21.72%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.57
-[2023-04-14 13:46:13,063] [INFO] [logging.py:96:log_dist] [Rank 0] step=8160, skipped=101, lr=[1.4439831523126524e-08, 1.4439831523126524e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:46:13,082] [INFO] [timer.py:199:stop] epoch=0/micro_step=8160/global_step=8160, RunningAvgSamplesPerSec=105.62953923713883, CurrSamplesPerSec=105.34076847791707, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:46:13,174] [INFO] [logging.py:96:log_dist] [Rank 0] step=8160, skipped=142, lr=[1.0842913381488851e-08, 1.0842913381488851e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 8159|ppo_ep: 1|act_loss: 0.0166015625|cri_loss: 0.00849151611328125|unsuper_loss: 0.0
-average reward score: 5.328125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.85%) |Training time=0.47s (21.62%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8160|ppo_ep: 1|act_loss: -0.001918792724609375|cri_loss: -0.000820159912109375|unsuper_loss: 0.0
-average reward score: 6.1953125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.74%) |Training time=0.47s (21.61%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8161|ppo_ep: 1|act_loss: -0.012054443359375|cri_loss: -0.005950927734375|unsuper_loss: 0.0
-average reward score: 5.27734375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.83%) |Training time=0.47s (21.59%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.57
-[2023-04-14 13:46:19,563] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 16384, reducing to 8192
-epoch: 0|step: 8162|ppo_ep: 1|act_loss: -0.01099395751953125|cri_loss: -0.00539398193359375|unsuper_loss: 0.0
-average reward score: 5.16015625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.78%) |Training time=0.45s (20.61%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8163|ppo_ep: 1|act_loss: 0.00798797607421875|cri_loss: 0.00482177734375|unsuper_loss: 0.0
-average reward score: 5.25
--------------------------------------------------------------------------------------
-|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.96%) |Training time=0.47s (20.67%) |Others=0.10 (4.36%)|CurSamplesPerSec=14.18 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8164|ppo_ep: 1|act_loss: 0.0006723403930664062|cri_loss: 0.0004429817199707031|unsuper_loss: 0.0
-average reward score: 5.8203125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.72%) |Training time=0.47s (21.62%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8165|ppo_ep: 1|act_loss: 0.01507568359375|cri_loss: 0.007755279541015625|unsuper_loss: 0.0
-average reward score: 6.05078125
--------------------------------------------------------------------------------------
-|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.57%) |Training time=0.47s (20.21%) |Others=0.10 (4.22%)|CurSamplesPerSec=13.73 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8166|ppo_ep: 1|act_loss: -0.0082244873046875|cri_loss: -0.003971099853515625|unsuper_loss: 0.0
-average reward score: 5.6328125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.77%) |Training time=0.47s (21.58%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8167|ppo_ep: 1|act_loss: -0.002979278564453125|cri_loss: -0.0011272430419921875|unsuper_loss: 0.0
-average reward score: 5.25390625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.15%) |Training time=0.46s (21.28%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8168|ppo_ep: 1|act_loss: -0.0156402587890625|cri_loss: -0.007701873779296875|unsuper_loss: 0.0
-average reward score: 4.68359375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.21%) |Training time=0.45s (21.11%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.57
-[2023-04-14 13:46:34,899] [INFO] [logging.py:96:log_dist] [Rank 0] step=8170, skipped=102, lr=[1.3176238413572798e-08, 1.3176238413572798e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:46:34,917] [INFO] [timer.py:199:stop] epoch=0/micro_step=8170/global_step=8170, RunningAvgSamplesPerSec=105.63095062148561, CurrSamplesPerSec=107.79319611192628, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:46:35,010] [INFO] [logging.py:96:log_dist] [Rank 0] step=8170, skipped=142, lr=[9.965903030514923e-09, 9.965903030514923e-09], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 8169|ppo_ep: 1|act_loss: -0.03314208984375|cri_loss: -0.0162353515625|unsuper_loss: 0.0
-average reward score: 5.2265625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.10%) |Training time=0.46s (21.35%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8170|ppo_ep: 1|act_loss: -0.05389404296875|cri_loss: -0.0263214111328125|unsuper_loss: 0.0
-average reward score: 5.55078125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.04%) |Training time=0.46s (21.31%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8171|ppo_ep: 1|act_loss: -0.01548004150390625|cri_loss: -0.0076446533203125|unsuper_loss: 0.0
-average reward score: 5.86328125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.12%) |Training time=0.46s (21.19%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8172|ppo_ep: 1|act_loss: -0.0423583984375|cri_loss: -0.02008056640625|unsuper_loss: 0.0
-average reward score: 5.37109375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.71%) |Training time=0.47s (21.60%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8173|ppo_ep: 1|act_loss: -0.0012054443359375|cri_loss: 0.003978729248046875|unsuper_loss: 0.0
-average reward score: 5.578125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.08%) |Training time=0.46s (21.35%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8174|ppo_ep: 1|act_loss: -0.005931854248046875|cri_loss: -0.0025997161865234375|unsuper_loss: 0.0
-average reward score: 5.34375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.88%) |Training time=0.46s (21.46%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8175|ppo_ep: 1|act_loss: -0.0185089111328125|cri_loss: -0.00872039794921875|unsuper_loss: 0.0
-average reward score: 5.13671875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.72%) |Training time=0.47s (21.66%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8176|ppo_ep: 1|act_loss: -0.0109405517578125|cri_loss: -0.005306243896484375|unsuper_loss: 0.0
-average reward score: 5.3203125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.12%) |Training time=0.46s (21.18%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8177|ppo_ep: 1|act_loss: -0.01474761962890625|cri_loss: -0.0072021484375|unsuper_loss: 0.0
-average reward score: 5.04296875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.03%) |Training time=0.46s (21.31%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8178|ppo_ep: 1|act_loss: 0.027130126953125|cri_loss: 0.0140533447265625|unsuper_loss: 0.0
-average reward score: 5.859375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.87%) |Training time=0.46s (21.49%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.57
-[2023-04-14 13:46:56,452] [INFO] [logging.py:96:log_dist] [Rank 0] step=8180, skipped=102, lr=[1.1840003415331683e-08, 1.1840003415331683e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:46:56,470] [INFO] [timer.py:199:stop] epoch=0/micro_step=8180/global_step=8180, RunningAvgSamplesPerSec=105.63260714351426, CurrSamplesPerSec=106.2880881995418, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:46:56,607] [INFO] [logging.py:96:log_dist] [Rank 0] step=8180, skipped=142, lr=[9.12580099173832e-09, 9.12580099173832e-09], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 8179|ppo_ep: 1|act_loss: -0.0120086669921875|cri_loss: -0.0056304931640625|unsuper_loss: 0.0
-average reward score: 5.91015625
--------------------------------------------------------------------------------------
-|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.47%) |Training time=0.47s (21.36%) |Others=0.14 (6.17%)|CurSamplesPerSec=14.53 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8180|ppo_ep: 1|act_loss: -0.042755126953125|cri_loss: -0.01361846923828125|unsuper_loss: 0.0
-average reward score: 5.2578125
--------------------------------------------------------------------------------------
-|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.68s (74.74%) |Training time=0.46s (20.48%) |Others=0.11 (4.79%)|CurSamplesPerSec=14.19 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8181|ppo_ep: 1|act_loss: -0.00039386749267578125|cri_loss: 0.00017213821411132812|unsuper_loss: 0.0
-average reward score: 5.00390625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.02%) |Training time=0.46s (21.35%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8182|ppo_ep: 1|act_loss: -0.012725830078125|cri_loss: -0.00620269775390625|unsuper_loss: 0.0
-average reward score: 5.265625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.70%) |Training time=0.47s (21.63%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8183|ppo_ep: 1|act_loss: 0.03289794921875|cri_loss: 0.0170745849609375|unsuper_loss: 0.0
-average reward score: 5.02734375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.76%) |Training time=0.47s (21.54%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.57
-[2023-04-14 13:47:07,378] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 8192, reducing to 4096
-epoch: 0|step: 8184|ppo_ep: 1|act_loss: 0.01544189453125|cri_loss: 0.00815582275390625|unsuper_loss: 0.0
-average reward score: 4.6796875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.25%) |Training time=0.45s (21.10%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8185|ppo_ep: 1|act_loss: -0.02349853515625|cri_loss: -0.0108642578125|unsuper_loss: 0.0
-average reward score: 5.234375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.70%) |Training time=0.46s (21.51%) |Others=0.10 (4.79%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8186|ppo_ep: 1|act_loss: -0.00730133056640625|cri_loss: -0.0034046173095703125|unsuper_loss: 0.0
-average reward score: 4.81640625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.09%) |Training time=0.46s (21.23%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8187|ppo_ep: 1|act_loss: -0.01087188720703125|cri_loss: -0.00518035888671875|unsuper_loss: 0.0
-average reward score: 5.4453125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.24%) |Training time=0.45s (21.19%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8188|ppo_ep: 1|act_loss: 0.011016845703125|cri_loss: 0.005649566650390625|unsuper_loss: 0.0
-average reward score: 5.2890625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.19%) |Training time=0.45s (21.15%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.57
-[2023-04-14 13:47:18,096] [INFO] [logging.py:96:log_dist] [Rank 0] step=8190, skipped=103, lr=[1.0698389333422932e-08, 1.0698389333422932e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:47:18,114] [INFO] [timer.py:199:stop] epoch=0/micro_step=8190/global_step=8190, RunningAvgSamplesPerSec=105.63530852231541, CurrSamplesPerSec=109.11442656642257, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:47:18,206] [INFO] [logging.py:96:log_dist] [Rank 0] step=8190, skipped=142, lr=[8.322619717497648e-09, 8.322619717497648e-09], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 8189|ppo_ep: 1|act_loss: -0.01428985595703125|cri_loss: -0.00637054443359375|unsuper_loss: 0.0
-average reward score: 5.51953125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.21%) |Training time=0.46s (21.24%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8190|ppo_ep: 1|act_loss: -0.00807952880859375|cri_loss: -0.0038166046142578125|unsuper_loss: 0.0
-average reward score: 5.9140625
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.00%) |Training time=0.47s (21.42%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8191|ppo_ep: 1|act_loss: -0.0005846023559570312|cri_loss: -0.00010395050048828125|unsuper_loss: 0.0
-average reward score: 4.96484375
--------------------------------------------------------------------------------------
-|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.32%) |Training time=0.48s (22.09%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8192|ppo_ep: 1|act_loss: -0.023468017578125|cri_loss: -0.01143646240234375|unsuper_loss: 0.0
-average reward score: 5.546875
--------------------------------------------------------------------------------------
-|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.45%) |Training time=0.49s (22.02%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.47 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8193|ppo_ep: 1|act_loss: 0.004619598388671875|cri_loss: 0.00238800048828125|unsuper_loss: 0.0
-average reward score: 6.140625
--------------------------------------------------------------------------------------
-|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.65%) |Training time=0.48s (20.93%) |Others=0.10 (4.41%)|CurSamplesPerSec=14.10 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8194|ppo_ep: 1|act_loss: -0.001743316650390625|cri_loss: -0.0005774497985839844|unsuper_loss: 0.0
-average reward score: 5.59765625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.18%) |Training time=0.45s (21.15%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8195|ppo_ep: 1|act_loss: -0.0200042724609375|cri_loss: -0.008514404296875|unsuper_loss: 0.0
-average reward score: 5.22265625
--------------------------------------------------------------------------------------
-|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.42%) |Training time=0.47s (20.11%) |Others=0.10 (4.47%)|CurSamplesPerSec=13.74 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8196|ppo_ep: 1|act_loss: -0.0178985595703125|cri_loss: -0.0088043212890625|unsuper_loss: 0.0
-average reward score: 5.734375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.06%) |Training time=0.46s (21.29%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8197|ppo_ep: 1|act_loss: 0.02801513671875|cri_loss: 0.0142059326171875|unsuper_loss: 0.0
-average reward score: 6.1875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.20%) |Training time=0.45s (21.14%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8198|ppo_ep: 1|act_loss: -0.0113067626953125|cri_loss: -0.004970550537109375|unsuper_loss: 0.0
-average reward score: 5.703125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.06%) |Training time=0.46s (21.28%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.57
-[2023-04-14 13:47:40,025] [INFO] [logging.py:96:log_dist] [Rank 0] step=8200, skipped=103, lr=[9.497721415816521e-09, 9.497721415816521e-09], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:47:40,044] [INFO] [timer.py:199:stop] epoch=0/micro_step=8200/global_step=8200, RunningAvgSamplesPerSec=105.63448505052008, CurrSamplesPerSec=103.44995325327169, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:47:40,136] [INFO] [logging.py:96:log_dist] [Rank 0] step=8200, skipped=142, lr=[7.556371112877325e-09, 7.556371112877325e-09], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 8199|ppo_ep: 1|act_loss: 0.011444091796875|cri_loss: 0.006195068359375|unsuper_loss: 0.0
-average reward score: 6.37109375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.72%) |Training time=0.47s (21.76%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8200|ppo_ep: 1|act_loss: -0.0039825439453125|cri_loss: -0.0019474029541015625|unsuper_loss: 0.0
-average reward score: 5.55078125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.26%) |Training time=0.45s (21.09%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8201|ppo_ep: 1|act_loss: 0.004802703857421875|cri_loss: 0.002658843994140625|unsuper_loss: 0.0
-average reward score: 5.48046875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.35%) |Training time=0.45s (20.99%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8202|ppo_ep: 1|act_loss: -0.0181884765625|cri_loss: -0.00818634033203125|unsuper_loss: 0.0
-average reward score: 5.43359375
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.46%) |Training time=0.45s (20.86%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8203|ppo_ep: 1|act_loss: 0.010589599609375|cri_loss: 0.0053863525390625|unsuper_loss: 0.0
-average reward score: 5.34765625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.35%) |Training time=0.45s (21.09%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8204|ppo_ep: 1|act_loss: 0.033782958984375|cri_loss: 0.0172882080078125|unsuper_loss: 0.0
-average reward score: 5.96484375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.17%) |Training time=0.45s (21.15%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8205|ppo_ep: 1|act_loss: 0.019134521484375|cri_loss: 0.01016998291015625|unsuper_loss: 0.0
-average reward score: 5.22265625
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.56%) |Training time=0.44s (20.74%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8206|ppo_ep: 1|act_loss: -0.020904541015625|cri_loss: -0.00949859619140625|unsuper_loss: 0.0
-average reward score: 5.4921875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.96%) |Training time=0.46s (21.40%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8207|ppo_ep: 1|act_loss: -0.008209228515625|cri_loss: -0.00391387939453125|unsuper_loss: 0.0
-average reward score: 5.87890625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.02%) |Training time=0.46s (21.37%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8208|ppo_ep: 1|act_loss: -0.0041656494140625|cri_loss: -0.001949310302734375|unsuper_loss: 0.0
-average reward score: 5.11328125
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.32%) |Training time=0.45s (21.01%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.57
-[2023-04-14 13:48:01,494] [INFO] [logging.py:96:log_dist] [Rank 0] step=8210, skipped=103, lr=[8.368430858746151e-09, 8.368430858746151e-09], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:48:01,512] [INFO] [timer.py:199:stop] epoch=0/micro_step=8210/global_step=8210, RunningAvgSamplesPerSec=105.63972264176873, CurrSamplesPerSec=108.21060710870746, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:48:01,604] [INFO] [logging.py:96:log_dist] [Rank 0] step=8210, skipped=142, lr=[6.827066535529947e-09, 6.827066535529947e-09], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 8209|ppo_ep: 1|act_loss: 0.01201629638671875|cri_loss: 0.006168365478515625|unsuper_loss: 0.0
-average reward score: 5.56640625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.04%) |Training time=0.46s (21.34%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8210|ppo_ep: 1|act_loss: -0.012237548828125|cri_loss: -0.00591278076171875|unsuper_loss: 0.0
-average reward score: 5.8671875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.94%) |Training time=0.46s (21.41%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8211|ppo_ep: 1|act_loss: -0.002490997314453125|cri_loss: -0.0008959770202636719|unsuper_loss: 0.0
-average reward score: 5.78125
--------------------------------------------------------------------------------------
-|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.81%) |Training time=0.46s (19.86%) |Others=0.10 (4.33%)|CurSamplesPerSec=13.83 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8212|ppo_ep: 1|act_loss: -0.001079559326171875|cri_loss: -0.0004200935363769531|unsuper_loss: 0.0
-average reward score: 6.21875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.03%) |Training time=0.46s (21.30%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8213|ppo_ep: 1|act_loss: -0.004608154296875|cri_loss: -0.002239227294921875|unsuper_loss: 0.0
-average reward score: 4.78515625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.03%) |Training time=0.46s (21.38%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8214|ppo_ep: 1|act_loss: -0.0010585784912109375|cri_loss: -0.0004050731658935547|unsuper_loss: 0.0
-average reward score: 5.22265625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.98%) |Training time=0.46s (21.30%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8215|ppo_ep: 1|act_loss: 0.00254058837890625|cri_loss: 0.0013818740844726562|unsuper_loss: 0.0
-average reward score: 5.56640625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.15%) |Training time=0.45s (21.18%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8216|ppo_ep: 1|act_loss: 0.033416748046875|cri_loss: 0.018310546875|unsuper_loss: 0.0
-average reward score: 5.42578125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.05%) |Training time=0.46s (21.32%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8217|ppo_ep: 1|act_loss: 0.0007562637329101562|cri_loss: 0.0010824203491210938|unsuper_loss: 0.0
-average reward score: 5.421875
--------------------------------------------------------------------------------------
-|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.22%) |Training time=0.45s (21.11%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8218|ppo_ep: 1|act_loss: -0.0105743408203125|cri_loss: -0.004993438720703125|unsuper_loss: 0.0
-average reward score: 5.1796875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.33%) |Training time=0.46s (21.05%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.57
-[2023-04-14 13:48:23,164] [INFO] [logging.py:96:log_dist] [Rank 0] step=8220, skipped=103, lr=[7.3105344010205834e-09, 7.3105344010205834e-09], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:48:23,182] [INFO] [timer.py:199:stop] epoch=0/micro_step=8220/global_step=8220, RunningAvgSamplesPerSec=105.64292040215666, CurrSamplesPerSec=107.84100454045472, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:48:23,275] [INFO] [logging.py:96:log_dist] [Rank 0] step=8220, skipped=142, lr=[6.134716795508644e-09, 6.134716795508644e-09], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 8219|ppo_ep: 1|act_loss: 0.07666015625|cri_loss: 0.0426025390625|unsuper_loss: 0.0
-average reward score: 5.8671875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.95%) |Training time=0.46s (21.43%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8220|ppo_ep: 1|act_loss: -0.00444793701171875|cri_loss: -0.002170562744140625|unsuper_loss: 0.0
-average reward score: 5.12890625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.00%) |Training time=0.46s (21.36%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8221|ppo_ep: 1|act_loss: 0.0042572021484375|cri_loss: 0.002227783203125|unsuper_loss: 0.0
-average reward score: 5.77734375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.44%) |Training time=0.45s (21.00%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8222|ppo_ep: 1|act_loss: 0.030242919921875|cri_loss: 0.0159759521484375|unsuper_loss: 0.0
-average reward score: 5.5
--------------------------------------------------------------------------------------
-|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.21%) |Training time=0.46s (21.23%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8223|ppo_ep: 1|act_loss: -0.0179443359375|cri_loss: -0.0082550048828125|unsuper_loss: 0.0
-average reward score: 5.62109375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.15%) |Training time=0.46s (21.27%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8224|ppo_ep: 1|act_loss: -0.004680633544921875|cri_loss: -0.0015468597412109375|unsuper_loss: 0.0
-average reward score: 5.625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.26%) |Training time=0.45s (21.07%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8225|ppo_ep: 1|act_loss: 0.0092010498046875|cri_loss: 0.004795074462890625|unsuper_loss: 0.0
-average reward score: 5.84765625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.11%) |Training time=0.46s (21.23%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8226|ppo_ep: 1|act_loss: -0.004886627197265625|cri_loss: -0.002384185791015625|unsuper_loss: 0.0
-average reward score: 5.0234375
--------------------------------------------------------------------------------------
-|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.97%) |Training time=0.46s (19.70%) |Others=0.10 (4.33%)|CurSamplesPerSec=13.79 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8227|ppo_ep: 1|act_loss: -0.0252532958984375|cri_loss: -0.01244354248046875|unsuper_loss: 0.0
-average reward score: 4.96875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.26%) |Training time=0.45s (21.07%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8228|ppo_ep: 1|act_loss: 0.0002658367156982422|cri_loss: 0.0002161264419555664|unsuper_loss: 0.0
-average reward score: 6.0234375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.23%) |Training time=0.45s (21.12%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.57
-[2023-04-14 13:48:44,872] [INFO] [logging.py:96:log_dist] [Rank 0] step=8230, skipped=103, lr=[6.324047723218296e-09, 6.324047723218296e-09], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:48:44,890] [INFO] [timer.py:199:stop] epoch=0/micro_step=8230/global_step=8230, RunningAvgSamplesPerSec=105.64653553431586, CurrSamplesPerSec=106.69110328035784, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:48:44,974] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
-[2023-04-14 13:48:44,975] [INFO] [logging.py:96:log_dist] [Rank 0] step=8230, skipped=143, lr=[5.543206908509291e-09, 5.543206908509291e-09], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 8229|ppo_ep: 1|act_loss: -0.0010890960693359375|cri_loss: -0.00036597251892089844|unsuper_loss: 0.0
-average reward score: 5.44921875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.33%) |Training time=0.46s (21.58%) |Others=0.09 (4.09%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.57
-[2023-04-14 13:48:47,137] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
-epoch: 0|step: 8230|ppo_ep: 1|act_loss: 0.00010842084884643555|cri_loss: 0.00010132789611816406|unsuper_loss: 0.0
-average reward score: 4.859375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.75%) |Training time=0.48s (22.10%) |Others=0.09 (4.15%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8231|ppo_ep: 1|act_loss: 0.01229095458984375|cri_loss: 0.006389617919921875|unsuper_loss: 0.0
-average reward score: 5.546875
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.76%) |Training time=0.47s (21.62%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8232|ppo_ep: 1|act_loss: -0.05596923828125|cri_loss: -0.0206756591796875|unsuper_loss: 0.0
-average reward score: 5.4140625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.80%) |Training time=0.46s (21.53%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8233|ppo_ep: 1|act_loss: 0.0100555419921875|cri_loss: 0.005420684814453125|unsuper_loss: 0.0
-average reward score: 5.0234375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.65%) |Training time=0.47s (21.73%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8234|ppo_ep: 1|act_loss: 0.00278472900390625|cri_loss: 0.001552581787109375|unsuper_loss: 0.0
-average reward score: 5.42578125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.37%) |Training time=0.45s (20.96%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8235|ppo_ep: 1|act_loss: 0.005443572998046875|cri_loss: 0.00420379638671875|unsuper_loss: 0.0
-average reward score: 5.2890625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.68%) |Training time=0.47s (21.77%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8236|ppo_ep: 1|act_loss: 0.0144195556640625|cri_loss: 0.0084686279296875|unsuper_loss: 0.0
-average reward score: 5.51171875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.82%) |Training time=0.46s (21.54%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8237|ppo_ep: 1|act_loss: 0.01497650146484375|cri_loss: 0.00762939453125|unsuper_loss: 0.0
-average reward score: 5.93359375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.81%) |Training time=0.46s (21.54%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8238|ppo_ep: 1|act_loss: 0.02374267578125|cri_loss: 0.0127105712890625|unsuper_loss: 0.0
-average reward score: 6.1640625
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.66%) |Training time=0.47s (21.71%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.57
-[2023-04-14 13:49:06,455] [INFO] [logging.py:96:log_dist] [Rank 0] step=8240, skipped=103, lr=[5.408985447451789e-09, 5.408985447451789e-09], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:49:06,473] [INFO] [timer.py:199:stop] epoch=0/micro_step=8240/global_step=8240, RunningAvgSamplesPerSec=105.64600782971256, CurrSamplesPerSec=105.71246698890796, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:49:06,566] [INFO] [logging.py:96:log_dist] [Rank 0] step=8240, skipped=144, lr=[4.981645860989148e-09, 4.981645860989148e-09], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 8239|ppo_ep: 1|act_loss: -0.0228118896484375|cri_loss: -0.01100921630859375|unsuper_loss: 0.0
-average reward score: 4.86328125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.81%) |Training time=0.47s (21.56%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8240|ppo_ep: 1|act_loss: 0.00240325927734375|cri_loss: 0.0013647079467773438|unsuper_loss: 0.0
-average reward score: 5.609375
--------------------------------------------------------------------------------------
-|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.59s (71.19%) |Training time=0.47s (21.06%) |Others=0.17 (7.75%)|CurSamplesPerSec=14.30 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8241|ppo_ep: 1|act_loss: -0.0002484321594238281|cri_loss: 6.747245788574219e-05|unsuper_loss: 0.0
-average reward score: 5.80859375
--------------------------------------------------------------------------------------
-|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.67s (74.66%) |Training time=0.47s (20.86%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.33 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8242|ppo_ep: 1|act_loss: 0.01103973388671875|cri_loss: 0.006000518798828125|unsuper_loss: 0.0
-average reward score: 4.703125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.79%) |Training time=0.47s (21.57%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8243|ppo_ep: 1|act_loss: 0.030548095703125|cri_loss: 0.0157928466796875|unsuper_loss: 0.0
-average reward score: 6.09375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.79%) |Training time=0.47s (21.59%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8244|ppo_ep: 1|act_loss: -0.0150299072265625|cri_loss: -0.00738525390625|unsuper_loss: 0.0
-average reward score: 5.46875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.71%) |Training time=0.47s (21.64%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8245|ppo_ep: 1|act_loss: -0.021026611328125|cri_loss: -0.01030731201171875|unsuper_loss: 0.0
-average reward score: 5.9375
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.54%) |Training time=0.47s (21.84%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8246|ppo_ep: 1|act_loss: 0.00293731689453125|cri_loss: 0.0015716552734375|unsuper_loss: 0.0
-average reward score: 4.8984375
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.98%) |Training time=0.46s (21.38%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8247|ppo_ep: 1|act_loss: 0.0004968643188476562|cri_loss: 0.0003986358642578125|unsuper_loss: 0.0
-average reward score: 5.76171875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.84%) |Training time=0.47s (21.62%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8248|ppo_ep: 1|act_loss: 0.027130126953125|cri_loss: 0.01389312744140625|unsuper_loss: 0.0
-average reward score: 4.73828125
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.65%) |Training time=0.47s (21.74%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.57
-[2023-04-14 13:49:28,193] [INFO] [logging.py:96:log_dist] [Rank 0] step=8250, skipped=103, lr=[4.5653611371511645e-09, 4.5653611371511645e-09], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:49:28,212] [INFO] [timer.py:199:stop] epoch=0/micro_step=8250/global_step=8250, RunningAvgSamplesPerSec=105.64539594557534, CurrSamplesPerSec=105.90983071658027, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:49:28,304] [INFO] [logging.py:96:log_dist] [Rank 0] step=8250, skipped=144, lr=[4.3928225118547444e-09, 4.3928225118547444e-09], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 8249|ppo_ep: 1|act_loss: 0.00682830810546875|cri_loss: 0.0035343170166015625|unsuper_loss: 0.0
-average reward score: 5.765625
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.82%) |Training time=0.46s (21.56%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8250|ppo_ep: 1|act_loss: 0.022125244140625|cri_loss: 0.01197052001953125|unsuper_loss: 0.0
-average reward score: 4.421875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.55%) |Training time=0.47s (21.54%) |Others=0.11 (4.91%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8251|ppo_ep: 1|act_loss: 0.021697998046875|cri_loss: 0.01129150390625|unsuper_loss: 0.0
-average reward score: 5.25
--------------------------------------------------------------------------------------
-|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.07%) |Training time=0.46s (21.32%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8252|ppo_ep: 1|act_loss: 0.01220703125|cri_loss: 0.00616455078125|unsuper_loss: 0.0
-average reward score: 5.37890625
--------------------------------------------------------------------------------------
-|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.86%) |Training time=0.47s (20.71%) |Others=0.10 (4.43%)|CurSamplesPerSec=14.15 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8253|ppo_ep: 1|act_loss: -0.006011962890625|cri_loss: -0.00284576416015625|unsuper_loss: 0.0
-average reward score: 5.3046875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.16%) |Training time=0.46s (21.17%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8254|ppo_ep: 1|act_loss: 0.0171051025390625|cri_loss: 0.0088653564453125|unsuper_loss: 0.0
-average reward score: 5.54296875
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.99%) |Training time=0.46s (21.37%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8255|ppo_ep: 1|act_loss: -0.02935791015625|cri_loss: -0.01371002197265625|unsuper_loss: 0.0
-average reward score: 5.34375
--------------------------------------------------------------------------------------
-|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.64s (72.87%) |Training time=0.51s (22.66%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.18 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8256|ppo_ep: 1|act_loss: -0.0007495880126953125|cri_loss: 0.0006227493286132812|unsuper_loss: 0.0
-average reward score: 5.8125
--------------------------------------------------------------------------------------
-|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.05%) |Training time=0.46s (21.30%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8257|ppo_ep: 1|act_loss: 0.00316619873046875|cri_loss: 0.0016927719116210938|unsuper_loss: 0.0
-average reward score: 5.66796875
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.01%) |Training time=0.46s (21.35%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.57
-epoch: 0|step: 8258|ppo_ep: 1|act_loss: 0.00679779052734375|cri_loss: 0.0038509368896484375|unsuper_loss: 0.0
-average reward score: 5.43359375
--------------------------------------------------------------------------------------
-|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.78%) |Training time=0.46s (21.29%) |Others=0.11 (4.93%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.57
-[2023-04-14 13:49:49,728] [INFO] [logging.py:96:log_dist] [Rank 0] step=8260, skipped=103, lr=[3.793187296863779e-09, 3.793187296863779e-09], mom=[(0.9, 0.95), (0.9, 0.95)]
-[2023-04-14 13:49:49,747] [INFO] [timer.py:199:stop] epoch=0/micro_step=8260/global_step=8260, RunningAvgSamplesPerSec=105.65211622011834, CurrSamplesPerSec=163.9855291500147, MemAllocated=9.46GB, MaxMemAllocated=19.38GB
-[2023-04-14 13:49:50,315] [INFO] [logging.py:96:log_dist] [Rank 0] step=8260, skipped=144, lr=[3.840990081415141e-09, 3.840990081415141e-09], mom=[(0.9, 0.95), (0.9, 0.95)]
-epoch: 0|step: 8259|ppo_ep: 1|act_loss: 0.0166015625|cri_loss: 0.00836181640625|unsuper_loss: 0.0
-average reward score: 6.16015625
+[2023-04-21 23:38:45,582] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+[2023-04-21 23:38:45,666] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+epoch: 0|step: 0|ppo_ep: 1|act_loss: 0.231689453125|cri_loss: 0.1451416015625|unsuper_loss: 0.0
+average reward score: -3.01171875
+-------------------------------------------------------------------------------------
+|E2E latency=3.51s |Gather latency=0.00s (0.00%) |Generate time=2.68s (76.22%) |Training time=0.74s (20.94%) |Others=0.10 (2.84%)|CurSamplesPerSec=9.12 |AvgSamplesPerSec=9.12
+[2023-04-21 23:38:47,689] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+[2023-04-21 23:38:47,773] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+epoch: 0|step: 1|ppo_ep: 1|act_loss: 0.09918212890625|cri_loss: 0.07025146484375|unsuper_loss: 0.0
+average reward score: -3.490234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.11s |Gather latency=0.00s (0.00%) |Generate time=1.63s (77.24%) |Training time=0.38s (18.05%) |Others=0.10 (4.71%)|CurSamplesPerSec=15.19 |AvgSamplesPerSec=11.39
+[2023-04-21 23:38:49,800] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, reducing to 16384
+[2023-04-21 23:38:49,886] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, reducing to 16384
+epoch: 0|step: 2|ppo_ep: 1|act_loss: 0.02325439453125|cri_loss: 0.027374267578125|unsuper_loss: 0.0
+average reward score: -3.171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.11s |Gather latency=0.00s (0.00%) |Generate time=1.63s (77.07%) |Training time=0.38s (18.14%) |Others=0.10 (4.79%)|CurSamplesPerSec=15.15 |AvgSamplesPerSec=12.42
+epoch: 0|step: 3|ppo_ep: 1|act_loss: 0.25341796875|cri_loss: 0.15380859375|unsuper_loss: 0.0
+average reward score: -2.859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.01%) |Training time=0.43s (19.89%) |Others=0.11 (5.10%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=12.94
+epoch: 0|step: 4|ppo_ep: 1|act_loss: -0.09210205078125|cri_loss: 0.01593017578125|unsuper_loss: 0.0
+average reward score: -2.75
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.62%) |Training time=0.41s (19.26%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=13.28
+[2023-04-21 23:38:56,233] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 16384, reducing to 8192
+epoch: 0|step: 5|ppo_ep: 1|act_loss: 0.1788330078125|cri_loss: 0.107421875|unsuper_loss: 0.0
+average reward score: -3.3203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.12s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.81%) |Training time=0.38s (18.02%) |Others=0.11 (5.17%)|CurSamplesPerSec=15.06 |AvgSamplesPerSec=13.55
+[2023-04-21 23:38:58,358] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 8192, reducing to 4096
+epoch: 0|step: 6|ppo_ep: 1|act_loss: 0.1064453125|cri_loss: 0.06024169921875|unsuper_loss: 0.0
+average reward score: -3.80078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.12s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.57%) |Training time=0.39s (18.15%) |Others=0.11 (5.28%)|CurSamplesPerSec=15.06 |AvgSamplesPerSec=13.75
+epoch: 0|step: 7|ppo_ep: 1|act_loss: 0.1566162109375|cri_loss: 0.09619140625|unsuper_loss: 0.0
+average reward score: -3.58984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.82s (77.86%) |Training time=0.41s (17.43%) |Others=0.11 (4.72%)|CurSamplesPerSec=13.71 |AvgSamplesPerSec=13.74
+epoch: 0|step: 8|ppo_ep: 1|act_loss: 0.1761474609375|cri_loss: 0.1156005859375|unsuper_loss: 0.0
+average reward score: -3.322265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.67%) |Training time=0.41s (19.15%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=13.86
+[2023-04-21 23:39:04,994] [INFO] [logging.py:96:log_dist] [Rank 0] step=10, skipped=5, lr=[4.825e-07, 4.825e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-21 23:39:05,012] [INFO] [timer.py:199:stop] epoch=0/micro_step=10/global_step=10, RunningAvgSamplesPerSec=133.058818951636, CurrSamplesPerSec=130.0445968859305, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-21 23:39:05,104] [INFO] [logging.py:96:log_dist] [Rank 0] step=10, skipped=3, lr=[3.5000000000000004e-07, 3.5000000000000004e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 9|ppo_ep: 1|act_loss: 0.084228515625|cri_loss: 0.0743408203125|unsuper_loss: 0.0
+average reward score: -3.283203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.65s (76.07%) |Training time=0.41s (18.88%) |Others=0.11 (5.05%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=13.94
+epoch: 0|step: 10|ppo_ep: 1|act_loss: 0.1031494140625|cri_loss: 0.065673828125|unsuper_loss: 0.0
+average reward score: -3.314453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.91%) |Training time=0.41s (18.88%) |Others=0.11 (5.21%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.02
+epoch: 0|step: 11|ppo_ep: 1|act_loss: 0.2239990234375|cri_loss: 0.1685791015625|unsuper_loss: 0.0
+average reward score: -3.419921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.91%) |Training time=0.41s (18.99%) |Others=0.11 (5.10%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.09
+[2023-04-21 23:39:11,553] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 16384, reducing to 8192
+epoch: 0|step: 12|ppo_ep: 1|act_loss: -0.024169921875|cri_loss: 0.102783203125|unsuper_loss: 0.0
+average reward score: -3.212890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.22%) |Training time=0.41s (18.96%) |Others=0.10 (4.82%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.15
+epoch: 0|step: 13|ppo_ep: 1|act_loss: -0.0860595703125|cri_loss: -0.029083251953125|unsuper_loss: 0.0
+average reward score: -2.947265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.64s (76.39%) |Training time=0.40s (18.51%) |Others=0.11 (5.10%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.20
+epoch: 0|step: 14|ppo_ep: 1|act_loss: -0.11102294921875|cri_loss: -0.04864501953125|unsuper_loss: 0.0
+average reward score: -3.197265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.89%) |Training time=0.41s (18.90%) |Others=0.11 (5.22%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.24
+epoch: 0|step: 15|ppo_ep: 1|act_loss: -0.282958984375|cri_loss: -0.09857177734375|unsuper_loss: 0.0
+average reward score: -2.9296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.79%) |Training time=0.41s (19.12%) |Others=0.11 (5.09%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.28
+epoch: 0|step: 16|ppo_ep: 1|act_loss: 0.0048980712890625|cri_loss: 0.016021728515625|unsuper_loss: 0.0
+average reward score: -3.5546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.90%) |Training time=0.41s (18.93%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.31
+epoch: 0|step: 17|ppo_ep: 1|act_loss: -0.2861328125|cri_loss: -0.10845947265625|unsuper_loss: 0.0
+average reward score: -3.591796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.94%) |Training time=0.41s (18.95%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.34
+epoch: 0|step: 18|ppo_ep: 1|act_loss: -0.137939453125|cri_loss: -0.045623779296875|unsuper_loss: 0.0
+average reward score: -2.94921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.62%) |Training time=0.41s (19.21%) |Others=0.11 (5.17%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.37
+[2023-04-21 23:39:26,503] [INFO] [logging.py:96:log_dist] [Rank 0] step=20, skipped=5, lr=[1.4475000000000001e-06, 1.4475000000000001e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-21 23:39:26,521] [INFO] [timer.py:199:stop] epoch=0/micro_step=20/global_step=20, RunningAvgSamplesPerSec=131.81286093572152, CurrSamplesPerSec=129.76547456758612, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-21 23:39:26,614] [INFO] [logging.py:96:log_dist] [Rank 0] step=20, skipped=4, lr=[8.000000000000001e-07, 8.000000000000001e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 19|ppo_ep: 1|act_loss: 0.454345703125|cri_loss: 0.290771484375|unsuper_loss: 0.0
+average reward score: -2.740234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.81%) |Training time=0.41s (19.04%) |Others=0.11 (5.15%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.39
+[2023-04-21 23:39:28,643] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 4096, reducing to 2048
+epoch: 0|step: 20|ppo_ep: 1|act_loss: 0.38330078125|cri_loss: 0.247802734375|unsuper_loss: 0.0
+average reward score: -3.064453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.12s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.93%) |Training time=0.38s (17.78%) |Others=0.11 (5.29%)|CurSamplesPerSec=15.09 |AvgSamplesPerSec=14.43
+epoch: 0|step: 21|ppo_ep: 1|act_loss: 0.45361328125|cri_loss: 0.27490234375|unsuper_loss: 0.0
+average reward score: -3.22265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.67%) |Training time=0.41s (19.10%) |Others=0.11 (5.22%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45
+epoch: 0|step: 22|ppo_ep: 1|act_loss: 0.40283203125|cri_loss: 0.27197265625|unsuper_loss: 0.0
+average reward score: -2.87109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.81s (77.82%) |Training time=0.40s (17.36%) |Others=0.11 (4.82%)|CurSamplesPerSec=13.76 |AvgSamplesPerSec=14.41
+epoch: 0|step: 23|ppo_ep: 1|act_loss: -0.0345458984375|cri_loss: -0.0041961669921875|unsuper_loss: 0.0
+average reward score: -2.962890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.66s (75.95%) |Training time=0.41s (18.92%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.42
+epoch: 0|step: 24|ppo_ep: 1|act_loss: 0.2421875|cri_loss: 0.1334228515625|unsuper_loss: 0.0
+average reward score: -3.490234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.73s (76.80%) |Training time=0.41s (18.25%) |Others=0.11 (4.95%)|CurSamplesPerSec=14.17 |AvgSamplesPerSec=14.41
+epoch: 0|step: 25|ppo_ep: 1|act_loss: 0.144775390625|cri_loss: 0.0914306640625|unsuper_loss: 0.0
+average reward score: -2.775390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.80%) |Training time=0.41s (19.07%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.43
+epoch: 0|step: 26|ppo_ep: 1|act_loss: -0.08734130859375|cri_loss: -0.032958984375|unsuper_loss: 0.0
+average reward score: -2.5078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.96%) |Training time=0.40s (18.85%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.45
+epoch: 0|step: 27|ppo_ep: 1|act_loss: 0.010284423828125|cri_loss: 0.01959228515625|unsuper_loss: 0.0
+average reward score: -2.69140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.83%) |Training time=0.41s (18.98%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.47
+epoch: 0|step: 28|ppo_ep: 1|act_loss: -0.266357421875|cri_loss: -0.11358642578125|unsuper_loss: 0.0
+average reward score: -2.99609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.69%) |Training time=0.41s (19.10%) |Others=0.11 (5.21%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.48
+[2023-04-21 23:39:48,289] [INFO] [logging.py:96:log_dist] [Rank 0] step=30, skipped=6, lr=[2.316e-06, 2.316e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-21 23:39:48,307] [INFO] [timer.py:199:stop] epoch=0/micro_step=30/global_step=30, RunningAvgSamplesPerSec=131.6430024413581, CurrSamplesPerSec=127.75158169262765, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-21 23:39:48,401] [INFO] [logging.py:96:log_dist] [Rank 0] step=30, skipped=4, lr=[1.3e-06, 1.3e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 29|ppo_ep: 1|act_loss: -0.201171875|cri_loss: -0.0841064453125|unsuper_loss: 0.0
+average reward score: -2.2265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.59%) |Training time=0.41s (19.23%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.49
+epoch: 0|step: 30|ppo_ep: 1|act_loss: -0.0819091796875|cri_loss: -0.03509521484375|unsuper_loss: 0.0
+average reward score: -2.072265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.48%) |Training time=0.42s (19.31%) |Others=0.11 (5.21%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.50
+epoch: 0|step: 31|ppo_ep: 1|act_loss: -0.12060546875|cri_loss: -0.043487548828125|unsuper_loss: 0.0
+average reward score: -2.43359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.02%) |Training time=0.40s (18.85%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.52
+epoch: 0|step: 32|ppo_ep: 1|act_loss: -0.11376953125|cri_loss: -0.01654052734375|unsuper_loss: 0.0
+average reward score: -1.240234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.67%) |Training time=0.41s (19.15%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.53
+epoch: 0|step: 33|ppo_ep: 1|act_loss: -0.05419921875|cri_loss: -0.012725830078125|unsuper_loss: 0.0
+average reward score: -1.427734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.76%) |Training time=0.41s (19.12%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.54
+epoch: 0|step: 34|ppo_ep: 1|act_loss: 0.1273193359375|cri_loss: 0.07110595703125|unsuper_loss: 0.0
+average reward score: -2.94921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.73%) |Training time=0.41s (19.04%) |Others=0.11 (5.23%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.55
+epoch: 0|step: 35|ppo_ep: 1|act_loss: 0.1138916015625|cri_loss: 0.06243896484375|unsuper_loss: 0.0
+average reward score: -2.654296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.82%) |Training time=0.41s (19.06%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.56
+[2023-04-21 23:40:03,431] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 8192, reducing to 4096
+epoch: 0|step: 36|ppo_ep: 1|act_loss: 0.059783935546875|cri_loss: 0.037017822265625|unsuper_loss: 0.0
+average reward score: -2.91796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (76.05%) |Training time=0.41s (19.15%) |Others=0.10 (4.80%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.57
+epoch: 0|step: 37|ppo_ep: 1|act_loss: 0.019195556640625|cri_loss: 0.0211334228515625|unsuper_loss: 0.0
+average reward score: -2.318359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.64s (73.69%) |Training time=0.47s (20.84%) |Others=0.12 (5.47%)|CurSamplesPerSec=14.34 |AvgSamplesPerSec=14.56
+epoch: 0|step: 38|ppo_ep: 1|act_loss: -0.214599609375|cri_loss: -0.0810546875|unsuper_loss: 0.0
+average reward score: -1.4873046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.85%) |Training time=0.41s (18.95%) |Others=0.11 (5.21%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.57
+[2023-04-21 23:40:09,846] [INFO] [logging.py:96:log_dist] [Rank 0] step=40, skipped=6, lr=[3.2810000000000004e-06, 3.2810000000000004e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-21 23:40:09,864] [INFO] [timer.py:199:stop] epoch=0/micro_step=40/global_step=40, RunningAvgSamplesPerSec=130.979460749, CurrSamplesPerSec=132.2262681750161, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-21 23:40:09,957] [INFO] [logging.py:96:log_dist] [Rank 0] step=40, skipped=5, lr=[1.75e-06, 1.75e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 39|ppo_ep: 1|act_loss: -0.10284423828125|cri_loss: -0.02862548828125|unsuper_loss: 0.0
+average reward score: -1.96875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.94%) |Training time=0.41s (18.86%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.58
+epoch: 0|step: 40|ppo_ep: 1|act_loss: 0.0841064453125|cri_loss: 0.051300048828125|unsuper_loss: 0.0
+average reward score: -2.287109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.72%) |Training time=0.41s (19.09%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.59
+epoch: 0|step: 41|ppo_ep: 1|act_loss: 0.241943359375|cri_loss: 0.13330078125|unsuper_loss: 0.0
+average reward score: -1.4775390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.82%) |Training time=0.41s (19.06%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.59
+epoch: 0|step: 42|ppo_ep: 1|act_loss: 0.169677734375|cri_loss: 0.1021728515625|unsuper_loss: 0.0
+average reward score: -2.404296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.75%) |Training time=0.41s (19.05%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.60
+epoch: 0|step: 43|ppo_ep: 1|act_loss: 0.1094970703125|cri_loss: 0.0714111328125|unsuper_loss: 0.0
+average reward score: -2.298828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.87%) |Training time=0.41s (19.02%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.61
+epoch: 0|step: 44|ppo_ep: 1|act_loss: 0.214111328125|cri_loss: 0.1400146484375|unsuper_loss: 0.0
+average reward score: -1.5478515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.86%) |Training time=0.41s (18.92%) |Others=0.11 (5.22%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.61
+epoch: 0|step: 45|ppo_ep: 1|act_loss: 0.0830078125|cri_loss: 0.06219482421875|unsuper_loss: 0.0
+average reward score: -1.7490234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.77%) |Training time=0.41s (19.12%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.62
+epoch: 0|step: 46|ppo_ep: 1|act_loss: -0.0323486328125|cri_loss: 0.013916015625|unsuper_loss: 0.0
+average reward score: -2.064453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.74%) |Training time=0.41s (19.08%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.63
+epoch: 0|step: 47|ppo_ep: 1|act_loss: 0.03997802734375|cri_loss: 0.06707763671875|unsuper_loss: 0.0
+average reward score: -2.509765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.96%) |Training time=0.41s (18.93%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.63
+epoch: 0|step: 48|ppo_ep: 1|act_loss: -0.6728515625|cri_loss: -0.2484130859375|unsuper_loss: 0.0
+average reward score: -1.5966796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.64s (76.24%) |Training time=0.40s (18.56%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.64
+[2023-04-21 23:40:31,329] [INFO] [logging.py:96:log_dist] [Rank 0] step=50, skipped=6, lr=[4.2460000000000005e-06, 4.2460000000000005e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-21 23:40:31,347] [INFO] [timer.py:199:stop] epoch=0/micro_step=50/global_step=50, RunningAvgSamplesPerSec=130.87985010288241, CurrSamplesPerSec=129.97923512190965, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-21 23:40:31,439] [INFO] [logging.py:96:log_dist] [Rank 0] step=50, skipped=5, lr=[2.25e-06, 2.25e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 49|ppo_ep: 1|act_loss: 0.01155853271484375|cri_loss: 0.0257568359375|unsuper_loss: 0.0
+average reward score: -1.796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.87%) |Training time=0.41s (19.03%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.64
+epoch: 0|step: 50|ppo_ep: 1|act_loss: -0.01409912109375|cri_loss: 0.05267333984375|unsuper_loss: 0.0
+average reward score: -1.521484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.83%) |Training time=0.41s (18.98%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.65
+epoch: 0|step: 51|ppo_ep: 1|act_loss: 0.54052734375|cri_loss: 0.328125|unsuper_loss: 0.0
+average reward score: -1.8671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.57%) |Training time=0.41s (19.27%) |Others=0.11 (5.16%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.65
+epoch: 0|step: 52|ppo_ep: 1|act_loss: 0.22802734375|cri_loss: 0.130615234375|unsuper_loss: 0.0
+average reward score: -1.7666015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.63s (73.04%) |Training time=0.42s (18.61%) |Others=0.19 (8.35%)|CurSamplesPerSec=14.32 |AvgSamplesPerSec=14.64
+epoch: 0|step: 53|ppo_ep: 1|act_loss: -0.07568359375|cri_loss: -0.014739990234375|unsuper_loss: 0.0
+average reward score: -1.5986328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.67s (75.98%) |Training time=0.41s (18.83%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.54 |AvgSamplesPerSec=14.64
+epoch: 0|step: 54|ppo_ep: 1|act_loss: -0.41357421875|cri_loss: -0.1488037109375|unsuper_loss: 0.0
+average reward score: -1.4091796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.73s (76.86%) |Training time=0.41s (18.17%) |Others=0.11 (4.97%)|CurSamplesPerSec=14.21 |AvgSamplesPerSec=14.63
+epoch: 0|step: 55|ppo_ep: 1|act_loss: -0.0655517578125|cri_loss: 0.0006103515625|unsuper_loss: 0.0
+average reward score: -0.7197265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.82%) |Training time=0.41s (19.03%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.64
+epoch: 0|step: 56|ppo_ep: 1|act_loss: 0.1314697265625|cri_loss: 0.0850830078125|unsuper_loss: 0.0
+average reward score: -1.974609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.49%) |Training time=0.41s (19.31%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.64
+epoch: 0|step: 57|ppo_ep: 1|act_loss: -0.0340576171875|cri_loss: 0.039306640625|unsuper_loss: 0.0
+average reward score: -1.9287109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.33%) |Training time=0.42s (19.48%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.65
+epoch: 0|step: 58|ppo_ep: 1|act_loss: 0.24560546875|cri_loss: 0.1434326171875|unsuper_loss: 0.0
+average reward score: -1.46484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.21%) |Training time=0.42s (19.61%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.65
+[2023-04-21 23:40:53,049] [INFO] [logging.py:96:log_dist] [Rank 0] step=60, skipped=6, lr=[5.211000000000001e-06, 5.211000000000001e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-21 23:40:53,067] [INFO] [timer.py:199:stop] epoch=0/micro_step=60/global_step=60, RunningAvgSamplesPerSec=130.3054528113133, CurrSamplesPerSec=124.08667168987976, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-21 23:40:53,159] [INFO] [logging.py:96:log_dist] [Rank 0] step=60, skipped=5, lr=[2.7500000000000004e-06, 2.7500000000000004e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 59|ppo_ep: 1|act_loss: 0.232421875|cri_loss: 0.18115234375|unsuper_loss: 0.0
+average reward score: -1.7568359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.26%) |Training time=0.42s (19.60%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.66
+epoch: 0|step: 60|ppo_ep: 1|act_loss: -0.14697265625|cri_loss: -0.02044677734375|unsuper_loss: 0.0
+average reward score: -2.09375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.45%) |Training time=0.41s (19.32%) |Others=0.11 (5.23%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.66
+epoch: 0|step: 61|ppo_ep: 1|act_loss: 0.0006103515625|cri_loss: 0.0204925537109375|unsuper_loss: 0.0
+average reward score: -1.90234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.42%) |Training time=0.42s (19.45%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.66
+epoch: 0|step: 62|ppo_ep: 1|act_loss: -0.245361328125|cri_loss: -0.07476806640625|unsuper_loss: 0.0
+average reward score: -1.95703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.13%) |Training time=0.42s (19.65%) |Others=0.11 (5.23%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.67
+epoch: 0|step: 63|ppo_ep: 1|act_loss: 0.01123046875|cri_loss: 0.072021484375|unsuper_loss: 0.0
+average reward score: -0.85986328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.73%) |Training time=0.41s (19.12%) |Others=0.11 (5.15%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.67
+epoch: 0|step: 64|ppo_ep: 1|act_loss: -0.28564453125|cri_loss: -0.0963134765625|unsuper_loss: 0.0
+average reward score: -1.640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.40%) |Training time=0.42s (19.40%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.68
+[2023-04-21 23:41:05,906] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 2048, reducing to 1024
+[2023-04-21 23:41:05,991] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 4096, reducing to 2048
+epoch: 0|step: 65|ppo_ep: 1|act_loss: 1.3173828125|cri_loss: 0.8857421875|unsuper_loss: 0.0
+average reward score: -1.640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.11s |Gather latency=0.00s (0.00%) |Generate time=1.62s (76.57%) |Training time=0.39s (18.56%) |Others=0.10 (4.87%)|CurSamplesPerSec=15.15 |AvgSamplesPerSec=14.68
+epoch: 0|step: 66|ppo_ep: 1|act_loss: 1.689453125|cri_loss: 1.10546875|unsuper_loss: 0.0
+average reward score: -1.59765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.31%) |Training time=0.42s (19.49%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.69
+epoch: 0|step: 67|ppo_ep: 1|act_loss: 0.88232421875|cri_loss: 0.5634765625|unsuper_loss: 0.0
+average reward score: -1.56640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.33%) |Training time=0.42s (19.55%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.69
+epoch: 0|step: 68|ppo_ep: 1|act_loss: 2.35546875|cri_loss: 1.568359375|unsuper_loss: 0.0
+average reward score: -1.7041015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.24%) |Training time=0.41s (18.50%) |Others=0.18 (8.26%)|CurSamplesPerSec=14.47 |AvgSamplesPerSec=14.69
+[2023-04-21 23:41:14,535] [INFO] [logging.py:96:log_dist] [Rank 0] step=70, skipped=7, lr=[6.079500000000001e-06, 6.079500000000001e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-21 23:41:14,554] [INFO] [timer.py:199:stop] epoch=0/micro_step=70/global_step=70, RunningAvgSamplesPerSec=129.97624011610526, CurrSamplesPerSec=129.91268155851395, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-21 23:41:14,646] [INFO] [logging.py:96:log_dist] [Rank 0] step=70, skipped=6, lr=[3.2000000000000003e-06, 3.2000000000000003e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 69|ppo_ep: 1|act_loss: 0.44384765625|cri_loss: 0.298828125|unsuper_loss: 0.0
+average reward score: -0.90087890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.54%) |Training time=0.41s (19.01%) |Others=0.12 (5.45%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.69
+epoch: 0|step: 70|ppo_ep: 1|act_loss: -0.609375|cri_loss: -0.156494140625|unsuper_loss: 0.0
+average reward score: -0.9306640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.73%) |Training time=0.41s (19.01%) |Others=0.11 (5.26%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.69
+epoch: 0|step: 71|ppo_ep: 1|act_loss: 0.193603515625|cri_loss: 0.189453125|unsuper_loss: 0.0
+average reward score: -1.419921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.67%) |Training time=0.41s (19.18%) |Others=0.11 (5.15%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.70
+epoch: 0|step: 72|ppo_ep: 1|act_loss: 0.293701171875|cri_loss: 0.239501953125|unsuper_loss: 0.0
+average reward score: -1.36328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.33%) |Training time=0.42s (19.47%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.70
+epoch: 0|step: 73|ppo_ep: 1|act_loss: 0.0489501953125|cri_loss: 0.048370361328125|unsuper_loss: 0.0
+average reward score: -0.7578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.58%) |Training time=0.41s (19.28%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.70
+epoch: 0|step: 74|ppo_ep: 1|act_loss: -0.166259765625|cri_loss: -0.0423583984375|unsuper_loss: 0.0
+average reward score: -0.61279296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.49%) |Training time=0.41s (19.31%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.71
+epoch: 0|step: 75|ppo_ep: 1|act_loss: -0.0986328125|cri_loss: -0.0015869140625|unsuper_loss: 0.0
+average reward score: -0.86376953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.65%) |Training time=0.41s (19.12%) |Others=0.11 (5.23%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.71
+epoch: 0|step: 76|ppo_ep: 1|act_loss: -0.13525390625|cri_loss: -0.022216796875|unsuper_loss: 0.0
+average reward score: -1.052734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.45%) |Training time=0.41s (19.35%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.71
+epoch: 0|step: 77|ppo_ep: 1|act_loss: 0.55126953125|cri_loss: 0.3369140625|unsuper_loss: 0.0
+average reward score: -1.037109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.63%) |Training time=0.41s (19.23%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.72
+epoch: 0|step: 78|ppo_ep: 1|act_loss: 0.9658203125|cri_loss: 0.6396484375|unsuper_loss: 0.0
+average reward score: -1.6171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.64%) |Training time=0.41s (19.15%) |Others=0.11 (5.21%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.72
+[2023-04-21 23:41:35,943] [INFO] [logging.py:96:log_dist] [Rank 0] step=80, skipped=7, lr=[7.0445e-06, 7.0445e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-21 23:41:35,962] [INFO] [timer.py:199:stop] epoch=0/micro_step=80/global_step=80, RunningAvgSamplesPerSec=129.79324436520156, CurrSamplesPerSec=128.96262979834717, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-21 23:41:36,055] [INFO] [logging.py:96:log_dist] [Rank 0] step=80, skipped=6, lr=[3.7e-06, 3.7e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 79|ppo_ep: 1|act_loss: 0.73046875|cri_loss: 0.47705078125|unsuper_loss: 0.0
+average reward score: -1.7333984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.64%) |Training time=0.41s (19.20%) |Others=0.11 (5.17%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.72
+epoch: 0|step: 80|ppo_ep: 1|act_loss: 0.408203125|cri_loss: 0.258544921875|unsuper_loss: 0.0
+average reward score: -1.3046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.57%) |Training time=0.41s (19.21%) |Others=0.11 (5.22%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.72
+epoch: 0|step: 81|ppo_ep: 1|act_loss: 0.377685546875|cri_loss: 0.221435546875|unsuper_loss: 0.0
+average reward score: -1.1103515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.70%) |Training time=0.41s (19.16%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.73
+epoch: 0|step: 82|ppo_ep: 1|act_loss: 0.0911865234375|cri_loss: 0.067138671875|unsuper_loss: 0.0
+average reward score: -1.1484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.20%) |Training time=0.42s (19.60%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.73
+epoch: 0|step: 83|ppo_ep: 1|act_loss: -0.3154296875|cri_loss: -0.1004638671875|unsuper_loss: 0.0
+average reward score: -1.1083984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.82s (77.55%) |Training time=0.42s (17.74%) |Others=0.11 (4.71%)|CurSamplesPerSec=13.67 |AvgSamplesPerSec=14.71
+epoch: 0|step: 84|ppo_ep: 1|act_loss: -0.142822265625|cri_loss: -0.01373291015625|unsuper_loss: 0.0
+average reward score: -1.3310546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.73s (76.85%) |Training time=0.41s (18.19%) |Others=0.11 (4.96%)|CurSamplesPerSec=14.20 |AvgSamplesPerSec=14.71
+epoch: 0|step: 85|ppo_ep: 1|act_loss: 0.0579833984375|cri_loss: 0.054473876953125|unsuper_loss: 0.0
+average reward score: -1.33203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.82%) |Training time=0.41s (19.04%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.71
+epoch: 0|step: 86|ppo_ep: 1|act_loss: 0.6943359375|cri_loss: 0.4111328125|unsuper_loss: 0.0
+average reward score: -1.646484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.56%) |Training time=0.41s (19.19%) |Others=0.11 (5.24%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.71
+epoch: 0|step: 87|ppo_ep: 1|act_loss: 0.63916015625|cri_loss: 0.377197265625|unsuper_loss: 0.0
+average reward score: -1.8642578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.71%) |Training time=0.41s (19.17%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.72
+epoch: 0|step: 88|ppo_ep: 1|act_loss: 0.033294677734375|cri_loss: 0.0440673828125|unsuper_loss: 0.0
+average reward score: -1.29296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.58%) |Training time=0.41s (19.22%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.72
+[2023-04-21 23:41:57,701] [INFO] [logging.py:96:log_dist] [Rank 0] step=90, skipped=7, lr=[8.0095e-06, 8.0095e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-21 23:41:57,719] [INFO] [timer.py:199:stop] epoch=0/micro_step=90/global_step=90, RunningAvgSamplesPerSec=129.58072305568248, CurrSamplesPerSec=126.23191816135801, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-21 23:41:57,812] [INFO] [logging.py:96:log_dist] [Rank 0] step=90, skipped=6, lr=[4.2000000000000004e-06, 4.2000000000000004e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 89|ppo_ep: 1|act_loss: -0.26611328125|cri_loss: -0.1024169921875|unsuper_loss: 0.0
+average reward score: -1.63671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.45%) |Training time=0.42s (19.43%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.72
+epoch: 0|step: 90|ppo_ep: 1|act_loss: -0.6201171875|cri_loss: -0.181396484375|unsuper_loss: 0.0
+average reward score: -1.1005859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.52%) |Training time=0.41s (19.28%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.72
+epoch: 0|step: 91|ppo_ep: 1|act_loss: 0.00115966796875|cri_loss: 0.0325927734375|unsuper_loss: 0.0
+average reward score: -1.5341796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.55%) |Training time=0.41s (19.32%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.72
+epoch: 0|step: 92|ppo_ep: 1|act_loss: 1.02734375|cri_loss: 0.625|unsuper_loss: 0.0
+average reward score: -1.9716796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.94%) |Training time=0.43s (19.88%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.72
+epoch: 0|step: 93|ppo_ep: 1|act_loss: 0.95703125|cri_loss: 0.6015625|unsuper_loss: 0.0
+average reward score: -1.1279296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.26%) |Training time=0.42s (19.62%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.73
+epoch: 0|step: 94|ppo_ep: 1|act_loss: 0.30322265625|cri_loss: 0.19775390625|unsuper_loss: 0.0
+average reward score: -1.9072265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.30%) |Training time=0.42s (19.49%) |Others=0.11 (5.21%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.73
+epoch: 0|step: 95|ppo_ep: 1|act_loss: -0.740234375|cri_loss: -0.242919921875|unsuper_loss: 0.0
+average reward score: -1.71484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.56%) |Training time=0.41s (19.30%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.73
+epoch: 0|step: 96|ppo_ep: 1|act_loss: -0.53125|cri_loss: -0.1573486328125|unsuper_loss: 0.0
+average reward score: -1.275390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.36%) |Training time=0.42s (19.44%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.73
+epoch: 0|step: 97|ppo_ep: 1|act_loss: -0.0743408203125|cri_loss: 0.00048828125|unsuper_loss: 0.0
+average reward score: -1.3505859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.36%) |Training time=0.42s (19.52%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.73
+epoch: 0|step: 98|ppo_ep: 1|act_loss: 0.43701171875|cri_loss: 0.259765625|unsuper_loss: 0.0
+average reward score: -1.8330078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.35%) |Training time=0.42s (19.45%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.74
+[2023-04-21 23:42:19,339] [INFO] [logging.py:96:log_dist] [Rank 0] step=100, skipped=7, lr=[8.974500000000002e-06, 8.974500000000002e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-21 23:42:19,358] [INFO] [timer.py:199:stop] epoch=0/micro_step=100/global_step=100, RunningAvgSamplesPerSec=129.23506099236363, CurrSamplesPerSec=127.73638388183566, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-21 23:42:19,450] [INFO] [logging.py:96:log_dist] [Rank 0] step=100, skipped=6, lr=[4.7e-06, 4.7e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 99|ppo_ep: 1|act_loss: 0.5625|cri_loss: 0.34423828125|unsuper_loss: 0.0
+average reward score: -1.734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.79s (77.38%) |Training time=0.41s (17.87%) |Others=0.11 (4.74%)|CurSamplesPerSec=13.83 |AvgSamplesPerSec=14.73
+epoch: 0|step: 100|ppo_ep: 1|act_loss: 0.50439453125|cri_loss: 0.302978515625|unsuper_loss: 0.0
+average reward score: -2.001953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.34%) |Training time=0.42s (19.47%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.73
+epoch: 0|step: 101|ppo_ep: 1|act_loss: 0.120361328125|cri_loss: 0.0860595703125|unsuper_loss: 0.0
+average reward score: -1.330078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.49%) |Training time=0.41s (19.31%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.73
+epoch: 0|step: 102|ppo_ep: 1|act_loss: 0.60986328125|cri_loss: 0.358154296875|unsuper_loss: 0.0
+average reward score: -1.5244140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.45%) |Training time=0.41s (19.33%) |Others=0.11 (5.22%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.73
+epoch: 0|step: 103|ppo_ep: 1|act_loss: 0.50732421875|cri_loss: 0.30029296875|unsuper_loss: 0.0
+average reward score: -1.869140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.51%) |Training time=0.42s (19.40%) |Others=0.11 (5.09%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.73
+epoch: 0|step: 104|ppo_ep: 1|act_loss: -0.0291748046875|cri_loss: 0.01934814453125|unsuper_loss: 0.0
+average reward score: -1.810546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.45%) |Training time=0.41s (19.34%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.73
+epoch: 0|step: 105|ppo_ep: 1|act_loss: 0.12359619140625|cri_loss: 0.0972900390625|unsuper_loss: 0.0
+average reward score: -1.439453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.48%) |Training time=0.42s (19.37%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.74
+epoch: 0|step: 106|ppo_ep: 1|act_loss: -0.090087890625|cri_loss: -0.0128173828125|unsuper_loss: 0.0
+average reward score: -2.216796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.37%) |Training time=0.42s (19.44%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.74
+epoch: 0|step: 107|ppo_ep: 1|act_loss: 0.38232421875|cri_loss: 0.24658203125|unsuper_loss: 0.0
+average reward score: -2.146484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.59%) |Training time=0.41s (19.28%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.74
+epoch: 0|step: 108|ppo_ep: 1|act_loss: 0.6181640625|cri_loss: 0.38037109375|unsuper_loss: 0.0
+average reward score: -2.04296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.43%) |Training time=0.42s (19.38%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.74
+[2023-04-21 23:42:40,808] [INFO] [logging.py:96:log_dist] [Rank 0] step=110, skipped=7, lr=[9.649706174538074e-06, 9.649706174538074e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-21 23:42:40,827] [INFO] [timer.py:199:stop] epoch=0/micro_step=110/global_step=110, RunningAvgSamplesPerSec=128.96985847860012, CurrSamplesPerSec=126.8304863240554, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-21 23:42:40,919] [INFO] [logging.py:96:log_dist] [Rank 0] step=110, skipped=6, lr=[4.999729351164122e-06, 4.999729351164122e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 109|ppo_ep: 1|act_loss: -0.252197265625|cri_loss: -0.064697265625|unsuper_loss: 0.0
+average reward score: -2.1015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.49%) |Training time=0.42s (19.37%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.74
+epoch: 0|step: 110|ppo_ep: 1|act_loss: -0.040130615234375|cri_loss: 0.00396728515625|unsuper_loss: 0.0
+average reward score: -1.96484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.41%) |Training time=0.42s (19.40%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.74
+epoch: 0|step: 111|ppo_ep: 1|act_loss: 0.931640625|cri_loss: 0.5419921875|unsuper_loss: 0.0
+average reward score: -2.484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.55%) |Training time=0.41s (19.32%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.75
+epoch: 0|step: 112|ppo_ep: 1|act_loss: 0.477294921875|cri_loss: 0.284912109375|unsuper_loss: 0.0
+average reward score: -2.404296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (76.01%) |Training time=0.41s (18.79%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.75
+epoch: 0|step: 113|ppo_ep: 1|act_loss: 0.552734375|cri_loss: 0.347412109375|unsuper_loss: 0.0
+average reward score: -2.2421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.66s (76.24%) |Training time=0.41s (18.71%) |Others=0.11 (5.06%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.75
+epoch: 0|step: 114|ppo_ep: 1|act_loss: -0.68310546875|cri_loss: -0.2467041015625|unsuper_loss: 0.0
+average reward score: -2.908203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.40s |Gather latency=0.00s (0.00%) |Generate time=1.88s (78.30%) |Training time=0.41s (17.04%) |Others=0.11 (4.67%)|CurSamplesPerSec=13.34 |AvgSamplesPerSec=14.73
+epoch: 0|step: 115|ppo_ep: 1|act_loss: 0.0360107421875|cri_loss: 0.07562255859375|unsuper_loss: 0.0
+average reward score: -2.796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.52%) |Training time=0.41s (19.35%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.73
+epoch: 0|step: 116|ppo_ep: 1|act_loss: 0.1025390625|cri_loss: 0.0999755859375|unsuper_loss: 0.0
+average reward score: -2.912109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.56%) |Training time=0.41s (19.25%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.74
+epoch: 0|step: 117|ppo_ep: 1|act_loss: 0.80419921875|cri_loss: 0.489013671875|unsuper_loss: 0.0
+average reward score: -2.669921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.61%) |Training time=0.41s (19.27%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.74
+epoch: 0|step: 118|ppo_ep: 1|act_loss: 1.23828125|cri_loss: 0.736328125|unsuper_loss: 0.0
+average reward score: -2.92578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.48%) |Training time=0.41s (19.33%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.74
+[2023-04-21 23:43:02,558] [INFO] [logging.py:96:log_dist] [Rank 0] step=120, skipped=7, lr=[9.644483606235295e-06, 9.644483606235295e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-21 23:43:02,577] [INFO] [timer.py:199:stop] epoch=0/micro_step=120/global_step=120, RunningAvgSamplesPerSec=128.92096849256106, CurrSamplesPerSec=127.93716465015905, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-21 23:43:02,669] [INFO] [logging.py:96:log_dist] [Rank 0] step=120, skipped=6, lr=[4.996685224712077e-06, 4.996685224712077e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 119|ppo_ep: 1|act_loss: 0.65234375|cri_loss: 0.39990234375|unsuper_loss: 0.0
+average reward score: -2.9765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.63%) |Training time=0.41s (19.26%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.74
+epoch: 0|step: 120|ppo_ep: 1|act_loss: -0.394775390625|cri_loss: -0.135009765625|unsuper_loss: 0.0
+average reward score: -3.55859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.73%) |Training time=0.41s (19.07%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.74
+epoch: 0|step: 121|ppo_ep: 1|act_loss: -0.919921875|cri_loss: -0.265869140625|unsuper_loss: 0.0
+average reward score: -3.49609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.60%) |Training time=0.41s (19.26%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.74
+epoch: 0|step: 122|ppo_ep: 1|act_loss: -0.70068359375|cri_loss: -0.210693359375|unsuper_loss: 0.0
+average reward score: -3.69921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.40%) |Training time=0.42s (19.40%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.74
+epoch: 0|step: 123|ppo_ep: 1|act_loss: 0.251953125|cri_loss: 0.165771484375|unsuper_loss: 0.0
+average reward score: -3.43359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.35%) |Training time=0.42s (19.44%) |Others=0.11 (5.21%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.75
+epoch: 0|step: 124|ppo_ep: 1|act_loss: 1.0546875|cri_loss: 0.6337890625|unsuper_loss: 0.0
+average reward score: -3.421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.75%) |Training time=0.41s (19.02%) |Others=0.11 (5.24%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.75
+epoch: 0|step: 125|ppo_ep: 1|act_loss: 1.173828125|cri_loss: 0.6923828125|unsuper_loss: 0.0
+average reward score: -3.865234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.34%) |Training time=0.42s (19.54%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.75
+epoch: 0|step: 126|ppo_ep: 1|act_loss: 0.9951171875|cri_loss: 0.59619140625|unsuper_loss: 0.0
+average reward score: -3.61328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.41%) |Training time=0.42s (19.41%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.75
+epoch: 0|step: 127|ppo_ep: 1|act_loss: 0.256103515625|cri_loss: 0.2021484375|unsuper_loss: 0.0
+average reward score: -3.833984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.66%) |Training time=0.41s (19.22%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.75
+epoch: 0|step: 128|ppo_ep: 1|act_loss: -0.39599609375|cri_loss: -0.1229248046875|unsuper_loss: 0.0
+average reward score: -3.7109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.61%) |Training time=0.41s (19.18%) |Others=0.11 (5.21%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.75
+[2023-04-21 23:43:24,046] [INFO] [logging.py:96:log_dist] [Rank 0] step=130, skipped=7, lr=[9.632739717588912e-06, 9.632739717588912e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-21 23:43:24,064] [INFO] [timer.py:199:stop] epoch=0/micro_step=130/global_step=130, RunningAvgSamplesPerSec=128.88130010143965, CurrSamplesPerSec=137.54423533149213, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-21 23:43:24,157] [INFO] [logging.py:96:log_dist] [Rank 0] step=130, skipped=6, lr=[4.99026279355402e-06, 4.99026279355402e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 129|ppo_ep: 1|act_loss: 0.17138671875|cri_loss: 0.1422119140625|unsuper_loss: 0.0
+average reward score: -3.646484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.85%) |Training time=0.44s (20.09%) |Others=0.11 (5.06%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.75
+epoch: 0|step: 130|ppo_ep: 1|act_loss: 0.3515625|cri_loss: 0.251953125|unsuper_loss: 0.0
+average reward score: -3.71484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.30%) |Training time=0.42s (19.51%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.75
+epoch: 0|step: 131|ppo_ep: 1|act_loss: 1.07421875|cri_loss: 0.6396484375|unsuper_loss: 0.0
+average reward score: -3.5
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.56%) |Training time=0.41s (19.30%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.75
+epoch: 0|step: 132|ppo_ep: 1|act_loss: 0.6865234375|cri_loss: 0.406005859375|unsuper_loss: 0.0
+average reward score: -4.16015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.40%) |Training time=0.42s (19.40%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.76
+epoch: 0|step: 133|ppo_ep: 1|act_loss: 0.328369140625|cri_loss: 0.204833984375|unsuper_loss: 0.0
+average reward score: -3.5546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.45%) |Training time=0.42s (19.41%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.76
+epoch: 0|step: 134|ppo_ep: 1|act_loss: -0.388427734375|cri_loss: -0.111572265625|unsuper_loss: 0.0
+average reward score: -3.6015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.32%) |Training time=0.42s (19.49%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.76
+epoch: 0|step: 135|ppo_ep: 1|act_loss: -0.2587890625|cri_loss: -0.0894775390625|unsuper_loss: 0.0
+average reward score: -4.28515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.32%) |Training time=0.42s (19.56%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.76
+epoch: 0|step: 136|ppo_ep: 1|act_loss: 0.365478515625|cri_loss: 0.2197265625|unsuper_loss: 0.0
+average reward score: -3.84765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.59%) |Training time=0.41s (19.20%) |Others=0.11 (5.21%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.76
+epoch: 0|step: 137|ppo_ep: 1|act_loss: 0.2919921875|cri_loss: 0.183837890625|unsuper_loss: 0.0
+average reward score: -3.9296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.61%) |Training time=0.41s (19.27%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.76
+epoch: 0|step: 138|ppo_ep: 1|act_loss: -0.05572509765625|cri_loss: -0.000396728515625|unsuper_loss: 0.0
+average reward score: -4.5390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.39%) |Training time=0.42s (19.38%) |Others=0.11 (5.23%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.76
+[2023-04-21 23:43:45,499] [INFO] [logging.py:96:log_dist] [Rank 0] step=140, skipped=7, lr=[9.61449039944247e-06, 9.61449039944247e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-21 23:43:45,517] [INFO] [timer.py:199:stop] epoch=0/micro_step=140/global_step=140, RunningAvgSamplesPerSec=128.69974239896064, CurrSamplesPerSec=127.29890520329567, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-21 23:43:45,610] [INFO] [logging.py:96:log_dist] [Rank 0] step=140, skipped=6, lr=[4.980470747984265e-06, 4.980470747984265e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 139|ppo_ep: 1|act_loss: -0.386474609375|cri_loss: -0.15283203125|unsuper_loss: 0.0
+average reward score: -3.962890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.53%) |Training time=0.41s (19.34%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.76
+epoch: 0|step: 140|ppo_ep: 1|act_loss: -0.3408203125|cri_loss: -0.1376953125|unsuper_loss: 0.0
+average reward score: -4.34765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.44%) |Training time=0.42s (19.38%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.76
+epoch: 0|step: 141|ppo_ep: 1|act_loss: 0.11871337890625|cri_loss: 0.0736083984375|unsuper_loss: 0.0
+average reward score: -4.19921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.65%) |Training time=0.41s (19.21%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.77
+epoch: 0|step: 142|ppo_ep: 1|act_loss: 0.28564453125|cri_loss: 0.182373046875|unsuper_loss: 0.0
+average reward score: -4.4140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.75%) |Training time=0.41s (19.05%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.77
+epoch: 0|step: 143|ppo_ep: 1|act_loss: -0.1136474609375|cri_loss: -0.02484130859375|unsuper_loss: 0.0
+average reward score: -4.28515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.66s (76.19%) |Training time=0.41s (18.65%) |Others=0.11 (5.16%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.77
+epoch: 0|step: 144|ppo_ep: 1|act_loss: 0.39404296875|cri_loss: 0.25|unsuper_loss: 0.0
+average reward score: -4.37109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.73s (73.73%) |Training time=0.43s (18.30%) |Others=0.19 (7.97%)|CurSamplesPerSec=13.64 |AvgSamplesPerSec=14.76
+epoch: 0|step: 145|ppo_ep: 1|act_loss: -0.01904296875|cri_loss: 0.034942626953125|unsuper_loss: 0.0
+average reward score: -4.44921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.48%) |Training time=0.41s (18.73%) |Others=0.13 (5.79%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.76
+epoch: 0|step: 146|ppo_ep: 1|act_loss: -0.2066650390625|cri_loss: -0.078125|unsuper_loss: 0.0
+average reward score: -4.71875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.89%) |Training time=0.40s (18.90%) |Others=0.11 (5.21%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.76
+epoch: 0|step: 147|ppo_ep: 1|act_loss: 0.1812744140625|cri_loss: 0.15380859375|unsuper_loss: 0.0
+average reward score: -4.43359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.86%) |Training time=0.41s (19.00%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.76
+epoch: 0|step: 148|ppo_ep: 1|act_loss: -0.1953125|cri_loss: -0.05718994140625|unsuper_loss: 0.0
+average reward score: -4.39453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (76.06%) |Training time=0.40s (18.74%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.76
+[2023-04-21 23:44:07,214] [INFO] [logging.py:96:log_dist] [Rank 0] step=150, skipped=7, lr=[9.589760345240206e-06, 9.589760345240206e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-21 23:44:07,233] [INFO] [timer.py:199:stop] epoch=0/micro_step=150/global_step=150, RunningAvgSamplesPerSec=128.76410862507046, CurrSamplesPerSec=131.15845699069212, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-21 23:44:07,325] [INFO] [logging.py:96:log_dist] [Rank 0] step=150, skipped=6, lr=[4.967322337776272e-06, 4.967322337776272e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 149|ppo_ep: 1|act_loss: 0.422119140625|cri_loss: 0.282958984375|unsuper_loss: 0.0
+average reward score: -4.41015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.84%) |Training time=0.41s (18.98%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.76
+epoch: 0|step: 150|ppo_ep: 1|act_loss: -0.370849609375|cri_loss: -0.1490478515625|unsuper_loss: 0.0
+average reward score: -4.79296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.47%) |Training time=0.42s (19.34%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.76
+epoch: 0|step: 151|ppo_ep: 1|act_loss: -0.1011962890625|cri_loss: 0.0057373046875|unsuper_loss: 0.0
+average reward score: -3.849609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.79%) |Training time=0.41s (18.97%) |Others=0.11 (5.24%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.76
+epoch: 0|step: 152|ppo_ep: 1|act_loss: 0.1781005859375|cri_loss: 0.1380615234375|unsuper_loss: 0.0
+average reward score: -4.3828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.62%) |Training time=0.41s (19.19%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.77
+epoch: 0|step: 153|ppo_ep: 1|act_loss: 0.027191162109375|cri_loss: 0.043060302734375|unsuper_loss: 0.0
+average reward score: -4.26953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.66%) |Training time=0.41s (19.19%) |Others=0.11 (5.15%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.77
+epoch: 0|step: 154|ppo_ep: 1|act_loss: 0.3134765625|cri_loss: 0.208251953125|unsuper_loss: 0.0
+average reward score: -4.02734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.59%) |Training time=0.41s (19.20%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.77
+epoch: 0|step: 155|ppo_ep: 1|act_loss: 0.072509765625|cri_loss: 0.06378173828125|unsuper_loss: 0.0
+average reward score: -4.1015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.57%) |Training time=0.41s (19.22%) |Others=0.11 (5.21%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.77
+epoch: 0|step: 156|ppo_ep: 1|act_loss: 0.433837890625|cri_loss: 0.249755859375|unsuper_loss: 0.0
+average reward score: -4.13671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.87%) |Training time=0.40s (18.89%) |Others=0.11 (5.25%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.77
+epoch: 0|step: 157|ppo_ep: 1|act_loss: -0.005615234375|cri_loss: 0.020538330078125|unsuper_loss: 0.0
+average reward score: -3.845703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.74%) |Training time=0.41s (19.12%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.77
+epoch: 0|step: 158|ppo_ep: 1|act_loss: -0.234619140625|cri_loss: -0.06939697265625|unsuper_loss: 0.0
+average reward score: -4.21484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.71%) |Training time=0.41s (19.08%) |Others=0.11 (5.21%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.77
+[2023-04-21 23:44:28,700] [INFO] [logging.py:96:log_dist] [Rank 0] step=160, skipped=7, lr=[9.558583017613959e-06, 9.558583017613959e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-21 23:44:28,714] [INFO] [timer.py:199:stop] epoch=0/micro_step=160/global_step=160, RunningAvgSamplesPerSec=128.69180454066543, CurrSamplesPerSec=112.99300832350458, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-21 23:44:28,809] [INFO] [logging.py:96:log_dist] [Rank 0] step=160, skipped=6, lr=[4.950835354254168e-06, 4.950835354254168e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 159|ppo_ep: 1|act_loss: 0.468505859375|cri_loss: 0.272705078125|unsuper_loss: 0.0
+average reward score: -3.94140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.60%) |Training time=0.47s (21.32%) |Others=0.11 (5.09%)|CurSamplesPerSec=14.53 |AvgSamplesPerSec=14.77
+epoch: 0|step: 160|ppo_ep: 1|act_loss: 0.4345703125|cri_loss: 0.247802734375|unsuper_loss: 0.0
+average reward score: -3.84765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.50%) |Training time=0.41s (19.30%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.77
+epoch: 0|step: 161|ppo_ep: 1|act_loss: 0.0673828125|cri_loss: 0.03875732421875|unsuper_loss: 0.0
+average reward score: -3.806640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.74%) |Training time=0.41s (19.03%) |Others=0.11 (5.23%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.77
+epoch: 0|step: 162|ppo_ep: 1|act_loss: 0.009613037109375|cri_loss: 0.032318115234375|unsuper_loss: 0.0
+average reward score: -3.70703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.52%) |Training time=0.41s (19.26%) |Others=0.11 (5.22%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.77
+epoch: 0|step: 163|ppo_ep: 1|act_loss: 0.33349609375|cri_loss: 0.194580078125|unsuper_loss: 0.0
+average reward score: -3.75
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.30%) |Training time=0.42s (19.57%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.77
+epoch: 0|step: 164|ppo_ep: 1|act_loss: 0.18798828125|cri_loss: 0.1229248046875|unsuper_loss: 0.0
+average reward score: -3.94140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.46%) |Training time=0.41s (19.34%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.77
+epoch: 0|step: 165|ppo_ep: 1|act_loss: 0.05999755859375|cri_loss: 0.0836181640625|unsuper_loss: 0.0
+average reward score: -3.4375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.38%) |Training time=0.42s (19.48%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.78
+epoch: 0|step: 166|ppo_ep: 1|act_loss: -0.01239013671875|cri_loss: 0.04156494140625|unsuper_loss: 0.0
+average reward score: -3.578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.44%) |Training time=0.41s (19.34%) |Others=0.11 (5.22%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.78
+epoch: 0|step: 167|ppo_ep: 1|act_loss: -0.429443359375|cri_loss: -0.096923828125|unsuper_loss: 0.0
+average reward score: -3.73046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.57%) |Training time=0.41s (19.29%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.78
+epoch: 0|step: 168|ppo_ep: 1|act_loss: -0.4609375|cri_loss: -0.14404296875|unsuper_loss: 0.0
+average reward score: -3.419921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.47%) |Training time=0.41s (19.32%) |Others=0.11 (5.21%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.78
+[2023-04-21 23:44:50,130] [INFO] [logging.py:96:log_dist] [Rank 0] step=170, skipped=7, lr=[9.521000603104346e-06, 9.521000603104346e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-21 23:44:50,149] [INFO] [timer.py:199:stop] epoch=0/micro_step=170/global_step=170, RunningAvgSamplesPerSec=128.60437074368264, CurrSamplesPerSec=127.17719830200122, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-21 23:44:50,241] [INFO] [logging.py:96:log_dist] [Rank 0] step=170, skipped=6, lr=[4.931032106219029e-06, 4.931032106219029e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 169|ppo_ep: 1|act_loss: 0.5595703125|cri_loss: 0.408447265625|unsuper_loss: 0.0
+average reward score: -3.689453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.50%) |Training time=0.41s (19.37%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.78
+epoch: 0|step: 170|ppo_ep: 1|act_loss: -0.053466796875|cri_loss: 0.0736083984375|unsuper_loss: 0.0
+average reward score: -3.265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.38%) |Training time=0.42s (19.42%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.78
+epoch: 0|step: 171|ppo_ep: 1|act_loss: -0.260986328125|cri_loss: -0.1085205078125|unsuper_loss: 0.0
+average reward score: -3.47265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.96%) |Training time=0.40s (18.89%) |Others=0.11 (5.15%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.78
+epoch: 0|step: 172|ppo_ep: 1|act_loss: 0.1719970703125|cri_loss: 0.160888671875|unsuper_loss: 0.0
+average reward score: -3.751953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.73%) |Training time=0.41s (19.07%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.78
+epoch: 0|step: 173|ppo_ep: 1|act_loss: 0.1490478515625|cri_loss: 0.10223388671875|unsuper_loss: 0.0
+average reward score: -3.56640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.66s (76.31%) |Training time=0.41s (18.63%) |Others=0.11 (5.06%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.78
+epoch: 0|step: 174|ppo_ep: 1|act_loss: 0.10699462890625|cri_loss: 0.09259033203125|unsuper_loss: 0.0
+average reward score: -3.125
+-------------------------------------------------------------------------------------
+|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.73s (76.95%) |Training time=0.41s (18.06%) |Others=0.11 (4.99%)|CurSamplesPerSec=14.25 |AvgSamplesPerSec=14.78
+epoch: 0|step: 175|ppo_ep: 1|act_loss: 0.077392578125|cri_loss: 0.059234619140625|unsuper_loss: 0.0
+average reward score: -2.970703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.79s (77.47%) |Training time=0.41s (17.77%) |Others=0.11 (4.76%)|CurSamplesPerSec=13.82 |AvgSamplesPerSec=14.77
+epoch: 0|step: 176|ppo_ep: 1|act_loss: -0.00567626953125|cri_loss: 0.0218963623046875|unsuper_loss: 0.0
+average reward score: -3.3984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (76.10%) |Training time=0.40s (18.73%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.77
+epoch: 0|step: 177|ppo_ep: 1|act_loss: -0.052734375|cri_loss: 0.01092529296875|unsuper_loss: 0.0
+average reward score: -3.2734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.85%) |Training time=0.41s (19.00%) |Others=0.11 (5.15%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.77
+epoch: 0|step: 178|ppo_ep: 1|act_loss: 0.16015625|cri_loss: 0.0904541015625|unsuper_loss: 0.0
+average reward score: -3.587890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.76%) |Training time=0.41s (19.04%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.77
+[2023-04-21 23:45:11,884] [INFO] [logging.py:96:log_dist] [Rank 0] step=180, skipped=7, lr=[9.47706395507748e-06, 9.47706395507748e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-21 23:45:11,903] [INFO] [timer.py:199:stop] epoch=0/micro_step=180/global_step=180, RunningAvgSamplesPerSec=128.7054493106549, CurrSamplesPerSec=129.51103786201853, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-21 23:45:11,995] [INFO] [logging.py:96:log_dist] [Rank 0] step=180, skipped=6, lr=[4.907939389762475e-06, 4.907939389762475e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 179|ppo_ep: 1|act_loss: 0.210693359375|cri_loss: 0.1358642578125|unsuper_loss: 0.0
+average reward score: -3.1171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.72%) |Training time=0.41s (19.15%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.78
+epoch: 0|step: 180|ppo_ep: 1|act_loss: 0.0718994140625|cri_loss: 0.0391845703125|unsuper_loss: 0.0
+average reward score: -3.412109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.43%) |Training time=0.42s (19.38%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.78
+epoch: 0|step: 181|ppo_ep: 1|act_loss: 0.176513671875|cri_loss: 0.0987548828125|unsuper_loss: 0.0
+average reward score: -3.40625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.88%) |Training time=0.41s (18.99%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.78
+epoch: 0|step: 182|ppo_ep: 1|act_loss: 0.27294921875|cri_loss: 0.1507568359375|unsuper_loss: 0.0
+average reward score: -3.998046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.61%) |Training time=0.41s (19.20%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.78
+epoch: 0|step: 183|ppo_ep: 1|act_loss: 0.1353759765625|cri_loss: 0.085205078125|unsuper_loss: 0.0
+average reward score: -3.275390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.60%) |Training time=0.41s (19.20%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.78
+epoch: 0|step: 184|ppo_ep: 1|act_loss: -0.13037109375|cri_loss: -0.0225830078125|unsuper_loss: 0.0
+average reward score: -3.07421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.59%) |Training time=0.41s (19.22%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.78
+epoch: 0|step: 185|ppo_ep: 1|act_loss: -0.154052734375|cri_loss: -0.0269775390625|unsuper_loss: 0.0
+average reward score: -3.068359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.49%) |Training time=0.42s (19.38%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.78
+epoch: 0|step: 186|ppo_ep: 1|act_loss: -0.08251953125|cri_loss: -0.023284912109375|unsuper_loss: 0.0
+average reward score: -3.43359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.60%) |Training time=0.41s (19.01%) |Others=0.12 (5.39%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.78
+epoch: 0|step: 187|ppo_ep: 1|act_loss: -0.1881103515625|cri_loss: -0.0428466796875|unsuper_loss: 0.0
+average reward score: -3.095703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.70%) |Training time=0.41s (19.17%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.78
+epoch: 0|step: 188|ppo_ep: 1|act_loss: -0.1611328125|cri_loss: -0.051971435546875|unsuper_loss: 0.0
+average reward score: -3.404296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.56%) |Training time=0.41s (19.25%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.78
+[2023-04-21 23:45:33,325] [INFO] [logging.py:96:log_dist] [Rank 0] step=190, skipped=7, lr=[9.426832524914468e-06, 9.426832524914468e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-21 23:45:33,344] [INFO] [timer.py:199:stop] epoch=0/micro_step=190/global_step=190, RunningAvgSamplesPerSec=128.70097836160144, CurrSamplesPerSec=129.35488963398944, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-21 23:45:33,436] [INFO] [logging.py:96:log_dist] [Rank 0] step=190, skipped=6, lr=[4.881588452008457e-06, 4.881588452008457e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 189|ppo_ep: 1|act_loss: -0.0740966796875|cri_loss: -0.0230712890625|unsuper_loss: 0.0
+average reward score: -3.45703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.71%) |Training time=0.41s (19.17%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.78
+epoch: 0|step: 190|ppo_ep: 1|act_loss: 0.0145263671875|cri_loss: 0.0230865478515625|unsuper_loss: 0.0
+average reward score: -3.482421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.79s (77.50%) |Training time=0.41s (17.68%) |Others=0.11 (4.83%)|CurSamplesPerSec=13.85 |AvgSamplesPerSec=14.78
+epoch: 0|step: 191|ppo_ep: 1|act_loss: -0.037109375|cri_loss: -0.017303466796875|unsuper_loss: 0.0
+average reward score: -3.125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.77%) |Training time=0.41s (19.01%) |Others=0.11 (5.22%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.78
+epoch: 0|step: 192|ppo_ep: 1|act_loss: 0.0064239501953125|cri_loss: 0.00643157958984375|unsuper_loss: 0.0
+average reward score: -3.06640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.53%) |Training time=0.41s (19.28%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.78
+epoch: 0|step: 193|ppo_ep: 1|act_loss: 0.1124267578125|cri_loss: 0.06622314453125|unsuper_loss: 0.0
+average reward score: -3.09765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.51%) |Training time=0.41s (19.30%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.78
+epoch: 0|step: 194|ppo_ep: 1|act_loss: 0.178466796875|cri_loss: 0.109130859375|unsuper_loss: 0.0
+average reward score: -3.4453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.58%) |Training time=0.41s (19.22%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.78
+epoch: 0|step: 195|ppo_ep: 1|act_loss: 0.2174072265625|cri_loss: 0.133056640625|unsuper_loss: 0.0
+average reward score: -3.458984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.38%) |Training time=0.42s (19.48%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.78
+epoch: 0|step: 196|ppo_ep: 1|act_loss: 0.23974609375|cri_loss: 0.1435546875|unsuper_loss: 0.0
+average reward score: -3.2734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.46%) |Training time=0.41s (19.35%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.78
+epoch: 0|step: 197|ppo_ep: 1|act_loss: 0.0567626953125|cri_loss: 0.032012939453125|unsuper_loss: 0.0
+average reward score: -3.1875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.68%) |Training time=0.41s (19.19%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.78
+epoch: 0|step: 198|ppo_ep: 1|act_loss: 0.14697265625|cri_loss: 0.0833740234375|unsuper_loss: 0.0
+average reward score: -3.265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.59%) |Training time=0.41s (19.22%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.78
+[2023-04-21 23:45:54,917] [INFO] [logging.py:96:log_dist] [Rank 0] step=200, skipped=7, lr=[9.370374281566792e-06, 9.370374281566792e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-21 23:45:54,936] [INFO] [timer.py:199:stop] epoch=0/micro_step=200/global_step=200, RunningAvgSamplesPerSec=128.6778333771853, CurrSamplesPerSec=127.40669356852047, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-21 23:45:55,028] [INFO] [logging.py:96:log_dist] [Rank 0] step=200, skipped=6, lr=[4.852014948832268e-06, 4.852014948832268e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 199|ppo_ep: 1|act_loss: -0.042144775390625|cri_loss: -0.01373291015625|unsuper_loss: 0.0
+average reward score: -3.23828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.54%) |Training time=0.41s (19.33%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.79
+epoch: 0|step: 200|ppo_ep: 1|act_loss: -0.0169677734375|cri_loss: -0.00698089599609375|unsuper_loss: 0.0
+average reward score: -2.87890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.57%) |Training time=0.41s (19.23%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.79
+epoch: 0|step: 201|ppo_ep: 1|act_loss: -0.0305633544921875|cri_loss: -0.0092926025390625|unsuper_loss: 0.0
+average reward score: -3.1171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.60%) |Training time=0.41s (19.26%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.79
+epoch: 0|step: 202|ppo_ep: 1|act_loss: -0.02667236328125|cri_loss: -0.00907135009765625|unsuper_loss: 0.0
+average reward score: -3.25390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.61%) |Training time=0.41s (19.19%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.79
+epoch: 0|step: 203|ppo_ep: 1|act_loss: -0.062408447265625|cri_loss: -0.026336669921875|unsuper_loss: 0.0
+average reward score: -3.404296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.66s (76.28%) |Training time=0.41s (18.66%) |Others=0.11 (5.06%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.79
+epoch: 0|step: 204|ppo_ep: 1|act_loss: -0.08294677734375|cri_loss: -0.0308380126953125|unsuper_loss: 0.0
+average reward score: -3.1015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.75s (77.00%) |Training time=0.41s (18.07%) |Others=0.11 (4.93%)|CurSamplesPerSec=14.10 |AvgSamplesPerSec=14.78
+epoch: 0|step: 205|ppo_ep: 1|act_loss: 0.029876708984375|cri_loss: 0.0192718505859375|unsuper_loss: 0.0
+average reward score: -3.23046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.76%) |Training time=0.41s (19.12%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.78
+epoch: 0|step: 206|ppo_ep: 1|act_loss: 0.0419921875|cri_loss: 0.023468017578125|unsuper_loss: 0.0
+average reward score: -3.029296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.80s (77.67%) |Training time=0.41s (17.51%) |Others=0.11 (4.82%)|CurSamplesPerSec=13.84 |AvgSamplesPerSec=14.78
+epoch: 0|step: 207|ppo_ep: 1|act_loss: 0.0136260986328125|cri_loss: 0.00995635986328125|unsuper_loss: 0.0
+average reward score: -2.93359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.92%) |Training time=0.41s (18.97%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.78
+epoch: 0|step: 208|ppo_ep: 1|act_loss: 0.1396484375|cri_loss: 0.07757568359375|unsuper_loss: 0.0
+average reward score: -2.998046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.80%) |Training time=0.41s (19.00%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.78
+[2023-04-21 23:46:16,693] [INFO] [logging.py:96:log_dist] [Rank 0] step=210, skipped=7, lr=[9.30776561958644e-06, 9.30776561958644e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-21 23:46:16,711] [INFO] [timer.py:199:stop] epoch=0/micro_step=210/global_step=210, RunningAvgSamplesPerSec=128.72842312256287, CurrSamplesPerSec=129.26693370599412, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-21 23:46:16,804] [INFO] [logging.py:96:log_dist] [Rank 0] step=210, skipped=6, lr=[4.819258896614014e-06, 4.819258896614014e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 209|ppo_ep: 1|act_loss: 0.133056640625|cri_loss: 0.07379150390625|unsuper_loss: 0.0
+average reward score: -3.14453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.79%) |Training time=0.41s (19.12%) |Others=0.11 (5.10%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.78
+epoch: 0|step: 210|ppo_ep: 1|act_loss: 0.1053466796875|cri_loss: 0.05902099609375|unsuper_loss: 0.0
+average reward score: -3.078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.74%) |Training time=0.41s (19.06%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.78
+epoch: 0|step: 211|ppo_ep: 1|act_loss: 0.021881103515625|cri_loss: 0.01165771484375|unsuper_loss: 0.0
+average reward score: -3.03515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.87%) |Training time=0.41s (19.00%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.78
+epoch: 0|step: 212|ppo_ep: 1|act_loss: -0.00565338134765625|cri_loss: -0.0013027191162109375|unsuper_loss: 0.0
+average reward score: -3.228515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.83%) |Training time=0.41s (18.98%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.78
+epoch: 0|step: 213|ppo_ep: 1|act_loss: -0.017486572265625|cri_loss: -0.0062713623046875|unsuper_loss: 0.0
+average reward score: -3.412109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.87%) |Training time=0.41s (18.94%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.78
+epoch: 0|step: 214|ppo_ep: 1|act_loss: -0.03656005859375|cri_loss: -0.01617431640625|unsuper_loss: 0.0
+average reward score: -3.298828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.80%) |Training time=0.41s (19.01%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.78
+epoch: 0|step: 215|ppo_ep: 1|act_loss: -0.12060546875|cri_loss: -0.0494384765625|unsuper_loss: 0.0
+average reward score: -3.34375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.96%) |Training time=0.40s (18.81%) |Others=0.11 (5.23%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.79
+epoch: 0|step: 216|ppo_ep: 1|act_loss: 0.0006313323974609375|cri_loss: 0.00394439697265625|unsuper_loss: 0.0
+average reward score: -3.203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.91%) |Training time=0.41s (18.90%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.79
+epoch: 0|step: 217|ppo_ep: 1|act_loss: -0.06268310546875|cri_loss: -0.0294189453125|unsuper_loss: 0.0
+average reward score: -3.28515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.73%) |Training time=0.41s (19.08%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.79
+epoch: 0|step: 218|ppo_ep: 1|act_loss: -0.043365478515625|cri_loss: -0.0163421630859375|unsuper_loss: 0.0
+average reward score: -3.22265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.91%) |Training time=0.40s (18.90%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.79
+[2023-04-21 23:46:38,128] [INFO] [logging.py:96:log_dist] [Rank 0] step=220, skipped=7, lr=[9.239091255755212e-06, 9.239091255755212e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-21 23:46:38,147] [INFO] [timer.py:199:stop] epoch=0/micro_step=220/global_step=220, RunningAvgSamplesPerSec=128.83310579299808, CurrSamplesPerSec=130.09917831217157, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-21 23:46:38,239] [INFO] [logging.py:96:log_dist] [Rank 0] step=220, skipped=6, lr=[4.783364618091804e-06, 4.783364618091804e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 219|ppo_ep: 1|act_loss: -0.01348876953125|cri_loss: -0.00437164306640625|unsuper_loss: 0.0
+average reward score: -3.30078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.80%) |Training time=0.41s (19.08%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.79
+epoch: 0|step: 220|ppo_ep: 1|act_loss: 0.0184326171875|cri_loss: 0.0106353759765625|unsuper_loss: 0.0
+average reward score: -2.9375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.80%) |Training time=0.41s (18.99%) |Others=0.11 (5.21%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.79
+epoch: 0|step: 221|ppo_ep: 1|act_loss: 0.07666015625|cri_loss: 0.0467529296875|unsuper_loss: 0.0
+average reward score: -3.046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.72s (76.54%) |Training time=0.41s (18.48%) |Others=0.11 (4.99%)|CurSamplesPerSec=14.28 |AvgSamplesPerSec=14.79
+epoch: 0|step: 222|ppo_ep: 1|act_loss: 0.0616455078125|cri_loss: 0.033416748046875|unsuper_loss: 0.0
+average reward score: -3.279296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.73%) |Training time=0.41s (19.08%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.79
+epoch: 0|step: 223|ppo_ep: 1|act_loss: 0.0288848876953125|cri_loss: 0.0172119140625|unsuper_loss: 0.0
+average reward score: -3.076171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.71%) |Training time=0.41s (19.12%) |Others=0.11 (5.17%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.79
+epoch: 0|step: 224|ppo_ep: 1|act_loss: 0.043487548828125|cri_loss: 0.024383544921875|unsuper_loss: 0.0
+average reward score: -3.6484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.91%) |Training time=0.40s (18.90%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.79
+epoch: 0|step: 225|ppo_ep: 1|act_loss: 0.0137481689453125|cri_loss: 0.00763702392578125|unsuper_loss: 0.0
+average reward score: -3.30078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.84%) |Training time=0.41s (18.95%) |Others=0.11 (5.21%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.79
+epoch: 0|step: 226|ppo_ep: 1|act_loss: -0.028289794921875|cri_loss: -0.01092529296875|unsuper_loss: 0.0
+average reward score: -3.255859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.72%) |Training time=0.41s (19.10%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.79
+epoch: 0|step: 227|ppo_ep: 1|act_loss: -0.014556884765625|cri_loss: -0.006732940673828125|unsuper_loss: 0.0
+average reward score: -3.125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.00%) |Training time=0.40s (18.89%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.79
+epoch: 0|step: 228|ppo_ep: 1|act_loss: -0.014495849609375|cri_loss: -0.003444671630859375|unsuper_loss: 0.0
+average reward score: -3.138671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.83%) |Training time=0.41s (18.99%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.79
+[2023-04-21 23:46:59,663] [INFO] [logging.py:96:log_dist] [Rank 0] step=230, skipped=7, lr=[9.16444411445309e-06, 9.16444411445309e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-21 23:46:59,682] [INFO] [timer.py:199:stop] epoch=0/micro_step=230/global_step=230, RunningAvgSamplesPerSec=128.95552365421398, CurrSamplesPerSec=131.04000781059312, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-21 23:46:59,774] [INFO] [logging.py:96:log_dist] [Rank 0] step=230, skipped=6, lr=[4.74438068238795e-06, 4.74438068238795e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 229|ppo_ep: 1|act_loss: 0.048187255859375|cri_loss: 0.0258636474609375|unsuper_loss: 0.0
+average reward score: -2.927734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.89%) |Training time=0.41s (19.00%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.79
+epoch: 0|step: 230|ppo_ep: 1|act_loss: 0.04071044921875|cri_loss: 0.0237884521484375|unsuper_loss: 0.0
+average reward score: -3.255859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.62%) |Training time=0.41s (19.19%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.79
+epoch: 0|step: 231|ppo_ep: 1|act_loss: -0.0203094482421875|cri_loss: -0.0077056884765625|unsuper_loss: 0.0
+average reward score: -3.017578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.85%) |Training time=0.41s (19.03%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.79
+epoch: 0|step: 232|ppo_ep: 1|act_loss: 0.045654296875|cri_loss: 0.0247802734375|unsuper_loss: 0.0
+average reward score: -2.765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.97%) |Training time=0.41s (18.88%) |Others=0.11 (5.15%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.79
+epoch: 0|step: 233|ppo_ep: 1|act_loss: 0.061920166015625|cri_loss: 0.032867431640625|unsuper_loss: 0.0
+average reward score: -2.89453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.66s (76.23%) |Training time=0.41s (18.73%) |Others=0.11 (5.05%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.79
+epoch: 0|step: 234|ppo_ep: 1|act_loss: 0.034759521484375|cri_loss: 0.0200958251953125|unsuper_loss: 0.0
+average reward score: -2.947265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.73s (77.00%) |Training time=0.41s (18.04%) |Others=0.11 (4.97%)|CurSamplesPerSec=14.22 |AvgSamplesPerSec=14.79
+epoch: 0|step: 235|ppo_ep: 1|act_loss: 0.0184326171875|cri_loss: 0.0116729736328125|unsuper_loss: 0.0
+average reward score: -3.146484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.95%) |Training time=0.41s (18.92%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.79
+epoch: 0|step: 236|ppo_ep: 1|act_loss: -0.003536224365234375|cri_loss: -7.43865966796875e-05|unsuper_loss: 0.0
+average reward score: -3.07421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.80s (77.77%) |Training time=0.40s (17.42%) |Others=0.11 (4.81%)|CurSamplesPerSec=13.81 |AvgSamplesPerSec=14.78
+epoch: 0|step: 237|ppo_ep: 1|act_loss: 0.0286865234375|cri_loss: 0.01538848876953125|unsuper_loss: 0.0
+average reward score: -3.251953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.93%) |Training time=0.41s (18.95%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.78
+epoch: 0|step: 238|ppo_ep: 1|act_loss: 0.00986480712890625|cri_loss: 0.005401611328125|unsuper_loss: 0.0
+average reward score: -3.1875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.92%) |Training time=0.41s (18.89%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.79
+[2023-04-21 23:47:21,463] [INFO] [logging.py:96:log_dist] [Rank 0] step=240, skipped=7, lr=[9.083925201920767e-06, 9.083925201920767e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-21 23:47:21,482] [INFO] [timer.py:199:stop] epoch=0/micro_step=240/global_step=240, RunningAvgSamplesPerSec=129.020879201406, CurrSamplesPerSec=129.08467586425778, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-21 23:47:21,574] [INFO] [logging.py:96:log_dist] [Rank 0] step=240, skipped=6, lr=[4.702359839289306e-06, 4.702359839289306e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 239|ppo_ep: 1|act_loss: 0.01123046875|cri_loss: 0.00597381591796875|unsuper_loss: 0.0
+average reward score: -2.947265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.74%) |Training time=0.41s (19.15%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.79
+epoch: 0|step: 240|ppo_ep: 1|act_loss: -0.01959228515625|cri_loss: -0.00701141357421875|unsuper_loss: 0.0
+average reward score: -2.98046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.31%) |Training time=0.42s (19.53%) |Others=0.11 (5.16%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.79
+epoch: 0|step: 241|ppo_ep: 1|act_loss: 0.024871826171875|cri_loss: 0.01422119140625|unsuper_loss: 0.0
+average reward score: -3.484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.61%) |Training time=0.41s (19.26%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.79
+epoch: 0|step: 242|ppo_ep: 1|act_loss: -0.00615692138671875|cri_loss: -0.001445770263671875|unsuper_loss: 0.0
+average reward score: -3.1171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.53%) |Training time=0.42s (19.34%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.79
+epoch: 0|step: 243|ppo_ep: 1|act_loss: 0.0120086669921875|cri_loss: 0.008575439453125|unsuper_loss: 0.0
+average reward score: -3.212890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.40%) |Training time=0.42s (19.40%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.79
+epoch: 0|step: 244|ppo_ep: 1|act_loss: -0.0282745361328125|cri_loss: -0.01092529296875|unsuper_loss: 0.0
+average reward score: -3.13671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.33%) |Training time=0.42s (19.49%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.79
+epoch: 0|step: 245|ppo_ep: 1|act_loss: 0.00045680999755859375|cri_loss: 0.0008997917175292969|unsuper_loss: 0.0
+average reward score: -3.115234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.35%) |Training time=0.42s (19.53%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.79
+epoch: 0|step: 246|ppo_ep: 1|act_loss: 0.00936126708984375|cri_loss: 0.0063629150390625|unsuper_loss: 0.0
+average reward score: -3.408203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.45%) |Training time=0.42s (19.37%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.79
+epoch: 0|step: 247|ppo_ep: 1|act_loss: 0.04052734375|cri_loss: 0.0243377685546875|unsuper_loss: 0.0
+average reward score: -3.361328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.60%) |Training time=0.41s (19.28%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.79
+epoch: 0|step: 248|ppo_ep: 1|act_loss: 0.0257568359375|cri_loss: 0.0141754150390625|unsuper_loss: 0.0
+average reward score: -3.10546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.43%) |Training time=0.42s (19.39%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.79
+[2023-04-21 23:47:42,957] [INFO] [logging.py:96:log_dist] [Rank 0] step=250, skipped=7, lr=[8.9976434695865e-06, 8.9976434695865e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-21 23:47:42,976] [INFO] [timer.py:199:stop] epoch=0/micro_step=250/global_step=250, RunningAvgSamplesPerSec=128.88742633520977, CurrSamplesPerSec=125.36975737337413, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-21 23:47:43,068] [INFO] [logging.py:96:log_dist] [Rank 0] step=250, skipped=6, lr=[4.657358947870691e-06, 4.657358947870691e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 249|ppo_ep: 1|act_loss: 0.06134033203125|cri_loss: 0.03369140625|unsuper_loss: 0.0
+average reward score: -3.513671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.40%) |Training time=0.42s (19.48%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.79
+epoch: 0|step: 250|ppo_ep: 1|act_loss: 0.021209716796875|cri_loss: 0.011810302734375|unsuper_loss: 0.0
+average reward score: -3.30859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.25%) |Training time=0.42s (19.59%) |Others=0.11 (5.16%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.79
+epoch: 0|step: 251|ppo_ep: 1|act_loss: -0.01953125|cri_loss: -0.0036773681640625|unsuper_loss: 0.0
+average reward score: -2.99609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.80s (77.64%) |Training time=0.41s (17.60%) |Others=0.11 (4.75%)|CurSamplesPerSec=13.82 |AvgSamplesPerSec=14.79
+epoch: 0|step: 252|ppo_ep: 1|act_loss: -0.0347900390625|cri_loss: -0.0166778564453125|unsuper_loss: 0.0
+average reward score: -3.17578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.49%) |Training time=0.41s (19.31%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.79
+epoch: 0|step: 253|ppo_ep: 1|act_loss: 0.0095672607421875|cri_loss: 0.005504608154296875|unsuper_loss: 0.0
+average reward score: -3.162109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.48%) |Training time=0.42s (19.40%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.79
+epoch: 0|step: 254|ppo_ep: 1|act_loss: -0.015533447265625|cri_loss: -0.00714874267578125|unsuper_loss: 0.0
+average reward score: -3.15625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.59%) |Training time=0.41s (19.20%) |Others=0.11 (5.21%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.79
+epoch: 0|step: 255|ppo_ep: 1|act_loss: 0.0496826171875|cri_loss: 0.0277099609375|unsuper_loss: 0.0
+average reward score: -3.283203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.48%) |Training time=0.42s (19.40%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.79
+epoch: 0|step: 256|ppo_ep: 1|act_loss: 0.0184783935546875|cri_loss: 0.01080322265625|unsuper_loss: 0.0
+average reward score: -3.18359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.33%) |Training time=0.42s (19.49%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.79
+epoch: 0|step: 257|ppo_ep: 1|act_loss: 0.01824951171875|cri_loss: 0.009918212890625|unsuper_loss: 0.0
+average reward score: -3.1484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.77%) |Training time=0.41s (19.11%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.79
+epoch: 0|step: 258|ppo_ep: 1|act_loss: 0.034423828125|cri_loss: 0.02008056640625|unsuper_loss: 0.0
+average reward score: -3.064453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.45%) |Training time=0.42s (19.35%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.79
+[2023-04-21 23:48:04,600] [INFO] [logging.py:96:log_dist] [Rank 0] step=260, skipped=7, lr=[8.905715666642176e-06, 8.905715666642176e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-21 23:48:04,619] [INFO] [timer.py:199:stop] epoch=0/micro_step=260/global_step=260, RunningAvgSamplesPerSec=128.81736360064403, CurrSamplesPerSec=128.79951864899283, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-21 23:48:04,711] [INFO] [logging.py:96:log_dist] [Rank 0] step=260, skipped=6, lr=[4.609438899557964e-06, 4.609438899557964e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 259|ppo_ep: 1|act_loss: -0.0223846435546875|cri_loss: -0.009674072265625|unsuper_loss: 0.0
+average reward score: -2.919921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.72%) |Training time=0.41s (19.17%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.79
+epoch: 0|step: 260|ppo_ep: 1|act_loss: 0.025238037109375|cri_loss: 0.01513671875|unsuper_loss: 0.0
+average reward score: -3.04296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.51%) |Training time=0.42s (19.33%) |Others=0.11 (5.15%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.79
+epoch: 0|step: 261|ppo_ep: 1|act_loss: 0.0604248046875|cri_loss: 0.0311431884765625|unsuper_loss: 0.0
+average reward score: -3.33203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.44%) |Training time=0.42s (19.44%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.79
+epoch: 0|step: 262|ppo_ep: 1|act_loss: -0.00435638427734375|cri_loss: -0.0010166168212890625|unsuper_loss: 0.0
+average reward score: -3.255859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.76%) |Training time=0.41s (19.07%) |Others=0.11 (5.17%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.79
+epoch: 0|step: 263|ppo_ep: 1|act_loss: 0.0341796875|cri_loss: 0.0180206298828125|unsuper_loss: 0.0
+average reward score: -3.05078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.66s (74.21%) |Training time=0.42s (18.90%) |Others=0.15 (6.89%)|CurSamplesPerSec=14.32 |AvgSamplesPerSec=14.79
+epoch: 0|step: 264|ppo_ep: 1|act_loss: -0.040283203125|cri_loss: -0.0096435546875|unsuper_loss: 0.0
+average reward score: -3.310546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.29%) |Training time=0.42s (19.49%) |Others=0.11 (5.22%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.79
+epoch: 0|step: 265|ppo_ep: 1|act_loss: 0.0830078125|cri_loss: 0.044769287109375|unsuper_loss: 0.0
+average reward score: -3.2421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.29%) |Training time=0.42s (19.60%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.79
+epoch: 0|step: 266|ppo_ep: 1|act_loss: 0.024688720703125|cri_loss: 0.0130157470703125|unsuper_loss: 0.0
+average reward score: -2.9296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.79s (77.45%) |Training time=0.41s (17.74%) |Others=0.11 (4.81%)|CurSamplesPerSec=13.84 |AvgSamplesPerSec=14.79
+epoch: 0|step: 267|ppo_ep: 1|act_loss: 0.033172607421875|cri_loss: 0.0172576904296875|unsuper_loss: 0.0
+average reward score: -3.1015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.38%) |Training time=0.42s (19.48%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.79
+epoch: 0|step: 268|ppo_ep: 1|act_loss: 0.0128936767578125|cri_loss: 0.00716400146484375|unsuper_loss: 0.0
+average reward score: -3.265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.36%) |Training time=0.42s (19.46%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.79
+[2023-04-21 23:48:26,362] [INFO] [logging.py:96:log_dist] [Rank 0] step=270, skipped=7, lr=[8.808266182068123e-06, 8.808266182068123e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-21 23:48:26,381] [INFO] [timer.py:199:stop] epoch=0/micro_step=270/global_step=270, RunningAvgSamplesPerSec=128.70210285554833, CurrSamplesPerSec=127.35821456360428, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-21 23:48:26,473] [INFO] [logging.py:96:log_dist] [Rank 0] step=270, skipped=6, lr=[4.558664535734864e-06, 4.558664535734864e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 269|ppo_ep: 1|act_loss: -0.032958984375|cri_loss: -0.0159454345703125|unsuper_loss: 0.0
+average reward score: -3.48828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.58%) |Training time=0.41s (19.31%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.79
+epoch: 0|step: 270|ppo_ep: 1|act_loss: -0.0345458984375|cri_loss: -0.0165863037109375|unsuper_loss: 0.0
+average reward score: -3.115234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.14%) |Training time=0.42s (19.69%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.79
+epoch: 0|step: 271|ppo_ep: 1|act_loss: -0.04461669921875|cri_loss: -0.021514892578125|unsuper_loss: 0.0
+average reward score: -3.1484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.40%) |Training time=0.42s (19.42%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.79
+epoch: 0|step: 272|ppo_ep: 1|act_loss: 0.0128631591796875|cri_loss: 0.007404327392578125|unsuper_loss: 0.0
+average reward score: -3.1171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.43%) |Training time=0.42s (19.38%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.79
+epoch: 0|step: 273|ppo_ep: 1|act_loss: -0.004085540771484375|cri_loss: -0.0012903213500976562|unsuper_loss: 0.0
+average reward score: -2.76953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.45%) |Training time=0.42s (19.41%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.79
+epoch: 0|step: 274|ppo_ep: 1|act_loss: 0.032745361328125|cri_loss: 0.0178070068359375|unsuper_loss: 0.0
+average reward score: -3.0625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.72%) |Training time=0.41s (19.06%) |Others=0.11 (5.22%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.79
+epoch: 0|step: 275|ppo_ep: 1|act_loss: -0.018463134765625|cri_loss: -0.008392333984375|unsuper_loss: 0.0
+average reward score: -2.84375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.00%) |Training time=0.43s (19.83%) |Others=0.11 (5.17%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.79
+epoch: 0|step: 276|ppo_ep: 1|act_loss: 0.002796173095703125|cri_loss: 0.0027217864990234375|unsuper_loss: 0.0
+average reward score: -3.08984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.36%) |Training time=0.42s (19.45%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.79
+epoch: 0|step: 277|ppo_ep: 1|act_loss: 0.0311737060546875|cri_loss: 0.01837158203125|unsuper_loss: 0.0
+average reward score: -3.36328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.37%) |Training time=0.42s (19.44%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.79
+epoch: 0|step: 278|ppo_ep: 1|act_loss: 0.010223388671875|cri_loss: 0.006526947021484375|unsuper_loss: 0.0
+average reward score: -3.1484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.23%) |Training time=0.42s (19.59%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.79
+[2023-04-21 23:48:47,839] [INFO] [logging.py:96:log_dist] [Rank 0] step=280, skipped=7, lr=[8.705426876320388e-06, 8.705426876320388e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-21 23:48:47,857] [INFO] [timer.py:199:stop] epoch=0/micro_step=280/global_step=280, RunningAvgSamplesPerSec=128.57407445120526, CurrSamplesPerSec=125.00824554262898, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-21 23:48:47,950] [INFO] [logging.py:96:log_dist] [Rank 0] step=280, skipped=6, lr=[4.5051045600050906e-06, 4.5051045600050906e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 279|ppo_ep: 1|act_loss: 0.0294342041015625|cri_loss: 0.016387939453125|unsuper_loss: 0.0
+average reward score: -3.021484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.38%) |Training time=0.42s (19.50%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.79
+epoch: 0|step: 280|ppo_ep: 1|act_loss: -0.008270263671875|cri_loss: -0.002567291259765625|unsuper_loss: 0.0
+average reward score: -3.06640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.16%) |Training time=0.42s (19.66%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.79
+epoch: 0|step: 281|ppo_ep: 1|act_loss: -0.012481689453125|cri_loss: -0.005329132080078125|unsuper_loss: 0.0
+average reward score: -3.12890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.73s (75.07%) |Training time=0.46s (20.16%) |Others=0.11 (4.78%)|CurSamplesPerSec=13.88 |AvgSamplesPerSec=14.79
+epoch: 0|step: 282|ppo_ep: 1|act_loss: 0.0285797119140625|cri_loss: 0.0158538818359375|unsuper_loss: 0.0
+average reward score: -3.10546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.29%) |Training time=0.42s (19.53%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.79
+epoch: 0|step: 283|ppo_ep: 1|act_loss: 0.0206298828125|cri_loss: 0.0119476318359375|unsuper_loss: 0.0
+average reward score: -3.171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.35%) |Training time=0.42s (19.53%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.79
+epoch: 0|step: 284|ppo_ep: 1|act_loss: 0.03485107421875|cri_loss: 0.01885986328125|unsuper_loss: 0.0
+average reward score: -2.890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.44%) |Training time=0.42s (19.37%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.79
+epoch: 0|step: 285|ppo_ep: 1|act_loss: 0.02923583984375|cri_loss: 0.0161285400390625|unsuper_loss: 0.0
+average reward score: -3.009765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.14%) |Training time=0.42s (19.74%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.79
+epoch: 0|step: 286|ppo_ep: 1|act_loss: -0.007232666015625|cri_loss: -0.0033016204833984375|unsuper_loss: 0.0
+average reward score: -3.35546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.21%) |Training time=0.42s (19.60%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.79
+epoch: 0|step: 287|ppo_ep: 1|act_loss: -0.016448974609375|cri_loss: -0.00637054443359375|unsuper_loss: 0.0
+average reward score: -3.037109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.18%) |Training time=0.42s (19.64%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.79
+epoch: 0|step: 288|ppo_ep: 1|act_loss: -0.0012378692626953125|cri_loss: 0.0008544921875|unsuper_loss: 0.0
+average reward score: -3.197265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.69%) |Training time=0.41s (19.15%) |Others=0.11 (5.16%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.79
+[2023-04-21 23:49:09,494] [INFO] [logging.py:96:log_dist] [Rank 0] step=290, skipped=7, lr=[8.597336902908245e-06, 8.597336902908245e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-21 23:49:09,512] [INFO] [timer.py:199:stop] epoch=0/micro_step=290/global_step=290, RunningAvgSamplesPerSec=128.3833988250307, CurrSamplesPerSec=127.5106669200076, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-21 23:49:09,605] [INFO] [logging.py:96:log_dist] [Rank 0] step=290, skipped=6, lr=[4.448831445228368e-06, 4.448831445228368e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 289|ppo_ep: 1|act_loss: -0.0286102294921875|cri_loss: -0.0133514404296875|unsuper_loss: 0.0
+average reward score: -3.240234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.60%) |Training time=0.41s (19.27%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.79
+epoch: 0|step: 290|ppo_ep: 1|act_loss: 0.0017309188842773438|cri_loss: 0.0014438629150390625|unsuper_loss: 0.0
+average reward score: -3.078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.30%) |Training time=0.42s (19.52%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.79
+epoch: 0|step: 291|ppo_ep: 1|act_loss: 0.024749755859375|cri_loss: 0.01300811767578125|unsuper_loss: 0.0
+average reward score: -3.166015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.28%) |Training time=0.42s (19.60%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.79
+epoch: 0|step: 292|ppo_ep: 1|act_loss: 0.0435791015625|cri_loss: 0.0221405029296875|unsuper_loss: 0.0
+average reward score: -3.3203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.50%) |Training time=0.41s (19.06%) |Others=0.12 (5.44%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.79
+epoch: 0|step: 293|ppo_ep: 1|act_loss: -0.0178680419921875|cri_loss: -0.0075836181640625|unsuper_loss: 0.0
+average reward score: -2.98046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.65s (74.49%) |Training time=0.45s (20.43%) |Others=0.11 (5.08%)|CurSamplesPerSec=14.47 |AvgSamplesPerSec=14.79
+epoch: 0|step: 294|ppo_ep: 1|act_loss: -0.00787353515625|cri_loss: -0.002841949462890625|unsuper_loss: 0.0
+average reward score: -3.28125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.30%) |Training time=0.42s (19.52%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.79
+epoch: 0|step: 295|ppo_ep: 1|act_loss: -0.031158447265625|cri_loss: -0.01505279541015625|unsuper_loss: 0.0
+average reward score: -3.064453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.48%) |Training time=0.42s (19.40%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.79
+epoch: 0|step: 296|ppo_ep: 1|act_loss: -0.013824462890625|cri_loss: -0.00616455078125|unsuper_loss: 0.0
+average reward score: -3.05078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.62s (71.92%) |Training time=0.43s (19.05%) |Others=0.20 (9.03%)|CurSamplesPerSec=14.20 |AvgSamplesPerSec=14.79
+epoch: 0|step: 297|ppo_ep: 1|act_loss: 0.004711151123046875|cri_loss: 0.003017425537109375|unsuper_loss: 0.0
+average reward score: -3.26171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.56%) |Training time=0.42s (19.33%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.79
+epoch: 0|step: 298|ppo_ep: 1|act_loss: 0.04693603515625|cri_loss: 0.024688720703125|unsuper_loss: 0.0
+average reward score: -2.8828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.48%) |Training time=0.41s (19.33%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.79
+[2023-04-21 23:49:31,158] [INFO] [logging.py:96:log_dist] [Rank 0] step=300, skipped=7, lr=[8.484142520103355e-06, 8.484142520103355e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-21 23:49:31,176] [INFO] [timer.py:199:stop] epoch=0/micro_step=300/global_step=300, RunningAvgSamplesPerSec=128.25590216328837, CurrSamplesPerSec=127.5973400134045, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-21 23:49:31,269] [INFO] [logging.py:96:log_dist] [Rank 0] step=300, skipped=6, lr=[4.389921335456253e-06, 4.389921335456253e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 299|ppo_ep: 1|act_loss: 0.015655517578125|cri_loss: 0.008056640625|unsuper_loss: 0.0
+average reward score: -3.076171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.62%) |Training time=0.41s (19.27%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.79
+epoch: 0|step: 300|ppo_ep: 1|act_loss: 0.003688812255859375|cri_loss: 0.0025196075439453125|unsuper_loss: 0.0
+average reward score: -3.318359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.60%) |Training time=0.41s (19.21%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.79
+epoch: 0|step: 301|ppo_ep: 1|act_loss: 0.01056671142578125|cri_loss: 0.006107330322265625|unsuper_loss: 0.0
+average reward score: -3.095703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.53%) |Training time=0.41s (19.29%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.79
+epoch: 0|step: 302|ppo_ep: 1|act_loss: -0.020965576171875|cri_loss: -0.00943756103515625|unsuper_loss: 0.0
+average reward score: -3.16796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.61%) |Training time=0.41s (19.20%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.79
+epoch: 0|step: 303|ppo_ep: 1|act_loss: -0.0183868408203125|cri_loss: -0.0089263916015625|unsuper_loss: 0.0
+average reward score: -3.26953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.63%) |Training time=0.41s (19.25%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.79
+epoch: 0|step: 304|ppo_ep: 1|act_loss: -0.04052734375|cri_loss: -0.016693115234375|unsuper_loss: 0.0
+average reward score: -3.220703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.58%) |Training time=0.41s (19.23%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.79
+epoch: 0|step: 305|ppo_ep: 1|act_loss: 0.0142059326171875|cri_loss: 0.0087127685546875|unsuper_loss: 0.0
+average reward score: -2.8828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.47%) |Training time=0.42s (19.42%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.79
+epoch: 0|step: 306|ppo_ep: 1|act_loss: 0.01409912109375|cri_loss: 0.0086669921875|unsuper_loss: 0.0
+average reward score: -3.0390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.51%) |Training time=0.41s (19.31%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.79
+epoch: 0|step: 307|ppo_ep: 1|act_loss: -0.0184478759765625|cri_loss: -0.0086517333984375|unsuper_loss: 0.0
+average reward score: -3.12890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.67%) |Training time=0.41s (19.14%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.79
+epoch: 0|step: 308|ppo_ep: 1|act_loss: 0.01383209228515625|cri_loss: 0.007350921630859375|unsuper_loss: 0.0
+average reward score: -3.076171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.67%) |Training time=0.41s (19.13%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.79
+[2023-04-21 23:49:52,621] [INFO] [logging.py:96:log_dist] [Rank 0] step=310, skipped=7, lr=[8.36599689303536e-06, 8.36599689303536e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-21 23:49:52,639] [INFO] [timer.py:199:stop] epoch=0/micro_step=310/global_step=310, RunningAvgSamplesPerSec=128.2386799067045, CurrSamplesPerSec=127.45194167612301, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-21 23:49:52,732] [INFO] [logging.py:96:log_dist] [Rank 0] step=310, skipped=6, lr=[4.328453942900402e-06, 4.328453942900402e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 309|ppo_ep: 1|act_loss: -0.00490570068359375|cri_loss: -0.002193450927734375|unsuper_loss: 0.0
+average reward score: -3.23828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.62%) |Training time=0.41s (19.27%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.79
+epoch: 0|step: 310|ppo_ep: 1|act_loss: 0.032989501953125|cri_loss: 0.01708984375|unsuper_loss: 0.0
+average reward score: -3.41015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.45%) |Training time=0.42s (19.38%) |Others=0.11 (5.17%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.79
+epoch: 0|step: 311|ppo_ep: 1|act_loss: 0.0374755859375|cri_loss: 0.02001953125|unsuper_loss: 0.0
+average reward score: -2.95703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.62%) |Training time=0.41s (19.25%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.79
+epoch: 0|step: 312|ppo_ep: 1|act_loss: 0.00566864013671875|cri_loss: 0.0037441253662109375|unsuper_loss: 0.0
+average reward score: -3.068359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.79s (77.58%) |Training time=0.41s (17.58%) |Others=0.11 (4.84%)|CurSamplesPerSec=13.88 |AvgSamplesPerSec=14.79
+epoch: 0|step: 313|ppo_ep: 1|act_loss: -0.030364990234375|cri_loss: -0.01445770263671875|unsuper_loss: 0.0
+average reward score: -2.875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.60%) |Training time=0.41s (19.27%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.79
+epoch: 0|step: 314|ppo_ep: 1|act_loss: 0.0321044921875|cri_loss: 0.0169677734375|unsuper_loss: 0.0
+average reward score: -3.283203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.57%) |Training time=0.41s (19.24%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.79
+epoch: 0|step: 315|ppo_ep: 1|act_loss: 0.009674072265625|cri_loss: 0.00579071044921875|unsuper_loss: 0.0
+average reward score: -3.19921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.39%) |Training time=0.42s (19.49%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.79
+epoch: 0|step: 316|ppo_ep: 1|act_loss: 0.03387451171875|cri_loss: 0.0174560546875|unsuper_loss: 0.0
+average reward score: -3.3046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.40%) |Training time=0.42s (19.41%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.79
+epoch: 0|step: 317|ppo_ep: 1|act_loss: -0.0147857666015625|cri_loss: -0.006465911865234375|unsuper_loss: 0.0
+average reward score: -3.1953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.38%) |Training time=0.42s (19.50%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.79
+epoch: 0|step: 318|ppo_ep: 1|act_loss: -0.0088348388671875|cri_loss: -0.00360870361328125|unsuper_loss: 0.0
+average reward score: -3.21875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.68%) |Training time=0.41s (19.11%) |Others=0.11 (5.21%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.79
+[2023-04-21 23:50:14,249] [INFO] [logging.py:96:log_dist] [Rank 0] step=320, skipped=7, lr=[8.243059886441706e-06, 8.243059886441706e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-21 23:50:14,267] [INFO] [timer.py:199:stop] epoch=0/micro_step=320/global_step=320, RunningAvgSamplesPerSec=128.20887438921002, CurrSamplesPerSec=127.5467764957208, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-21 23:50:14,360] [INFO] [logging.py:96:log_dist] [Rank 0] step=320, skipped=6, lr=[4.264512440072707e-06, 4.264512440072707e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 319|ppo_ep: 1|act_loss: -0.019866943359375|cri_loss: -0.00934600830078125|unsuper_loss: 0.0
+average reward score: -3.306640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.56%) |Training time=0.41s (19.28%) |Others=0.11 (5.16%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.79
+epoch: 0|step: 320|ppo_ep: 1|act_loss: -0.01428985595703125|cri_loss: -0.006603240966796875|unsuper_loss: 0.0
+average reward score: -3.1484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.63%) |Training time=0.41s (19.18%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.79
+epoch: 0|step: 321|ppo_ep: 1|act_loss: 0.03375244140625|cri_loss: 0.0189971923828125|unsuper_loss: 0.0
+average reward score: -3.15234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.73%) |Training time=0.41s (19.15%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.79
+epoch: 0|step: 322|ppo_ep: 1|act_loss: 0.0269317626953125|cri_loss: 0.0141143798828125|unsuper_loss: 0.0
+average reward score: -3.333984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.19%) |Training time=0.42s (19.37%) |Others=0.12 (5.44%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.79
+epoch: 0|step: 323|ppo_ep: 1|act_loss: -0.024261474609375|cri_loss: -0.0097808837890625|unsuper_loss: 0.0
+average reward score: -2.896484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.66s (74.84%) |Training time=0.44s (19.74%) |Others=0.12 (5.42%)|CurSamplesPerSec=14.46 |AvgSamplesPerSec=14.79
+epoch: 0|step: 324|ppo_ep: 1|act_loss: -0.04193115234375|cri_loss: -0.018463134765625|unsuper_loss: 0.0
+average reward score: -2.994140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.80%) |Training time=0.41s (19.01%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.79
+epoch: 0|step: 325|ppo_ep: 1|act_loss: -0.04461669921875|cri_loss: -0.015655517578125|unsuper_loss: 0.0
+average reward score: -3.23828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.49%) |Training time=0.42s (19.34%) |Others=0.11 (5.17%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.79
+epoch: 0|step: 326|ppo_ep: 1|act_loss: 0.02117919921875|cri_loss: 0.01165771484375|unsuper_loss: 0.0
+average reward score: -3.185546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.69%) |Training time=0.41s (19.11%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.79
+epoch: 0|step: 327|ppo_ep: 1|act_loss: 0.041748046875|cri_loss: 0.0219573974609375|unsuper_loss: 0.0
+average reward score: -3.2421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.80s (77.75%) |Training time=0.41s (17.50%) |Others=0.11 (4.75%)|CurSamplesPerSec=13.81 |AvgSamplesPerSec=14.79
+epoch: 0|step: 328|ppo_ep: 1|act_loss: 0.00531768798828125|cri_loss: 0.003467559814453125|unsuper_loss: 0.0
+average reward score: -2.93359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.93%) |Training time=0.40s (18.86%) |Others=0.11 (5.21%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.79
+[2023-04-21 23:50:35,959] [INFO] [logging.py:96:log_dist] [Rank 0] step=330, skipped=7, lr=[8.11549784835209e-06, 8.11549784835209e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-21 23:50:35,977] [INFO] [timer.py:199:stop] epoch=0/micro_step=330/global_step=330, RunningAvgSamplesPerSec=128.2441069419638, CurrSamplesPerSec=128.31952120768858, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-21 23:50:36,069] [INFO] [logging.py:96:log_dist] [Rank 0] step=330, skipped=6, lr=[4.198183347243233e-06, 4.198183347243233e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 329|ppo_ep: 1|act_loss: -0.0521240234375|cri_loss: -0.0235595703125|unsuper_loss: 0.0
+average reward score: -2.904296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.70%) |Training time=0.41s (19.21%) |Others=0.11 (5.10%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.79
+epoch: 0|step: 330|ppo_ep: 1|act_loss: -0.00556182861328125|cri_loss: -0.0016345977783203125|unsuper_loss: 0.0
+average reward score: -3.076171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.51%) |Training time=0.41s (19.30%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.79
+epoch: 0|step: 331|ppo_ep: 1|act_loss: 0.0175323486328125|cri_loss: 0.01015472412109375|unsuper_loss: 0.0
+average reward score: -3.19921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.88%) |Training time=0.41s (19.00%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.79
+epoch: 0|step: 332|ppo_ep: 1|act_loss: -0.09765625|cri_loss: -0.032867431640625|unsuper_loss: 0.0
+average reward score: -2.998046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.88%) |Training time=0.40s (18.92%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.79
+epoch: 0|step: 333|ppo_ep: 1|act_loss: -0.01300811767578125|cri_loss: -0.0055694580078125|unsuper_loss: 0.0
+average reward score: -3.279296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.85%) |Training time=0.41s (19.03%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.79
+epoch: 0|step: 334|ppo_ep: 1|act_loss: 0.03717041015625|cri_loss: 0.01959228515625|unsuper_loss: 0.0
+average reward score: -2.9921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.84%) |Training time=0.41s (18.97%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.79
+epoch: 0|step: 335|ppo_ep: 1|act_loss: -0.397216796875|cri_loss: -0.002685546875|unsuper_loss: 0.0
+average reward score: -3.240234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.66%) |Training time=0.41s (19.21%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.79
+epoch: 0|step: 336|ppo_ep: 1|act_loss: 0.016204833984375|cri_loss: 0.0084686279296875|unsuper_loss: 0.0
+average reward score: -3.3359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.88%) |Training time=0.40s (18.92%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.79
+epoch: 0|step: 337|ppo_ep: 1|act_loss: 0.00872039794921875|cri_loss: 0.005458831787109375|unsuper_loss: 0.0
+average reward score: -3.240234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.87%) |Training time=0.41s (19.01%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.80
+epoch: 0|step: 338|ppo_ep: 1|act_loss: -0.020416259765625|cri_loss: -0.00981903076171875|unsuper_loss: 0.0
+average reward score: -3.111328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.88%) |Training time=0.41s (18.91%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.80
+[2023-04-21 23:50:57,389] [INFO] [logging.py:96:log_dist] [Rank 0] step=340, skipped=7, lr=[7.983483385000299e-06, 7.983483385000299e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-21 23:50:57,407] [INFO] [timer.py:199:stop] epoch=0/micro_step=340/global_step=340, RunningAvgSamplesPerSec=128.31040792702186, CurrSamplesPerSec=131.94731062605743, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-21 23:50:57,500] [INFO] [logging.py:96:log_dist] [Rank 0] step=340, skipped=6, lr=[4.129556415368261e-06, 4.129556415368261e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 339|ppo_ep: 1|act_loss: 0.0107269287109375|cri_loss: 0.006103515625|unsuper_loss: 0.0
+average reward score: -3.22265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.93%) |Training time=0.41s (18.94%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.80
+epoch: 0|step: 340|ppo_ep: 1|act_loss: -0.01557159423828125|cri_loss: -0.00743865966796875|unsuper_loss: 0.0
+average reward score: -3.150390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.73%) |Training time=0.41s (19.08%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.80
+epoch: 0|step: 341|ppo_ep: 1|act_loss: 0.03826904296875|cri_loss: 0.01959228515625|unsuper_loss: 0.0
+average reward score: -3.291015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.69%) |Training time=0.41s (19.19%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.80
+epoch: 0|step: 342|ppo_ep: 1|act_loss: 0.034515380859375|cri_loss: 0.0180816650390625|unsuper_loss: 0.0
+average reward score: -3.01171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.80s (77.84%) |Training time=0.40s (17.35%) |Others=0.11 (4.81%)|CurSamplesPerSec=13.81 |AvgSamplesPerSec=14.79
+epoch: 0|step: 343|ppo_ep: 1|act_loss: -0.0108795166015625|cri_loss: -0.004795074462890625|unsuper_loss: 0.0
+average reward score: -3.037109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (76.01%) |Training time=0.41s (18.89%) |Others=0.11 (5.09%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.79
+epoch: 0|step: 344|ppo_ep: 1|act_loss: -0.0290069580078125|cri_loss: -0.0127410888671875|unsuper_loss: 0.0
+average reward score: -3.07421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.78%) |Training time=0.41s (19.03%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.79
+epoch: 0|step: 345|ppo_ep: 1|act_loss: -0.054351806640625|cri_loss: -0.0258941650390625|unsuper_loss: 0.0
+average reward score: -3.328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.74%) |Training time=0.41s (19.13%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.79
+epoch: 0|step: 346|ppo_ep: 1|act_loss: 0.017852783203125|cri_loss: 0.01021575927734375|unsuper_loss: 0.0
+average reward score: -3.541015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.68%) |Training time=0.41s (19.13%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.79
+epoch: 0|step: 347|ppo_ep: 1|act_loss: 0.0250396728515625|cri_loss: 0.01320648193359375|unsuper_loss: 0.0
+average reward score: -3.064453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.75%) |Training time=0.41s (19.06%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.80
+epoch: 0|step: 348|ppo_ep: 1|act_loss: 0.00946807861328125|cri_loss: 0.00527191162109375|unsuper_loss: 0.0
+average reward score: -3.248046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.77%) |Training time=0.41s (19.02%) |Others=0.11 (5.21%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.80
+[2023-04-21 23:51:19,031] [INFO] [logging.py:96:log_dist] [Rank 0] step=350, skipped=7, lr=[7.84719512726795e-06, 7.84719512726795e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-21 23:51:19,049] [INFO] [timer.py:199:stop] epoch=0/micro_step=350/global_step=350, RunningAvgSamplesPerSec=128.3622663332419, CurrSamplesPerSec=130.4942063169521, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-21 23:51:19,141] [INFO] [logging.py:96:log_dist] [Rank 0] step=350, skipped=6, lr=[4.058724504646834e-06, 4.058724504646834e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 349|ppo_ep: 1|act_loss: -0.01666259765625|cri_loss: -0.00795745849609375|unsuper_loss: 0.0
+average reward score: -3.275390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.79%) |Training time=0.41s (19.04%) |Others=0.11 (5.17%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.80
+epoch: 0|step: 350|ppo_ep: 1|act_loss: -0.00341796875|cri_loss: -0.0010671615600585938|unsuper_loss: 0.0
+average reward score: -3.029296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.66%) |Training time=0.41s (19.16%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.80
+epoch: 0|step: 351|ppo_ep: 1|act_loss: -0.00197601318359375|cri_loss: 0.0005855560302734375|unsuper_loss: 0.0
+average reward score: -3.26953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.84%) |Training time=0.41s (19.03%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.80
+epoch: 0|step: 352|ppo_ep: 1|act_loss: -0.046417236328125|cri_loss: -0.0222320556640625|unsuper_loss: 0.0
+average reward score: -3.353515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.28%) |Training time=0.42s (19.56%) |Others=0.11 (5.16%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.80
+epoch: 0|step: 353|ppo_ep: 1|act_loss: -0.042236328125|cri_loss: -0.0196533203125|unsuper_loss: 0.0
+average reward score: -3.1640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.82%) |Training time=0.43s (19.69%) |Others=0.12 (5.49%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.80
+epoch: 0|step: 354|ppo_ep: 1|act_loss: 0.1561279296875|cri_loss: 0.10369873046875|unsuper_loss: 0.0
+average reward score: -2.791015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.68%) |Training time=0.41s (19.12%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.80
+epoch: 0|step: 355|ppo_ep: 1|act_loss: -0.10009765625|cri_loss: -0.047027587890625|unsuper_loss: 0.0
+average reward score: -3.47265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.86%) |Training time=0.41s (19.01%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.80
+epoch: 0|step: 356|ppo_ep: 1|act_loss: -0.52490234375|cri_loss: -0.1448974609375|unsuper_loss: 0.0
+average reward score: -2.46484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.96%) |Training time=0.40s (18.84%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.80
+epoch: 0|step: 357|ppo_ep: 1|act_loss: -0.1102294921875|cri_loss: -0.044647216796875|unsuper_loss: 0.0
+average reward score: -3.310546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.81s (77.96%) |Training time=0.40s (17.19%) |Others=0.11 (4.85%)|CurSamplesPerSec=13.81 |AvgSamplesPerSec=14.79
+epoch: 0|step: 358|ppo_ep: 1|act_loss: -0.145263671875|cri_loss: -0.0587158203125|unsuper_loss: 0.0
+average reward score: -2.7578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.70%) |Training time=0.41s (19.11%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.79
+[2023-04-21 23:51:40,706] [INFO] [logging.py:96:log_dist] [Rank 0] step=360, skipped=7, lr=[7.70681748897618e-06, 7.70681748897618e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-21 23:51:40,725] [INFO] [timer.py:199:stop] epoch=0/micro_step=360/global_step=360, RunningAvgSamplesPerSec=128.3869736994276, CurrSamplesPerSec=130.07156673863915, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-21 23:51:40,817] [INFO] [logging.py:96:log_dist] [Rank 0] step=360, skipped=6, lr=[3.985783458870134e-06, 3.985783458870134e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 359|ppo_ep: 1|act_loss: 0.08428955078125|cri_loss: 0.08966064453125|unsuper_loss: 0.0
+average reward score: -3.318359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.83%) |Training time=0.41s (19.06%) |Others=0.11 (5.10%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.79
+epoch: 0|step: 360|ppo_ep: 1|act_loss: 0.00537109375|cri_loss: 0.051544189453125|unsuper_loss: 0.0
+average reward score: -2.791015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.65%) |Training time=0.41s (19.15%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.80
+epoch: 0|step: 361|ppo_ep: 1|act_loss: 0.467041015625|cri_loss: 0.276611328125|unsuper_loss: 0.0
+average reward score: -2.962890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.74%) |Training time=0.41s (19.14%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.80
+epoch: 0|step: 362|ppo_ep: 1|act_loss: -0.0123291015625|cri_loss: -0.004428863525390625|unsuper_loss: 0.0
+average reward score: -2.564453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.59%) |Training time=0.41s (19.23%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.80
+epoch: 0|step: 363|ppo_ep: 1|act_loss: 0.067138671875|cri_loss: 0.0399169921875|unsuper_loss: 0.0
+average reward score: -3.21875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.64%) |Training time=0.41s (19.17%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.80
+epoch: 0|step: 364|ppo_ep: 1|act_loss: 0.08740234375|cri_loss: 0.052581787109375|unsuper_loss: 0.0
+average reward score: -3.0
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.78%) |Training time=0.41s (19.04%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.80
+epoch: 0|step: 365|ppo_ep: 1|act_loss: 0.0660400390625|cri_loss: 0.03924560546875|unsuper_loss: 0.0
+average reward score: -3.08203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.82%) |Training time=0.41s (19.05%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.80
+epoch: 0|step: 366|ppo_ep: 1|act_loss: 0.067138671875|cri_loss: 0.0382080078125|unsuper_loss: 0.0
+average reward score: -2.95703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.70%) |Training time=0.41s (19.12%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.80
+epoch: 0|step: 367|ppo_ep: 1|act_loss: -0.007171630859375|cri_loss: 0.011077880859375|unsuper_loss: 0.0
+average reward score: -2.583984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.15%) |Training time=0.40s (18.73%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.80
+epoch: 0|step: 368|ppo_ep: 1|act_loss: 0.05474853515625|cri_loss: 0.030670166015625|unsuper_loss: 0.0
+average reward score: -2.748046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.77%) |Training time=0.41s (19.03%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.80
+[2023-04-21 23:52:02,156] [INFO] [logging.py:96:log_dist] [Rank 0] step=370, skipped=7, lr=[7.56254041735236e-06, 7.56254041735236e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-21 23:52:02,175] [INFO] [timer.py:199:stop] epoch=0/micro_step=370/global_step=370, RunningAvgSamplesPerSec=128.42285901291487, CurrSamplesPerSec=128.94528427142566, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-21 23:52:02,267] [INFO] [logging.py:96:log_dist] [Rank 0] step=370, skipped=6, lr=[3.910831975733717e-06, 3.910831975733717e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 369|ppo_ep: 1|act_loss: -0.0069732666015625|cri_loss: 0.0111236572265625|unsuper_loss: 0.0
+average reward score: -3.09765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.75%) |Training time=0.41s (19.15%) |Others=0.11 (5.09%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.80
+epoch: 0|step: 370|ppo_ep: 1|act_loss: 0.046630859375|cri_loss: 0.028472900390625|unsuper_loss: 0.0
+average reward score: -2.572265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.53%) |Training time=0.41s (19.29%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.80
+epoch: 0|step: 371|ppo_ep: 1|act_loss: 0.0626220703125|cri_loss: 0.036376953125|unsuper_loss: 0.0
+average reward score: -3.16796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (76.03%) |Training time=0.41s (18.80%) |Others=0.11 (5.17%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.80
+epoch: 0|step: 372|ppo_ep: 1|act_loss: -0.0172576904296875|cri_loss: 0.0005035400390625|unsuper_loss: 0.0
+average reward score: -2.599609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.93%) |Training time=0.41s (18.93%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.80
+epoch: 0|step: 373|ppo_ep: 1|act_loss: 0.0159454345703125|cri_loss: 0.009796142578125|unsuper_loss: 0.0
+average reward score: -3.115234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.04%) |Training time=0.46s (20.81%) |Others=0.11 (5.15%)|CurSamplesPerSec=14.57 |AvgSamplesPerSec=14.80
+epoch: 0|step: 374|ppo_ep: 1|act_loss: 0.05450439453125|cri_loss: 0.02813720703125|unsuper_loss: 0.0
+average reward score: -2.833984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.74%) |Training time=0.41s (19.07%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.80
+epoch: 0|step: 375|ppo_ep: 1|act_loss: 0.49609375|cri_loss: 0.3818359375|unsuper_loss: 0.0
+average reward score: -2.49609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.74%) |Training time=0.41s (19.11%) |Others=0.11 (5.16%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.80
+epoch: 0|step: 376|ppo_ep: 1|act_loss: 0.0114898681640625|cri_loss: 0.00635528564453125|unsuper_loss: 0.0
+average reward score: -2.494140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.87%) |Training time=0.41s (18.93%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.80
+epoch: 0|step: 377|ppo_ep: 1|act_loss: -0.131103515625|cri_loss: -0.00457763671875|unsuper_loss: 0.0
+average reward score: -2.17578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.94%) |Training time=0.41s (18.95%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.80
+epoch: 0|step: 378|ppo_ep: 1|act_loss: -0.07830810546875|cri_loss: -0.0125732421875|unsuper_loss: 0.0
+average reward score: -2.91015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.66%) |Training time=0.41s (19.16%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.80
+[2023-04-21 23:52:23,689] [INFO] [logging.py:96:log_dist] [Rank 0] step=380, skipped=7, lr=[7.414559136009473e-06, 7.414559136009473e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-21 23:52:23,707] [INFO] [timer.py:199:stop] epoch=0/micro_step=380/global_step=380, RunningAvgSamplesPerSec=128.4265231794791, CurrSamplesPerSec=129.32609768130243, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-21 23:52:23,800] [INFO] [logging.py:96:log_dist] [Rank 0] step=380, skipped=6, lr=[3.833971473288084e-06, 3.833971473288084e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 379|ppo_ep: 1|act_loss: 0.0286102294921875|cri_loss: 0.016632080078125|unsuper_loss: 0.0
+average reward score: -2.91015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.75%) |Training time=0.41s (19.13%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.80
+epoch: 0|step: 380|ppo_ep: 1|act_loss: 0.005611419677734375|cri_loss: 0.0057220458984375|unsuper_loss: 0.0
+average reward score: -2.734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.57%) |Training time=0.41s (19.24%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.80
+epoch: 0|step: 381|ppo_ep: 1|act_loss: 0.0736083984375|cri_loss: 0.0499267578125|unsuper_loss: 0.0
+average reward score: -2.40234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.93%) |Training time=0.41s (18.99%) |Others=0.11 (5.08%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.80
+epoch: 0|step: 382|ppo_ep: 1|act_loss: -0.00743865966796875|cri_loss: 0.00220489501953125|unsuper_loss: 0.0
+average reward score: -2.951171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.57%) |Training time=0.42s (19.26%) |Others=0.11 (5.17%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.80
+epoch: 0|step: 383|ppo_ep: 1|act_loss: 0.1107177734375|cri_loss: 0.07763671875|unsuper_loss: 0.0
+average reward score: -2.509765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.34%) |Training time=0.43s (19.56%) |Others=0.11 (5.09%)|CurSamplesPerSec=14.57 |AvgSamplesPerSec=14.80
+epoch: 0|step: 384|ppo_ep: 1|act_loss: 0.1396484375|cri_loss: 0.0743408203125|unsuper_loss: 0.0
+average reward score: -2.666015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.75%) |Training time=0.41s (19.12%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.80
+epoch: 0|step: 385|ppo_ep: 1|act_loss: 0.2626953125|cri_loss: 0.140869140625|unsuper_loss: 0.0
+average reward score: -2.35546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.75%) |Training time=0.41s (19.14%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.80
+epoch: 0|step: 386|ppo_ep: 1|act_loss: 0.2802734375|cri_loss: 0.16455078125|unsuper_loss: 0.0
+average reward score: -2.08203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.70%) |Training time=0.41s (19.12%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.80
+epoch: 0|step: 387|ppo_ep: 1|act_loss: 0.134521484375|cri_loss: 0.08135986328125|unsuper_loss: 0.0
+average reward score: -2.234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.00%) |Training time=0.40s (18.87%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.80
+epoch: 0|step: 388|ppo_ep: 1|act_loss: 0.43701171875|cri_loss: 0.2939453125|unsuper_loss: 0.0
+average reward score: -2.03125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.72%) |Training time=0.41s (19.07%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.80
+[2023-04-21 23:52:45,396] [INFO] [logging.py:96:log_dist] [Rank 0] step=390, skipped=7, lr=[7.263073880785904e-06, 7.263073880785904e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-21 23:52:45,415] [INFO] [timer.py:199:stop] epoch=0/micro_step=390/global_step=390, RunningAvgSamplesPerSec=128.43768873407188, CurrSamplesPerSec=130.7252716427912, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-21 23:52:45,507] [INFO] [logging.py:96:log_dist] [Rank 0] step=390, skipped=6, lr=[3.7553059527082913e-06, 3.7553059527082913e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 389|ppo_ep: 1|act_loss: -0.04669189453125|cri_loss: -0.006866455078125|unsuper_loss: 0.0
+average reward score: -1.6533203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.80s (77.65%) |Training time=0.41s (17.61%) |Others=0.11 (4.74%)|CurSamplesPerSec=13.82 |AvgSamplesPerSec=14.80
+epoch: 0|step: 390|ppo_ep: 1|act_loss: 0.0146636962890625|cri_loss: 0.017120361328125|unsuper_loss: 0.0
+average reward score: -2.138671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.58%) |Training time=0.41s (19.23%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.80
+epoch: 0|step: 391|ppo_ep: 1|act_loss: -0.08355712890625|cri_loss: 0.01141357421875|unsuper_loss: 0.0
+average reward score: -1.2939453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.78%) |Training time=0.41s (19.10%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.80
+epoch: 0|step: 392|ppo_ep: 1|act_loss: 0.0709228515625|cri_loss: 0.086181640625|unsuper_loss: 0.0
+average reward score: -2.099609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.84%) |Training time=0.41s (18.98%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.80
+epoch: 0|step: 393|ppo_ep: 1|act_loss: -0.19091796875|cri_loss: -0.0277099609375|unsuper_loss: 0.0
+average reward score: -1.607421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.89%) |Training time=0.41s (18.93%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.80
+epoch: 0|step: 394|ppo_ep: 1|act_loss: -0.13671875|cri_loss: -0.04266357421875|unsuper_loss: 0.0
+average reward score: -2.095703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.80%) |Training time=0.41s (19.01%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.80
+epoch: 0|step: 395|ppo_ep: 1|act_loss: -0.0543212890625|cri_loss: -0.0179901123046875|unsuper_loss: 0.0
+average reward score: -2.056640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.75%) |Training time=0.41s (19.13%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.80
+epoch: 0|step: 396|ppo_ep: 1|act_loss: -0.08978271484375|cri_loss: -0.009521484375|unsuper_loss: 0.0
+average reward score: -1.8330078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.86%) |Training time=0.41s (18.89%) |Others=0.11 (5.24%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.80
+epoch: 0|step: 397|ppo_ep: 1|act_loss: 0.1724853515625|cri_loss: 0.1109619140625|unsuper_loss: 0.0
+average reward score: -2.03515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.83%) |Training time=0.41s (19.03%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.80
+epoch: 0|step: 398|ppo_ep: 1|act_loss: 0.001979827880859375|cri_loss: 0.0027256011962890625|unsuper_loss: 0.0
+average reward score: -1.17578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.60%) |Training time=0.41s (19.22%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.80
+[2023-04-21 23:53:06,868] [INFO] [logging.py:96:log_dist] [Rank 0] step=400, skipped=7, lr=[7.108289628803138e-06, 7.108289628803138e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-21 23:53:06,886] [INFO] [timer.py:199:stop] epoch=0/micro_step=400/global_step=400, RunningAvgSamplesPerSec=128.47943609625108, CurrSamplesPerSec=132.42613482199326, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-21 23:53:06,978] [INFO] [logging.py:96:log_dist] [Rank 0] step=400, skipped=6, lr=[3.6749418575683005e-06, 3.6749418575683005e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 399|ppo_ep: 1|act_loss: -0.1566162109375|cri_loss: -0.063720703125|unsuper_loss: 0.0
+average reward score: -1.26953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (76.19%) |Training time=0.40s (18.74%) |Others=0.11 (5.07%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.80
+[2023-04-21 23:53:09,003] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 8192, but hysteresis is 2. Reducing hysteresis to 1
+[2023-04-21 23:53:09,088] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 16384, but hysteresis is 2. Reducing hysteresis to 1
+epoch: 0|step: 400|ppo_ep: 1|act_loss: 0.1396484375|cri_loss: 0.07928466796875|unsuper_loss: 0.0
+average reward score: -3.0390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.11s |Gather latency=0.00s (0.00%) |Generate time=1.62s (76.99%) |Training time=0.38s (18.15%) |Others=0.10 (4.86%)|CurSamplesPerSec=15.18 |AvgSamplesPerSec=14.80
+epoch: 0|step: 401|ppo_ep: 1|act_loss: 0.01202392578125|cri_loss: 0.018402099609375|unsuper_loss: 0.0
+average reward score: -1.294921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.91%) |Training time=0.41s (18.89%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.80
+epoch: 0|step: 402|ppo_ep: 1|act_loss: 0.045501708984375|cri_loss: 0.03936767578125|unsuper_loss: 0.0
+average reward score: -0.3251953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.72%) |Training time=0.41s (19.10%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.80
+epoch: 0|step: 403|ppo_ep: 1|act_loss: 0.0083465576171875|cri_loss: 0.013916015625|unsuper_loss: 0.0
+average reward score: -1.5771484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.84%) |Training time=0.41s (19.03%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.80
+epoch: 0|step: 404|ppo_ep: 1|act_loss: 0.258544921875|cri_loss: 0.1500244140625|unsuper_loss: 0.0
+average reward score: -1.0009765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.80s (77.63%) |Training time=0.41s (17.58%) |Others=0.11 (4.79%)|CurSamplesPerSec=13.77 |AvgSamplesPerSec=14.80
+epoch: 0|step: 405|ppo_ep: 1|act_loss: 0.07568359375|cri_loss: 0.05047607421875|unsuper_loss: 0.0
+average reward score: -1.263671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.78%) |Training time=0.41s (19.10%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.80
+epoch: 0|step: 406|ppo_ep: 1|act_loss: 0.085693359375|cri_loss: 0.05291748046875|unsuper_loss: 0.0
+average reward score: -0.6455078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.86%) |Training time=0.41s (18.92%) |Others=0.11 (5.23%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.80
+epoch: 0|step: 407|ppo_ep: 1|act_loss: 0.181884765625|cri_loss: 0.09857177734375|unsuper_loss: 0.0
+average reward score: -2.736328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.91%) |Training time=0.41s (18.98%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.80
+epoch: 0|step: 408|ppo_ep: 1|act_loss: 0.158447265625|cri_loss: 0.0875244140625|unsuper_loss: 0.0
+average reward score: -1.48046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.65%) |Training time=0.41s (19.17%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.80
+[2023-04-21 23:53:28,464] [INFO] [logging.py:96:log_dist] [Rank 0] step=410, skipped=8, lr=[6.966336175129223e-06, 6.966336175129223e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-21 23:53:28,483] [INFO] [timer.py:199:stop] epoch=0/micro_step=410/global_step=410, RunningAvgSamplesPerSec=128.54989718289136, CurrSamplesPerSec=128.8609771786822, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-21 23:53:28,575] [INFO] [logging.py:96:log_dist] [Rank 0] step=410, skipped=7, lr=[3.6012517207813124e-06, 3.6012517207813124e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 409|ppo_ep: 1|act_loss: 0.095703125|cri_loss: 0.05157470703125|unsuper_loss: 0.0
+average reward score: -2.11328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.74%) |Training time=0.41s (19.16%) |Others=0.11 (5.10%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.80
+epoch: 0|step: 410|ppo_ep: 1|act_loss: 0.05682373046875|cri_loss: 0.037109375|unsuper_loss: 0.0
+average reward score: -2.6484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.60%) |Training time=0.41s (19.22%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.80
+epoch: 0|step: 411|ppo_ep: 1|act_loss: -0.013153076171875|cri_loss: -0.00165557861328125|unsuper_loss: 0.0
+average reward score: -1.658203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.70%) |Training time=0.41s (19.18%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.80
+epoch: 0|step: 412|ppo_ep: 1|act_loss: -0.0157012939453125|cri_loss: 0.0013580322265625|unsuper_loss: 0.0
+average reward score: -1.9765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.27%) |Training time=0.42s (19.57%) |Others=0.11 (5.16%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.80
+epoch: 0|step: 413|ppo_ep: 1|act_loss: 0.03375244140625|cri_loss: 0.0190582275390625|unsuper_loss: 0.0
+average reward score: -2.056640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.68s (75.46%) |Training time=0.44s (19.59%) |Others=0.11 (4.95%)|CurSamplesPerSec=14.40 |AvgSamplesPerSec=14.80
+epoch: 0|step: 414|ppo_ep: 1|act_loss: -0.0297698974609375|cri_loss: -0.0040740966796875|unsuper_loss: 0.0
+average reward score: -0.94189453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.77%) |Training time=0.41s (19.05%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.80
+epoch: 0|step: 415|ppo_ep: 1|act_loss: -0.07012939453125|cri_loss: -0.0224456787109375|unsuper_loss: 0.0
+average reward score: -1.14453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.86%) |Training time=0.41s (19.02%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.80
+epoch: 0|step: 416|ppo_ep: 1|act_loss: 0.0183258056640625|cri_loss: 0.01177978515625|unsuper_loss: 0.0
+average reward score: -1.537109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.63%) |Training time=0.41s (19.21%) |Others=0.11 (5.16%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.80
+epoch: 0|step: 417|ppo_ep: 1|act_loss: 0.04827880859375|cri_loss: 0.031829833984375|unsuper_loss: 0.0
+average reward score: -1.6015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.92%) |Training time=0.40s (18.88%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.80
+epoch: 0|step: 418|ppo_ep: 1|act_loss: 0.0028438568115234375|cri_loss: 0.00577545166015625|unsuper_loss: 0.0
+average reward score: -2.248046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.80s (77.76%) |Training time=0.40s (17.36%) |Others=0.11 (4.88%)|CurSamplesPerSec=13.79 |AvgSamplesPerSec=14.80
+[2023-04-21 23:53:50,196] [INFO] [logging.py:96:log_dist] [Rank 0] step=420, skipped=8, lr=[6.805864300541598e-06, 6.805864300541598e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-21 23:53:50,214] [INFO] [timer.py:199:stop] epoch=0/micro_step=420/global_step=420, RunningAvgSamplesPerSec=128.56527804260318, CurrSamplesPerSec=128.33657606523457, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-21 23:53:50,307] [INFO] [logging.py:96:log_dist] [Rank 0] step=420, skipped=7, lr=[3.5179616991058513e-06, 3.5179616991058513e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 419|ppo_ep: 1|act_loss: 0.1199951171875|cri_loss: 0.06622314453125|unsuper_loss: 0.0
+average reward score: -1.939453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.66%) |Training time=0.41s (19.22%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.80
+epoch: 0|step: 420|ppo_ep: 1|act_loss: 0.07080078125|cri_loss: 0.040679931640625|unsuper_loss: 0.0
+average reward score: -0.76171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.56%) |Training time=0.41s (19.25%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.80
+epoch: 0|step: 421|ppo_ep: 1|act_loss: 0.10882568359375|cri_loss: 0.05914306640625|unsuper_loss: 0.0
+average reward score: -1.1044921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.64%) |Training time=0.41s (19.20%) |Others=0.11 (5.17%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.80
+epoch: 0|step: 422|ppo_ep: 1|act_loss: 0.1099853515625|cri_loss: 0.0634765625|unsuper_loss: 0.0
+average reward score: -1.3974609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.73%) |Training time=0.41s (19.08%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.80
+epoch: 0|step: 423|ppo_ep: 1|act_loss: 0.1416015625|cri_loss: 0.07623291015625|unsuper_loss: 0.0
+average reward score: -1.330078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.88%) |Training time=0.41s (18.99%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.80
+epoch: 0|step: 424|ppo_ep: 1|act_loss: -0.062744140625|cri_loss: -0.02197265625|unsuper_loss: 0.0
+average reward score: -2.21484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.80%) |Training time=0.41s (19.06%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.80
+epoch: 0|step: 425|ppo_ep: 1|act_loss: -0.0972900390625|cri_loss: -0.0452880859375|unsuper_loss: 0.0
+average reward score: -0.423583984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.77%) |Training time=0.41s (19.09%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.80
+epoch: 0|step: 426|ppo_ep: 1|act_loss: -0.034149169921875|cri_loss: -0.012603759765625|unsuper_loss: 0.0
+average reward score: -1.7529296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.56%) |Training time=0.41s (19.32%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.80
+epoch: 0|step: 427|ppo_ep: 1|act_loss: 0.03985595703125|cri_loss: 0.031280517578125|unsuper_loss: 0.0
+average reward score: -1.69921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (76.11%) |Training time=0.40s (18.77%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.80
+epoch: 0|step: 428|ppo_ep: 1|act_loss: -0.0450439453125|cri_loss: -0.0140380859375|unsuper_loss: 0.0
+average reward score: -1.677734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.90%) |Training time=0.41s (18.97%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.80
+[2023-04-21 23:54:11,627] [INFO] [logging.py:96:log_dist] [Rank 0] step=430, skipped=8, lr=[6.642712086789769e-06, 6.642712086789769e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-21 23:54:11,646] [INFO] [timer.py:199:stop] epoch=0/micro_step=430/global_step=430, RunningAvgSamplesPerSec=128.5960552961456, CurrSamplesPerSec=130.29430586188687, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-21 23:54:11,738] [INFO] [logging.py:96:log_dist] [Rank 0] step=430, skipped=7, lr=[3.43329425717549e-06, 3.43329425717549e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 429|ppo_ep: 1|act_loss: -0.02001953125|cri_loss: -0.00357818603515625|unsuper_loss: 0.0
+average reward score: -1.1015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.80%) |Training time=0.41s (19.08%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.80
+epoch: 0|step: 430|ppo_ep: 1|act_loss: -0.10736083984375|cri_loss: -0.04388427734375|unsuper_loss: 0.0
+average reward score: -0.8544921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.64%) |Training time=0.41s (19.23%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.80
+epoch: 0|step: 431|ppo_ep: 1|act_loss: -0.02264404296875|cri_loss: -0.003387451171875|unsuper_loss: 0.0
+average reward score: -1.4892578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.67%) |Training time=0.41s (19.20%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.80
+epoch: 0|step: 432|ppo_ep: 1|act_loss: -0.01134490966796875|cri_loss: 1.52587890625e-05|unsuper_loss: 0.0
+average reward score: -1.4912109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.76%) |Training time=0.41s (19.11%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.80
+epoch: 0|step: 433|ppo_ep: 1|act_loss: 0.04595947265625|cri_loss: 0.03076171875|unsuper_loss: 0.0
+average reward score: -0.72021484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.80s (77.87%) |Training time=0.40s (17.38%) |Others=0.11 (4.76%)|CurSamplesPerSec=13.83 |AvgSamplesPerSec=14.80
+epoch: 0|step: 434|ppo_ep: 1|act_loss: 0.06781005859375|cri_loss: 0.0389404296875|unsuper_loss: 0.0
+average reward score: -0.701171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.71%) |Training time=0.41s (19.17%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.80
+epoch: 0|step: 435|ppo_ep: 1|act_loss: 0.159423828125|cri_loss: 0.0850830078125|unsuper_loss: 0.0
+average reward score: -1.384765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.48%) |Training time=0.42s (19.41%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.80
+epoch: 0|step: 436|ppo_ep: 1|act_loss: 0.006256103515625|cri_loss: 0.0110015869140625|unsuper_loss: 0.0
+average reward score: -0.005615234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.75%) |Training time=0.41s (19.13%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.80
+epoch: 0|step: 437|ppo_ep: 1|act_loss: 0.0570068359375|cri_loss: 0.031219482421875|unsuper_loss: 0.0
+average reward score: -0.34619140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.84%) |Training time=0.41s (19.03%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.80
+epoch: 0|step: 438|ppo_ep: 1|act_loss: 0.08782958984375|cri_loss: 0.05389404296875|unsuper_loss: 0.0
+average reward score: -0.9169921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.90%) |Training time=0.41s (18.96%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.80
+[2023-04-21 23:54:33,227] [INFO] [logging.py:96:log_dist] [Rank 0] step=440, skipped=8, lr=[6.477100297743388e-06, 6.477100297743388e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-21 23:54:33,245] [INFO] [timer.py:199:stop] epoch=0/micro_step=440/global_step=440, RunningAvgSamplesPerSec=128.61663589271615, CurrSamplesPerSec=129.5595441496437, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-21 23:54:33,331] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 16384, reducing to 8192
+[2023-04-21 23:54:33,331] [INFO] [logging.py:96:log_dist] [Rank 0] step=440, skipped=8, lr=[3.356010516965486e-06, 3.356010516965486e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 439|ppo_ep: 1|act_loss: 0.0823974609375|cri_loss: 0.0489501953125|unsuper_loss: 0.0
+average reward score: -0.68359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.62s (76.07%) |Training time=0.41s (19.20%) |Others=0.10 (4.74%)|CurSamplesPerSec=15.00 |AvgSamplesPerSec=14.80
+epoch: 0|step: 440|ppo_ep: 1|act_loss: 0.05072021484375|cri_loss: 0.03387451171875|unsuper_loss: 0.0
+average reward score: -0.57275390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.75%) |Training time=0.41s (19.11%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.80
+epoch: 0|step: 441|ppo_ep: 1|act_loss: -0.08514404296875|cri_loss: -0.037109375|unsuper_loss: 0.0
+average reward score: -0.2174072265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.88%) |Training time=0.41s (18.98%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.80
+epoch: 0|step: 442|ppo_ep: 1|act_loss: -0.02337646484375|cri_loss: 0.000213623046875|unsuper_loss: 0.0
+average reward score: -1.169921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.43%) |Training time=0.41s (19.20%) |Others=0.12 (5.37%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.80
+epoch: 0|step: 443|ppo_ep: 1|act_loss: 0.06756591796875|cri_loss: 0.038299560546875|unsuper_loss: 0.0
+average reward score: -1.412109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.70s (75.45%) |Training time=0.44s (19.66%) |Others=0.11 (4.90%)|CurSamplesPerSec=14.21 |AvgSamplesPerSec=14.80
+epoch: 0|step: 444|ppo_ep: 1|act_loss: 0.0401611328125|cri_loss: 0.0350341796875|unsuper_loss: 0.0
+average reward score: -0.250732421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.97%) |Training time=0.40s (18.88%) |Others=0.11 (5.15%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.80
+epoch: 0|step: 445|ppo_ep: 1|act_loss: 0.0426025390625|cri_loss: 0.029022216796875|unsuper_loss: 0.0
+average reward score: -0.160888671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.78%) |Training time=0.41s (19.09%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.80
+epoch: 0|step: 446|ppo_ep: 1|act_loss: -0.05694580078125|cri_loss: -0.0224609375|unsuper_loss: 0.0
+average reward score: -1.337890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.89%) |Training time=0.41s (18.98%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.80
+epoch: 0|step: 447|ppo_ep: 1|act_loss: -0.225341796875|cri_loss: -0.06365966796875|unsuper_loss: 0.0
+average reward score: -0.393310546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.01%) |Training time=0.40s (18.87%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.80
+epoch: 0|step: 448|ppo_ep: 1|act_loss: -0.0220947265625|cri_loss: -0.00424957275390625|unsuper_loss: 0.0
+average reward score: -1.837890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.80s (77.86%) |Training time=0.40s (17.37%) |Others=0.11 (4.77%)|CurSamplesPerSec=13.86 |AvgSamplesPerSec=14.80
+[2023-04-21 23:54:54,949] [INFO] [logging.py:96:log_dist] [Rank 0] step=450, skipped=8, lr=[6.309253025362775e-06, 6.309253025362775e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-21 23:54:54,967] [INFO] [timer.py:199:stop] epoch=0/micro_step=450/global_step=450, RunningAvgSamplesPerSec=128.63759509044516, CurrSamplesPerSec=130.90759846520874, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-21 23:54:55,059] [INFO] [logging.py:96:log_dist] [Rank 0] step=450, skipped=8, lr=[3.2690430183226814e-06, 3.2690430183226814e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 449|ppo_ep: 1|act_loss: 0.0267791748046875|cri_loss: 0.0187225341796875|unsuper_loss: 0.0
+average reward score: -0.71337890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (76.05%) |Training time=0.41s (18.87%) |Others=0.11 (5.08%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.80
+epoch: 0|step: 450|ppo_ep: 1|act_loss: 0.006755828857421875|cri_loss: 0.0121002197265625|unsuper_loss: 0.0
+average reward score: -0.953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.91%) |Training time=0.41s (18.95%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.80
+epoch: 0|step: 451|ppo_ep: 1|act_loss: 0.0018024444580078125|cri_loss: 0.0056610107421875|unsuper_loss: 0.0
+average reward score: -0.9111328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.88%) |Training time=0.41s (18.99%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.80
+epoch: 0|step: 452|ppo_ep: 1|act_loss: 0.0628662109375|cri_loss: 0.03326416015625|unsuper_loss: 0.0
+average reward score: -0.74658203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.96%) |Training time=0.40s (18.91%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.80
+epoch: 0|step: 453|ppo_ep: 1|act_loss: 0.912109375|cri_loss: 0.58984375|unsuper_loss: 0.0
+average reward score: -0.396728515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.62s (76.04%) |Training time=0.40s (18.82%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.80
+epoch: 0|step: 454|ppo_ep: 1|act_loss: -0.08624267578125|cri_loss: -0.029388427734375|unsuper_loss: 0.0
+average reward score: -0.7763671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.92%) |Training time=0.41s (18.93%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.80
+epoch: 0|step: 455|ppo_ep: 1|act_loss: -0.068359375|cri_loss: -0.0296630859375|unsuper_loss: 0.0
+average reward score: -0.63037109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.65s (76.25%) |Training time=0.40s (18.65%) |Others=0.11 (5.10%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.80
+epoch: 0|step: 456|ppo_ep: 1|act_loss: -0.039031982421875|cri_loss: -0.0080718994140625|unsuper_loss: 0.0
+average reward score: -1.189453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.04%) |Training time=0.40s (18.82%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.80
+epoch: 0|step: 457|ppo_ep: 1|act_loss: -0.0726318359375|cri_loss: -0.0270233154296875|unsuper_loss: 0.0
+average reward score: -1.064453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.12%) |Training time=0.40s (18.75%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.80
+epoch: 0|step: 458|ppo_ep: 1|act_loss: -0.0672607421875|cri_loss: -0.027008056640625|unsuper_loss: 0.0
+average reward score: -1.244140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.00%) |Training time=0.40s (18.86%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.80
+[2023-04-21 23:55:16,358] [INFO] [logging.py:96:log_dist] [Rank 0] step=460, skipped=8, lr=[6.139397386476498e-06, 6.139397386476498e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-21 23:55:16,377] [INFO] [timer.py:199:stop] epoch=0/micro_step=460/global_step=460, RunningAvgSamplesPerSec=128.72123352291797, CurrSamplesPerSec=131.59581302136237, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-21 23:55:16,469] [INFO] [logging.py:96:log_dist] [Rank 0] step=460, skipped=8, lr=[3.181034915272797e-06, 3.181034915272797e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 459|ppo_ep: 1|act_loss: 0.0132598876953125|cri_loss: 0.01461029052734375|unsuper_loss: 0.0
+average reward score: -0.372314453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.93%) |Training time=0.41s (18.96%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.80
+epoch: 0|step: 460|ppo_ep: 1|act_loss: 0.023040771484375|cri_loss: 0.0181884765625|unsuper_loss: 0.0
+average reward score: -1.369140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.89%) |Training time=0.41s (18.98%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.80
+epoch: 0|step: 461|ppo_ep: 1|act_loss: 0.0279541015625|cri_loss: 0.0188140869140625|unsuper_loss: 0.0
+average reward score: -1.083984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.99%) |Training time=0.40s (18.88%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.81
+epoch: 0|step: 462|ppo_ep: 1|act_loss: 0.0711669921875|cri_loss: 0.041351318359375|unsuper_loss: 0.0
+average reward score: 0.066162109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.87%) |Training time=0.41s (19.00%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81
+epoch: 0|step: 463|ppo_ep: 1|act_loss: 0.15478515625|cri_loss: 0.08837890625|unsuper_loss: 0.0
+average reward score: -0.317138671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.89%) |Training time=0.41s (18.99%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81
+epoch: 0|step: 464|ppo_ep: 1|act_loss: 0.084228515625|cri_loss: 0.044769287109375|unsuper_loss: 0.0
+average reward score: -1.904296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.80s (77.77%) |Training time=0.40s (17.46%) |Others=0.11 (4.77%)|CurSamplesPerSec=13.86 |AvgSamplesPerSec=14.80
+epoch: 0|step: 465|ppo_ep: 1|act_loss: 0.0687255859375|cri_loss: 0.0438232421875|unsuper_loss: 0.0
+average reward score: 0.1611328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.28%) |Training time=0.40s (18.58%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.80
+[2023-04-21 23:55:31,622] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 8192, reducing to 4096
+epoch: 0|step: 466|ppo_ep: 1|act_loss: 1.84765625|cri_loss: 1.236328125|unsuper_loss: 0.0
+average reward score: -1.001953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.24%) |Training time=0.41s (19.01%) |Others=0.10 (4.75%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.80
+epoch: 0|step: 467|ppo_ep: 1|act_loss: -0.025054931640625|cri_loss: -0.00600433349609375|unsuper_loss: 0.0
+average reward score: -1.671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.92%) |Training time=0.41s (18.95%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.80
+epoch: 0|step: 468|ppo_ep: 1|act_loss: 0.03363037109375|cri_loss: 0.0174407958984375|unsuper_loss: 0.0
+average reward score: -0.268310546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.92%) |Training time=0.41s (18.96%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.80
+[2023-04-21 23:55:37,937] [INFO] [logging.py:96:log_dist] [Rank 0] step=470, skipped=8, lr=[5.967763215465968e-06, 5.967763215465968e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-21 23:55:37,955] [INFO] [timer.py:199:stop] epoch=0/micro_step=470/global_step=470, RunningAvgSamplesPerSec=128.79156542499263, CurrSamplesPerSec=132.30629889979465, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-21 23:55:38,048] [INFO] [logging.py:96:log_dist] [Rank 0] step=470, skipped=9, lr=[3.101036303152072e-06, 3.101036303152072e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 469|ppo_ep: 1|act_loss: -0.08905029296875|cri_loss: -0.01934814453125|unsuper_loss: 0.0
+average reward score: 0.006591796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.01%) |Training time=0.40s (18.88%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.80
+epoch: 0|step: 470|ppo_ep: 1|act_loss: 0.021240234375|cri_loss: 0.0150299072265625|unsuper_loss: 0.0
+average reward score: -1.2783203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.04%) |Training time=0.40s (18.84%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81
+epoch: 0|step: 471|ppo_ep: 1|act_loss: -0.0386962890625|cri_loss: -0.015380859375|unsuper_loss: 0.0
+average reward score: -0.73095703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.94%) |Training time=0.41s (18.92%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81
+epoch: 0|step: 472|ppo_ep: 1|act_loss: -0.0802001953125|cri_loss: -0.0303192138671875|unsuper_loss: 0.0
+average reward score: -1.0439453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.85%) |Training time=0.41s (19.03%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.81
+epoch: 0|step: 473|ppo_ep: 1|act_loss: 0.0972900390625|cri_loss: 0.06365966796875|unsuper_loss: 0.0
+average reward score: -1.13671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.74s (77.05%) |Training time=0.41s (18.07%) |Others=0.11 (4.88%)|CurSamplesPerSec=14.17 |AvgSamplesPerSec=14.80
+epoch: 0|step: 474|ppo_ep: 1|act_loss: -0.0191192626953125|cri_loss: 0.0012054443359375|unsuper_loss: 0.0
+average reward score: -1.75390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.76%) |Training time=0.41s (19.11%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.80
+epoch: 0|step: 475|ppo_ep: 1|act_loss: 0.326904296875|cri_loss: 0.1962890625|unsuper_loss: 0.0
+average reward score: -1.3583984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.79%) |Training time=0.41s (19.04%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.80
+epoch: 0|step: 476|ppo_ep: 1|act_loss: -0.06396484375|cri_loss: -0.026275634765625|unsuper_loss: 0.0
+average reward score: -0.96875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.74%) |Training time=0.41s (19.13%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.80
+epoch: 0|step: 477|ppo_ep: 1|act_loss: 0.092529296875|cri_loss: 0.066650390625|unsuper_loss: 0.0
+average reward score: -0.70263671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.74%) |Training time=0.41s (19.12%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81
+epoch: 0|step: 478|ppo_ep: 1|act_loss: -0.096435546875|cri_loss: -0.0382080078125|unsuper_loss: 0.0
+average reward score: 0.263427734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.30s |Gather latency=0.00s (0.00%) |Generate time=1.73s (75.51%) |Training time=0.45s (19.68%) |Others=0.11 (4.81%)|CurSamplesPerSec=13.94 |AvgSamplesPerSec=14.80
+[2023-04-21 23:55:59,655] [INFO] [logging.py:96:log_dist] [Rank 0] step=480, skipped=8, lr=[5.794582753272854e-06, 5.794582753272854e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-21 23:55:59,674] [INFO] [timer.py:199:stop] epoch=0/micro_step=480/global_step=480, RunningAvgSamplesPerSec=128.78813216201362, CurrSamplesPerSec=128.829807779064, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-21 23:55:59,766] [INFO] [logging.py:96:log_dist] [Rank 0] step=480, skipped=9, lr=[3.011380165315503e-06, 3.011380165315503e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 479|ppo_ep: 1|act_loss: -0.2470703125|cri_loss: -0.0894775390625|unsuper_loss: 0.0
+average reward score: -1.857421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.71%) |Training time=0.41s (19.18%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.80
+epoch: 0|step: 480|ppo_ep: 1|act_loss: -0.152099609375|cri_loss: -0.05255126953125|unsuper_loss: 0.0
+average reward score: -0.74072265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.56%) |Training time=0.41s (19.30%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.80
+epoch: 0|step: 481|ppo_ep: 1|act_loss: -0.06427001953125|cri_loss: -0.022613525390625|unsuper_loss: 0.0
+average reward score: -1.025390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.77%) |Training time=0.41s (19.09%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.80
+epoch: 0|step: 482|ppo_ep: 1|act_loss: 0.171630859375|cri_loss: 0.1043701171875|unsuper_loss: 0.0
+average reward score: -1.021484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.58%) |Training time=0.41s (19.29%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.80
+[2023-04-21 23:56:08,239] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 8192, reducing to 4096
+epoch: 0|step: 483|ppo_ep: 1|act_loss: 0.046478271484375|cri_loss: 0.03521728515625|unsuper_loss: 0.0
+average reward score: -1.263671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.64s (76.70%) |Training time=0.39s (18.14%) |Others=0.11 (5.16%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.80
+epoch: 0|step: 484|ppo_ep: 1|act_loss: -0.138427734375|cri_loss: -0.059814453125|unsuper_loss: 0.0
+average reward score: -0.9912109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.56%) |Training time=0.41s (19.30%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.80
+epoch: 0|step: 485|ppo_ep: 1|act_loss: 0.0533447265625|cri_loss: 0.0283355712890625|unsuper_loss: 0.0
+average reward score: -1.763671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.74%) |Training time=0.41s (19.14%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81
+epoch: 0|step: 486|ppo_ep: 1|act_loss: -0.0673828125|cri_loss: -0.023956298828125|unsuper_loss: 0.0
+average reward score: -1.78125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.75%) |Training time=0.41s (19.11%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81
+epoch: 0|step: 487|ppo_ep: 1|act_loss: -0.082275390625|cri_loss: -0.02606201171875|unsuper_loss: 0.0
+average reward score: -0.16162109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.53%) |Training time=0.42s (19.35%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.81
+epoch: 0|step: 488|ppo_ep: 1|act_loss: 0.11376953125|cri_loss: 0.06549072265625|unsuper_loss: 0.0
+average reward score: -2.30078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.71%) |Training time=0.41s (19.16%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81
+[2023-04-21 23:56:21,083] [INFO] [logging.py:96:log_dist] [Rank 0] step=490, skipped=9, lr=[5.637591896641978e-06, 5.637591896641978e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-21 23:56:21,102] [INFO] [timer.py:199:stop] epoch=0/micro_step=490/global_step=490, RunningAvgSamplesPerSec=128.80691152053797, CurrSamplesPerSec=129.65503757277642, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-21 23:56:21,194] [INFO] [logging.py:96:log_dist] [Rank 0] step=490, skipped=9, lr=[2.9210320707989525e-06, 2.9210320707989525e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 489|ppo_ep: 1|act_loss: 0.3994140625|cri_loss: 0.248779296875|unsuper_loss: 0.0
+average reward score: -1.482421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.73%) |Training time=0.41s (19.14%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81
+epoch: 0|step: 490|ppo_ep: 1|act_loss: 0.0994873046875|cri_loss: 0.056243896484375|unsuper_loss: 0.0
+average reward score: -2.0703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.44%) |Training time=0.42s (19.43%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81
+epoch: 0|step: 491|ppo_ep: 1|act_loss: 0.0140380859375|cri_loss: 0.0095672607421875|unsuper_loss: 0.0
+average reward score: -0.3212890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.68%) |Training time=0.41s (19.18%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81
+epoch: 0|step: 492|ppo_ep: 1|act_loss: 0.0175933837890625|cri_loss: 0.011749267578125|unsuper_loss: 0.0
+average reward score: -1.095703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.51%) |Training time=0.42s (19.37%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81
+epoch: 0|step: 493|ppo_ep: 1|act_loss: 0.026275634765625|cri_loss: 0.01421356201171875|unsuper_loss: 0.0
+average reward score: -1.01171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.62s (72.77%) |Training time=0.41s (18.48%) |Others=0.19 (8.75%)|CurSamplesPerSec=14.38 |AvgSamplesPerSec=14.81
+epoch: 0|step: 494|ppo_ep: 1|act_loss: -0.0537109375|cri_loss: -0.0220489501953125|unsuper_loss: 0.0
+average reward score: -1.533203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.71s (76.70%) |Training time=0.41s (18.34%) |Others=0.11 (4.96%)|CurSamplesPerSec=14.39 |AvgSamplesPerSec=14.81
+epoch: 0|step: 495|ppo_ep: 1|act_loss: -0.1220703125|cri_loss: -0.0538330078125|unsuper_loss: 0.0
+average reward score: -1.3046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.41%) |Training time=0.42s (19.47%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.81
+epoch: 0|step: 496|ppo_ep: 1|act_loss: 0.113037109375|cri_loss: 0.06378173828125|unsuper_loss: 0.0
+average reward score: -0.7041015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.31%) |Training time=0.42s (19.55%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81
+epoch: 0|step: 497|ppo_ep: 1|act_loss: -0.0479736328125|cri_loss: -0.0192718505859375|unsuper_loss: 0.0
+average reward score: -1.0888671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.63%) |Training time=0.41s (19.23%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81
+epoch: 0|step: 498|ppo_ep: 1|act_loss: -0.0472412109375|cri_loss: -0.0181121826171875|unsuper_loss: 0.0
+average reward score: -1.09375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.51%) |Training time=0.41s (19.36%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81
+[2023-04-21 23:56:42,686] [INFO] [logging.py:96:log_dist] [Rank 0] step=500, skipped=9, lr=[5.462120543134245e-06, 5.462120543134245e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-21 23:56:42,705] [INFO] [timer.py:199:stop] epoch=0/micro_step=500/global_step=500, RunningAvgSamplesPerSec=128.77564245686753, CurrSamplesPerSec=126.7796903470301, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-21 23:56:42,797] [INFO] [logging.py:96:log_dist] [Rank 0] step=500, skipped=9, lr=[2.830114271054013e-06, 2.830114271054013e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 499|ppo_ep: 1|act_loss: 0.13818359375|cri_loss: 0.07745361328125|unsuper_loss: 0.0
+average reward score: -0.95703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.51%) |Training time=0.42s (19.37%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81
+epoch: 0|step: 500|ppo_ep: 1|act_loss: 0.0167694091796875|cri_loss: 0.0131988525390625|unsuper_loss: 0.0
+average reward score: -0.96826171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.45%) |Training time=0.42s (19.42%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81
+epoch: 0|step: 501|ppo_ep: 1|act_loss: 0.0234832763671875|cri_loss: 0.0179595947265625|unsuper_loss: 0.0
+average reward score: -1.193359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.38%) |Training time=0.41s (19.23%) |Others=0.12 (5.39%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.81
+epoch: 0|step: 502|ppo_ep: 1|act_loss: -0.00209808349609375|cri_loss: 0.0024261474609375|unsuper_loss: 0.0
+average reward score: -0.5439453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.43%) |Training time=0.42s (19.45%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81
+epoch: 0|step: 503|ppo_ep: 1|act_loss: 0.1025390625|cri_loss: 0.0540771484375|unsuper_loss: 0.0
+average reward score: 0.048095703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.75s (77.23%) |Training time=0.41s (17.91%) |Others=0.11 (4.86%)|CurSamplesPerSec=14.13 |AvgSamplesPerSec=14.81
+epoch: 0|step: 504|ppo_ep: 1|act_loss: 0.099609375|cri_loss: 0.0516357421875|unsuper_loss: 0.0
+average reward score: -0.701171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.16%) |Training time=0.40s (18.72%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81
+epoch: 0|step: 505|ppo_ep: 1|act_loss: 0.077392578125|cri_loss: 0.03948974609375|unsuper_loss: 0.0
+average reward score: -1.853515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.07%) |Training time=0.40s (18.81%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81
+epoch: 0|step: 506|ppo_ep: 1|act_loss: 0.029998779296875|cri_loss: 0.021331787109375|unsuper_loss: 0.0
+average reward score: -0.82861328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.04%) |Training time=0.40s (18.84%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81
+epoch: 0|step: 507|ppo_ep: 1|act_loss: 0.04705810546875|cri_loss: 0.0357666015625|unsuper_loss: 0.0
+average reward score: -0.2374267578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.98%) |Training time=0.41s (18.90%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81
+epoch: 0|step: 508|ppo_ep: 1|act_loss: -0.019744873046875|cri_loss: -0.00775146484375|unsuper_loss: 0.0
+average reward score: -1.1572265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.95%) |Training time=0.46s (20.18%) |Others=0.11 (4.87%)|CurSamplesPerSec=14.18 |AvgSamplesPerSec=14.81
+[2023-04-21 23:57:04,357] [INFO] [logging.py:96:log_dist] [Rank 0] step=510, skipped=9, lr=[5.2857870916303926e-06, 5.2857870916303926e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-21 23:57:04,376] [INFO] [timer.py:199:stop] epoch=0/micro_step=510/global_step=510, RunningAvgSamplesPerSec=128.8105377562441, CurrSamplesPerSec=134.4819851007229, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-21 23:57:04,468] [INFO] [logging.py:96:log_dist] [Rank 0] step=510, skipped=9, lr=[2.7387497884095297e-06, 2.7387497884095297e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 509|ppo_ep: 1|act_loss: 0.045013427734375|cri_loss: 0.0234375|unsuper_loss: 0.0
+average reward score: -0.90380859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.14%) |Training time=0.40s (18.74%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.81
+epoch: 0|step: 510|ppo_ep: 1|act_loss: -0.018096923828125|cri_loss: -0.0037841796875|unsuper_loss: 0.0
+average reward score: -0.953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.01%) |Training time=0.40s (18.85%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.81
+epoch: 0|step: 511|ppo_ep: 1|act_loss: -0.02386474609375|cri_loss: -0.0094146728515625|unsuper_loss: 0.0
+average reward score: -1.75
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.65s (76.09%) |Training time=0.41s (18.83%) |Others=0.11 (5.08%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.81
+epoch: 0|step: 512|ppo_ep: 1|act_loss: -0.01555633544921875|cri_loss: -0.003448486328125|unsuper_loss: 0.0
+average reward score: -1.1650390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.98%) |Training time=0.40s (18.89%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81
+epoch: 0|step: 513|ppo_ep: 1|act_loss: -0.05865478515625|cri_loss: -0.0181121826171875|unsuper_loss: 0.0
+average reward score: 0.0504150390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.12%) |Training time=0.40s (18.75%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.81
+epoch: 0|step: 514|ppo_ep: 1|act_loss: 0.25244140625|cri_loss: 0.17333984375|unsuper_loss: 0.0
+average reward score: -1.330078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.05%) |Training time=0.40s (18.83%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81
+epoch: 0|step: 515|ppo_ep: 1|act_loss: -0.027191162109375|cri_loss: -0.0131683349609375|unsuper_loss: 0.0
+average reward score: -1.1142578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.99%) |Training time=0.40s (18.88%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81
+epoch: 0|step: 516|ppo_ep: 1|act_loss: -0.0731201171875|cri_loss: -0.03143310546875|unsuper_loss: 0.0
+average reward score: 0.60595703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.79%) |Training time=0.41s (19.10%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81
+epoch: 0|step: 517|ppo_ep: 1|act_loss: 0.055328369140625|cri_loss: 0.032196044921875|unsuper_loss: 0.0
+average reward score: -1.23828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.10%) |Training time=0.40s (18.77%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.81
+epoch: 0|step: 518|ppo_ep: 1|act_loss: 0.06329345703125|cri_loss: 0.0347900390625|unsuper_loss: 0.0
+average reward score: -1.052734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.93%) |Training time=0.41s (18.95%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81
+[2023-04-21 23:57:25,811] [INFO] [logging.py:96:log_dist] [Rank 0] step=520, skipped=9, lr=[5.108830141743677e-06, 5.108830141743677e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-21 23:57:25,829] [INFO] [timer.py:199:stop] epoch=0/micro_step=520/global_step=520, RunningAvgSamplesPerSec=128.87020056334387, CurrSamplesPerSec=131.38493736546326, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-21 23:57:25,922] [INFO] [logging.py:96:log_dist] [Rank 0] step=520, skipped=9, lr=[2.647062249608123e-06, 2.647062249608123e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 519|ppo_ep: 1|act_loss: 0.0419921875|cri_loss: 0.025726318359375|unsuper_loss: 0.0
+average reward score: -0.285888671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.95%) |Training time=0.41s (18.95%) |Others=0.11 (5.10%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81
+epoch: 0|step: 520|ppo_ep: 1|act_loss: 0.00290679931640625|cri_loss: 0.004146575927734375|unsuper_loss: 0.0
+average reward score: -1.4697265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (76.07%) |Training time=0.41s (18.85%) |Others=0.11 (5.08%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.81
+epoch: 0|step: 521|ppo_ep: 1|act_loss: 0.1004638671875|cri_loss: 0.05633544921875|unsuper_loss: 0.0
+average reward score: -1.466796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.06%) |Training time=0.40s (18.80%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81
+epoch: 0|step: 522|ppo_ep: 1|act_loss: 0.041107177734375|cri_loss: 0.028961181640625|unsuper_loss: 0.0
+average reward score: -0.830078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.01%) |Training time=0.40s (18.85%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81
+epoch: 0|step: 523|ppo_ep: 1|act_loss: -0.016326904296875|cri_loss: -0.002349853515625|unsuper_loss: 0.0
+average reward score: -0.36962890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.93%) |Training time=0.41s (18.93%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81
+epoch: 0|step: 524|ppo_ep: 1|act_loss: 0.029144287109375|cri_loss: 0.0178985595703125|unsuper_loss: 0.0
+average reward score: -0.12548828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.81s (78.13%) |Training time=0.40s (17.11%) |Others=0.11 (4.76%)|CurSamplesPerSec=13.83 |AvgSamplesPerSec=14.81
+epoch: 0|step: 525|ppo_ep: 1|act_loss: 0.04315185546875|cri_loss: 0.022186279296875|unsuper_loss: 0.0
+average reward score: -1.3505859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.89%) |Training time=0.41s (18.97%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.81
+epoch: 0|step: 526|ppo_ep: 1|act_loss: -0.0006580352783203125|cri_loss: 0.001468658447265625|unsuper_loss: 0.0
+average reward score: -1.166015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.90%) |Training time=0.41s (18.98%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81
+epoch: 0|step: 527|ppo_ep: 1|act_loss: 0.50244140625|cri_loss: 0.341796875|unsuper_loss: 0.0
+average reward score: 0.2208251953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.06%) |Training time=0.40s (18.80%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81
+epoch: 0|step: 528|ppo_ep: 1|act_loss: -0.14111328125|cri_loss: -0.06561279296875|unsuper_loss: 0.0
+average reward score: -0.4248046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.93%) |Training time=0.41s (18.94%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81
+[2023-04-21 23:57:47,434] [INFO] [logging.py:96:log_dist] [Rank 0] step=530, skipped=9, lr=[4.93148913675299e-06, 4.93148913675299e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-21 23:57:47,453] [INFO] [timer.py:199:stop] epoch=0/micro_step=530/global_step=530, RunningAvgSamplesPerSec=128.93183578328944, CurrSamplesPerSec=131.0393681261014, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-21 23:57:47,545] [INFO] [logging.py:96:log_dist] [Rank 0] step=530, skipped=9, lr=[2.5551757185248656e-06, 2.5551757185248656e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 529|ppo_ep: 1|act_loss: -0.06744384765625|cri_loss: -0.027130126953125|unsuper_loss: 0.0
+average reward score: -1.453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.89%) |Training time=0.41s (18.99%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81
+epoch: 0|step: 530|ppo_ep: 1|act_loss: -0.12005615234375|cri_loss: -0.0482177734375|unsuper_loss: 0.0
+average reward score: -0.04510498046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.91%) |Training time=0.41s (18.96%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81
+epoch: 0|step: 531|ppo_ep: 1|act_loss: -0.0714111328125|cri_loss: -0.02630615234375|unsuper_loss: 0.0
+average reward score: -1.21875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.92%) |Training time=0.41s (18.96%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81
+epoch: 0|step: 532|ppo_ep: 1|act_loss: -0.00323486328125|cri_loss: 0.02301025390625|unsuper_loss: 0.0
+average reward score: -0.34423828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.05%) |Training time=0.42s (19.22%) |Others=0.12 (5.73%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.81
+epoch: 0|step: 533|ppo_ep: 1|act_loss: 0.04156494140625|cri_loss: 0.02337646484375|unsuper_loss: 0.0
+average reward score: -0.6787109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.73s (77.08%) |Training time=0.40s (18.02%) |Others=0.11 (4.91%)|CurSamplesPerSec=14.24 |AvgSamplesPerSec=14.81
+epoch: 0|step: 534|ppo_ep: 1|act_loss: -0.1334228515625|cri_loss: -0.061859130859375|unsuper_loss: 0.0
+average reward score: -0.400146484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.91%) |Training time=0.41s (18.95%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81
+epoch: 0|step: 535|ppo_ep: 1|act_loss: 0.0897216796875|cri_loss: 0.05316162109375|unsuper_loss: 0.0
+average reward score: -0.697265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.66%) |Training time=0.41s (19.21%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81
+epoch: 0|step: 536|ppo_ep: 1|act_loss: -0.072998046875|cri_loss: -0.0137939453125|unsuper_loss: 0.0
+average reward score: -0.5869140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.84%) |Training time=0.41s (19.03%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.81
+epoch: 0|step: 537|ppo_ep: 1|act_loss: 0.0777587890625|cri_loss: 0.043487548828125|unsuper_loss: 0.0
+average reward score: -0.93505859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.73%) |Training time=0.41s (19.14%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81
+epoch: 0|step: 538|ppo_ep: 1|act_loss: 0.0673828125|cri_loss: 0.035186767578125|unsuper_loss: 0.0
+average reward score: -0.671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.73%) |Training time=0.41s (19.14%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81
+[2023-04-21 23:58:09,179] [INFO] [logging.py:96:log_dist] [Rank 0] step=540, skipped=9, lr=[4.754004039608327e-06, 4.754004039608327e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-21 23:58:09,198] [INFO] [timer.py:199:stop] epoch=0/micro_step=540/global_step=540, RunningAvgSamplesPerSec=128.9580352665631, CurrSamplesPerSec=133.35439166833916, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-21 23:58:09,290] [INFO] [logging.py:96:log_dist] [Rank 0] step=540, skipped=9, lr=[2.46321452829447e-06, 2.46321452829447e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 539|ppo_ep: 1|act_loss: 0.716796875|cri_loss: 0.46044921875|unsuper_loss: 0.0
+average reward score: -0.89599609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.82s (78.00%) |Training time=0.40s (17.31%) |Others=0.11 (4.70%)|CurSamplesPerSec=13.74 |AvgSamplesPerSec=14.81
+epoch: 0|step: 540|ppo_ep: 1|act_loss: -0.08453369140625|cri_loss: -0.0290374755859375|unsuper_loss: 0.0
+average reward score: -0.1602783203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.74%) |Training time=0.41s (19.14%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81
+epoch: 0|step: 541|ppo_ep: 1|act_loss: 0.0238800048828125|cri_loss: 0.01302337646484375|unsuper_loss: 0.0
+average reward score: -0.55712890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.71%) |Training time=0.41s (19.16%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81
+epoch: 0|step: 542|ppo_ep: 1|act_loss: 0.022369384765625|cri_loss: 0.011962890625|unsuper_loss: 0.0
+average reward score: -0.40478515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.81%) |Training time=0.41s (19.06%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81
+epoch: 0|step: 543|ppo_ep: 1|act_loss: 0.000453948974609375|cri_loss: 0.0005359649658203125|unsuper_loss: 0.0
+average reward score: -0.52783203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.73%) |Training time=0.41s (19.13%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81
+epoch: 0|step: 544|ppo_ep: 1|act_loss: -0.10272216796875|cri_loss: -0.04510498046875|unsuper_loss: 0.0
+average reward score: 0.463134765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.74%) |Training time=0.41s (19.12%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81
+epoch: 0|step: 545|ppo_ep: 1|act_loss: -0.06622314453125|cri_loss: -0.0285797119140625|unsuper_loss: 0.0
+average reward score: -0.488037109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.87%) |Training time=0.41s (18.99%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.81
+epoch: 0|step: 546|ppo_ep: 1|act_loss: 0.054931640625|cri_loss: 0.03485107421875|unsuper_loss: 0.0
+average reward score: -0.81201171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.71%) |Training time=0.41s (19.17%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81
+epoch: 0|step: 547|ppo_ep: 1|act_loss: 0.05303955078125|cri_loss: 0.0277099609375|unsuper_loss: 0.0
+average reward score: -1.078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.70%) |Training time=0.41s (19.17%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81
+epoch: 0|step: 548|ppo_ep: 1|act_loss: 0.01468658447265625|cri_loss: 0.00971221923828125|unsuper_loss: 0.0
+average reward score: -1.755859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.71%) |Training time=0.41s (19.16%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81
+[2023-04-21 23:58:30,609] [INFO] [logging.py:96:log_dist] [Rank 0] step=550, skipped=9, lr=[4.576615008233078e-06, 4.576615008233078e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-21 23:58:30,627] [INFO] [timer.py:199:stop] epoch=0/micro_step=550/global_step=550, RunningAvgSamplesPerSec=128.96546878661252, CurrSamplesPerSec=129.5923189519664, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-21 23:58:30,720] [INFO] [logging.py:96:log_dist] [Rank 0] step=550, skipped=9, lr=[2.371303113074134e-06, 2.371303113074134e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 549|ppo_ep: 1|act_loss: -0.003719329833984375|cri_loss: 0.00133514404296875|unsuper_loss: 0.0
+average reward score: -0.8671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.74%) |Training time=0.41s (19.15%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.81
+epoch: 0|step: 550|ppo_ep: 1|act_loss: -0.0433349609375|cri_loss: -0.020294189453125|unsuper_loss: 0.0
+average reward score: -1.921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.50%) |Training time=0.42s (19.38%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81
+epoch: 0|step: 551|ppo_ep: 1|act_loss: 0.033416748046875|cri_loss: 0.02197265625|unsuper_loss: 0.0
+average reward score: -0.27392578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.72%) |Training time=0.41s (19.15%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81
+epoch: 0|step: 552|ppo_ep: 1|act_loss: 0.0675048828125|cri_loss: 0.03875732421875|unsuper_loss: 0.0
+average reward score: -0.7763671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.73%) |Training time=0.41s (19.15%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81
+epoch: 0|step: 553|ppo_ep: 1|act_loss: 0.0059967041015625|cri_loss: 0.0139923095703125|unsuper_loss: 0.0
+average reward score: -0.435546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.90%) |Training time=0.41s (18.97%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.81
+epoch: 0|step: 554|ppo_ep: 1|act_loss: 0.0028285980224609375|cri_loss: 0.004730224609375|unsuper_loss: 0.0
+average reward score: -0.368408203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.66s (73.26%) |Training time=0.50s (21.88%) |Others=0.11 (4.86%)|CurSamplesPerSec=14.13 |AvgSamplesPerSec=14.81
+epoch: 0|step: 555|ppo_ep: 1|act_loss: 0.0911865234375|cri_loss: 0.047454833984375|unsuper_loss: 0.0
+average reward score: -0.98876953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.00%) |Training time=0.40s (18.88%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81
+epoch: 0|step: 556|ppo_ep: 1|act_loss: 0.09619140625|cri_loss: 0.05340576171875|unsuper_loss: 0.0
+average reward score: -0.49072265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.98%) |Training time=0.40s (18.89%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81
+epoch: 0|step: 557|ppo_ep: 1|act_loss: 0.01202392578125|cri_loss: 0.00739288330078125|unsuper_loss: 0.0
+average reward score: -1.9482421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.99%) |Training time=0.40s (18.87%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81
+epoch: 0|step: 558|ppo_ep: 1|act_loss: 0.089111328125|cri_loss: 0.04901123046875|unsuper_loss: 0.0
+average reward score: -1.083984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.83%) |Training time=0.41s (19.05%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81
+[2023-04-21 23:58:52,166] [INFO] [logging.py:96:log_dist] [Rank 0] step=560, skipped=9, lr=[4.399562070562508e-06, 4.399562070562508e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-21 23:58:52,185] [INFO] [timer.py:199:stop] epoch=0/micro_step=560/global_step=560, RunningAvgSamplesPerSec=128.95737058219476, CurrSamplesPerSec=130.43333521215456, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-21 23:58:52,277] [INFO] [logging.py:96:log_dist] [Rank 0] step=560, skipped=9, lr=[2.279565839669693e-06, 2.279565839669693e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 559|ppo_ep: 1|act_loss: 0.047607421875|cri_loss: 0.025360107421875|unsuper_loss: 0.0
+average reward score: -1.916015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.88%) |Training time=0.41s (19.02%) |Others=0.11 (5.10%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81
+epoch: 0|step: 560|ppo_ep: 1|act_loss: 0.0312042236328125|cri_loss: 0.02032470703125|unsuper_loss: 0.0
+average reward score: -1.3662109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.88%) |Training time=0.41s (19.00%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81
+epoch: 0|step: 561|ppo_ep: 1|act_loss: 0.07525634765625|cri_loss: 0.038787841796875|unsuper_loss: 0.0
+average reward score: -1.5234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.01%) |Training time=0.40s (18.85%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81
+epoch: 0|step: 562|ppo_ep: 1|act_loss: 0.0253753662109375|cri_loss: 0.015106201171875|unsuper_loss: 0.0
+average reward score: -1.615234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.07%) |Training time=0.43s (19.85%) |Others=0.11 (5.08%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.81
+epoch: 0|step: 563|ppo_ep: 1|act_loss: -0.0050201416015625|cri_loss: 0.00833892822265625|unsuper_loss: 0.0
+average reward score: 0.74658203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.74s (77.30%) |Training time=0.40s (17.80%) |Others=0.11 (4.90%)|CurSamplesPerSec=14.21 |AvgSamplesPerSec=14.81
+epoch: 0|step: 564|ppo_ep: 1|act_loss: 0.0182342529296875|cri_loss: 0.0108184814453125|unsuper_loss: 0.0
+average reward score: -0.347900390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.71%) |Training time=0.41s (19.16%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81
+epoch: 0|step: 565|ppo_ep: 1|act_loss: 0.01153564453125|cri_loss: 0.007289886474609375|unsuper_loss: 0.0
+average reward score: 0.007080078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.84%) |Training time=0.41s (19.02%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.81
+epoch: 0|step: 566|ppo_ep: 1|act_loss: -0.0457763671875|cri_loss: -0.02069091796875|unsuper_loss: 0.0
+average reward score: -0.7421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.46%) |Training time=0.41s (19.08%) |Others=0.12 (5.46%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.81
+epoch: 0|step: 567|ppo_ep: 1|act_loss: 0.033111572265625|cri_loss: 0.0208587646484375|unsuper_loss: 0.0
+average reward score: -1.6044921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.78%) |Training time=0.41s (19.09%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81
+epoch: 0|step: 568|ppo_ep: 1|act_loss: 0.07696533203125|cri_loss: 0.043701171875|unsuper_loss: 0.0
+average reward score: -0.55078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.80%) |Training time=0.41s (19.07%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81
+[2023-04-21 23:59:13,907] [INFO] [logging.py:96:log_dist] [Rank 0] step=570, skipped=9, lr=[4.223084799758111e-06, 4.223084799758111e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-21 23:59:13,925] [INFO] [timer.py:199:stop] epoch=0/micro_step=570/global_step=570, RunningAvgSamplesPerSec=128.97926272081045, CurrSamplesPerSec=133.12384933005495, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-21 23:59:14,018] [INFO] [logging.py:96:log_dist] [Rank 0] step=570, skipped=9, lr=[2.1881268392529074e-06, 2.1881268392529074e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 569|ppo_ep: 1|act_loss: 0.08038330078125|cri_loss: 0.04248046875|unsuper_loss: 0.0
+average reward score: -0.7685546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.80s (77.80%) |Training time=0.40s (17.45%) |Others=0.11 (4.75%)|CurSamplesPerSec=13.84 |AvgSamplesPerSec=14.81
+epoch: 0|step: 570|ppo_ep: 1|act_loss: 0.088623046875|cri_loss: 0.045562744140625|unsuper_loss: 0.0
+average reward score: -0.7060546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.79%) |Training time=0.41s (19.08%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81
+epoch: 0|step: 571|ppo_ep: 1|act_loss: 0.008026123046875|cri_loss: 0.005352020263671875|unsuper_loss: 0.0
+average reward score: -1.9697265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.86%) |Training time=0.41s (19.01%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81
+epoch: 0|step: 572|ppo_ep: 1|act_loss: -0.0545654296875|cri_loss: -0.025299072265625|unsuper_loss: 0.0
+average reward score: -1.1875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.92%) |Training time=0.41s (18.94%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.81
+epoch: 0|step: 573|ppo_ep: 1|act_loss: 0.0059967041015625|cri_loss: 0.009002685546875|unsuper_loss: 0.0
+average reward score: 0.2705078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.13%) |Training time=0.40s (18.72%) |Others=0.11 (5.15%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.81
+epoch: 0|step: 574|ppo_ep: 1|act_loss: -0.1192626953125|cri_loss: -0.054412841796875|unsuper_loss: 0.0
+average reward score: -0.132568359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.87%) |Training time=0.41s (19.00%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81
+epoch: 0|step: 575|ppo_ep: 1|act_loss: -0.015777587890625|cri_loss: -0.002105712890625|unsuper_loss: 0.0
+average reward score: -0.25244140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.85%) |Training time=0.41s (19.03%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81
+epoch: 0|step: 576|ppo_ep: 1|act_loss: 0.136962890625|cri_loss: 0.0953369140625|unsuper_loss: 0.0
+average reward score: -1.44140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.76%) |Training time=0.41s (19.11%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81
+epoch: 0|step: 577|ppo_ep: 1|act_loss: -0.00135040283203125|cri_loss: 0.005489349365234375|unsuper_loss: 0.0
+average reward score: -1.443359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.01%) |Training time=0.40s (18.86%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81
+epoch: 0|step: 578|ppo_ep: 1|act_loss: 0.19091796875|cri_loss: 0.109130859375|unsuper_loss: 0.0
+average reward score: -1.5791015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.83%) |Training time=0.41s (19.03%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81
+[2023-04-21 23:59:35,337] [INFO] [logging.py:96:log_dist] [Rank 0] step=580, skipped=9, lr=[4.047421990037345e-06, 4.047421990037345e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-21 23:59:35,355] [INFO] [timer.py:199:stop] epoch=0/micro_step=580/global_step=580, RunningAvgSamplesPerSec=129.0139638309541, CurrSamplesPerSec=131.0647044061024, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-21 23:59:35,448] [INFO] [logging.py:96:log_dist] [Rank 0] step=580, skipped=9, lr=[2.097109839397588e-06, 2.097109839397588e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 579|ppo_ep: 1|act_loss: -0.0176544189453125|cri_loss: 0.0020294189453125|unsuper_loss: 0.0
+average reward score: -0.6689453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.88%) |Training time=0.41s (19.01%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81
+epoch: 0|step: 580|ppo_ep: 1|act_loss: -0.198486328125|cri_loss: -0.082763671875|unsuper_loss: 0.0
+average reward score: -1.0498046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.94%) |Training time=0.41s (18.93%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81
+epoch: 0|step: 581|ppo_ep: 1|act_loss: 0.08685302734375|cri_loss: 0.061767578125|unsuper_loss: 0.0
+average reward score: -0.341796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.11%) |Training time=0.40s (18.76%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.81
+epoch: 0|step: 582|ppo_ep: 1|act_loss: -0.051513671875|cri_loss: -0.02471923828125|unsuper_loss: 0.0
+average reward score: -2.01953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.06%) |Training time=0.40s (18.80%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81
+epoch: 0|step: 583|ppo_ep: 1|act_loss: 0.03472900390625|cri_loss: 0.024383544921875|unsuper_loss: 0.0
+average reward score: -0.962890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.99%) |Training time=0.40s (18.89%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81
+epoch: 0|step: 584|ppo_ep: 1|act_loss: 0.0494384765625|cri_loss: 0.025604248046875|unsuper_loss: 0.0
+average reward score: -0.109130859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.80s (77.85%) |Training time=0.40s (17.38%) |Others=0.11 (4.77%)|CurSamplesPerSec=13.88 |AvgSamplesPerSec=14.81
+epoch: 0|step: 585|ppo_ep: 1|act_loss: 0.0477294921875|cri_loss: 0.0253143310546875|unsuper_loss: 0.0
+average reward score: -1.673828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.08%) |Training time=0.40s (18.77%) |Others=0.11 (5.15%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.81
+epoch: 0|step: 586|ppo_ep: 1|act_loss: 0.0694580078125|cri_loss: 0.03692626953125|unsuper_loss: 0.0
+average reward score: -1.9365234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.02%) |Training time=0.40s (18.85%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81
+epoch: 0|step: 587|ppo_ep: 1|act_loss: -0.0389404296875|cri_loss: -0.0133819580078125|unsuper_loss: 0.0
+average reward score: -0.0748291015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.05%) |Training time=0.40s (18.81%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.81
+epoch: 0|step: 588|ppo_ep: 1|act_loss: 0.03643798828125|cri_loss: 0.019317626953125|unsuper_loss: 0.0
+average reward score: -0.373779296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.07%) |Training time=0.40s (18.79%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.81
+[2023-04-21 23:59:56,914] [INFO] [logging.py:96:log_dist] [Rank 0] step=590, skipped=9, lr=[3.872811333557339e-06, 3.872811333557339e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-21 23:59:56,932] [INFO] [timer.py:199:stop] epoch=0/micro_step=590/global_step=590, RunningAvgSamplesPerSec=129.0828609652265, CurrSamplesPerSec=133.9209591545136, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-21 23:59:57,025] [INFO] [logging.py:96:log_dist] [Rank 0] step=590, skipped=9, lr=[2.0066379966618336e-06, 2.0066379966618336e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 589|ppo_ep: 1|act_loss: -0.04449462890625|cri_loss: -0.0187225341796875|unsuper_loss: 0.0
+average reward score: -0.70556640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.07%) |Training time=0.40s (18.80%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.81
+epoch: 0|step: 590|ppo_ep: 1|act_loss: -0.0704345703125|cri_loss: -0.0311431884765625|unsuper_loss: 0.0
+average reward score: -1.0146484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.93%) |Training time=0.41s (18.94%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.81
+epoch: 0|step: 591|ppo_ep: 1|act_loss: 0.02191162109375|cri_loss: 0.01136016845703125|unsuper_loss: 0.0
+average reward score: 0.38037109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.13%) |Training time=0.40s (18.74%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81
+epoch: 0|step: 592|ppo_ep: 1|act_loss: -0.055938720703125|cri_loss: -0.02142333984375|unsuper_loss: 0.0
+average reward score: -0.204345703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.92%) |Training time=0.41s (18.97%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.81
+epoch: 0|step: 593|ppo_ep: 1|act_loss: -0.00064849853515625|cri_loss: 0.0021076202392578125|unsuper_loss: 0.0
+average reward score: -1.3173828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.73s (77.14%) |Training time=0.40s (17.96%) |Others=0.11 (4.90%)|CurSamplesPerSec=14.27 |AvgSamplesPerSec=14.81
+epoch: 0|step: 594|ppo_ep: 1|act_loss: 0.16748046875|cri_loss: 0.0906982421875|unsuper_loss: 0.0
+average reward score: -0.52734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.96%) |Training time=0.40s (18.90%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81
+epoch: 0|step: 595|ppo_ep: 1|act_loss: 0.06268310546875|cri_loss: 0.035736083984375|unsuper_loss: 0.0
+average reward score: -0.80419921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.12%) |Training time=0.40s (18.74%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81
+epoch: 0|step: 596|ppo_ep: 1|act_loss: 0.0067596435546875|cri_loss: 0.0061798095703125|unsuper_loss: 0.0
+average reward score: -0.931640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.10%) |Training time=0.40s (18.76%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.81
+epoch: 0|step: 597|ppo_ep: 1|act_loss: 0.08984375|cri_loss: 0.05364990234375|unsuper_loss: 0.0
+average reward score: 0.299560546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.98%) |Training time=0.40s (18.88%) |Others=0.11 (5.15%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81
+epoch: 0|step: 598|ppo_ep: 1|act_loss: 0.0401611328125|cri_loss: 0.022216796875|unsuper_loss: 0.0
+average reward score: -0.8349609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.02%) |Training time=0.40s (18.84%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81
+[2023-04-22 00:00:18,614] [INFO] [logging.py:96:log_dist] [Rank 0] step=600, skipped=9, lr=[3.699489098789857e-06, 3.699489098789857e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-22 00:00:18,633] [INFO] [timer.py:199:stop] epoch=0/micro_step=600/global_step=600, RunningAvgSamplesPerSec=129.141982278309, CurrSamplesPerSec=133.55582532966616, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-22 00:00:18,725] [INFO] [logging.py:96:log_dist] [Rank 0] step=600, skipped=9, lr=[1.916833729942931e-06, 1.916833729942931e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 599|ppo_ep: 1|act_loss: -0.0003910064697265625|cri_loss: 0.0005998611450195312|unsuper_loss: 0.0
+average reward score: -1.083984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.79s (77.79%) |Training time=0.40s (17.44%) |Others=0.11 (4.76%)|CurSamplesPerSec=13.88 |AvgSamplesPerSec=14.81
+epoch: 0|step: 600|ppo_ep: 1|act_loss: 0.0038242340087890625|cri_loss: 0.00618743896484375|unsuper_loss: 0.0
+average reward score: -0.68017578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.00%) |Training time=0.40s (18.86%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81
+epoch: 0|step: 601|ppo_ep: 1|act_loss: -0.0005950927734375|cri_loss: 0.00839996337890625|unsuper_loss: 0.0
+average reward score: -0.7646484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.91%) |Training time=0.41s (18.94%) |Others=0.11 (5.15%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.81
+epoch: 0|step: 602|ppo_ep: 1|act_loss: -0.06658935546875|cri_loss: -0.0243072509765625|unsuper_loss: 0.0
+average reward score: -0.162353515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.77%) |Training time=0.41s (19.10%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81
+epoch: 0|step: 603|ppo_ep: 1|act_loss: 0.009246826171875|cri_loss: 0.00897216796875|unsuper_loss: 0.0
+average reward score: 0.247314453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.76%) |Training time=0.41s (19.10%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81
+epoch: 0|step: 604|ppo_ep: 1|act_loss: 0.005096435546875|cri_loss: 0.01361846923828125|unsuper_loss: 0.0
+average reward score: -0.9736328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.80%) |Training time=0.41s (19.07%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81
+epoch: 0|step: 605|ppo_ep: 1|act_loss: 0.03472900390625|cri_loss: 0.021392822265625|unsuper_loss: 0.0
+average reward score: -1.021484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.94%) |Training time=0.40s (18.91%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.81
+epoch: 0|step: 606|ppo_ep: 1|act_loss: 0.022247314453125|cri_loss: 0.0121307373046875|unsuper_loss: 0.0
+average reward score: -0.0771484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.86%) |Training time=0.41s (18.99%) |Others=0.11 (5.15%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.81
+epoch: 0|step: 607|ppo_ep: 1|act_loss: -0.104248046875|cri_loss: -0.04425048828125|unsuper_loss: 0.0
+average reward score: 0.28271484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.91%) |Training time=0.41s (18.91%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81
+epoch: 0|step: 608|ppo_ep: 1|act_loss: -0.04498291015625|cri_loss: -0.015960693359375|unsuper_loss: 0.0
+average reward score: -1.5947265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.73%) |Training time=0.41s (19.13%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81
+[2023-04-22 00:00:40,031] [INFO] [logging.py:96:log_dist] [Rank 0] step=610, skipped=9, lr=[3.5276898108226314e-06, 3.5276898108226314e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-22 00:00:40,049] [INFO] [timer.py:199:stop] epoch=0/micro_step=610/global_step=610, RunningAvgSamplesPerSec=129.1724633970564, CurrSamplesPerSec=131.91333396889732, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-22 00:00:40,142] [INFO] [logging.py:96:log_dist] [Rank 0] step=610, skipped=9, lr=[1.8278185548303789e-06, 1.8278185548303789e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 609|ppo_ep: 1|act_loss: -0.0159759521484375|cri_loss: -0.0038909912109375|unsuper_loss: 0.0
+average reward score: -0.2020263671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.92%) |Training time=0.41s (18.96%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.81
+epoch: 0|step: 610|ppo_ep: 1|act_loss: -0.062408447265625|cri_loss: -0.0260009765625|unsuper_loss: 0.0
+average reward score: -0.194091796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.81%) |Training time=0.41s (19.05%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81
+epoch: 0|step: 611|ppo_ep: 1|act_loss: -0.04132080078125|cri_loss: -0.0162200927734375|unsuper_loss: 0.0
+average reward score: -0.73681640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.99%) |Training time=0.40s (18.86%) |Others=0.11 (5.15%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81
+epoch: 0|step: 612|ppo_ep: 1|act_loss: 0.0279541015625|cri_loss: 0.020477294921875|unsuper_loss: 0.0
+average reward score: -0.161376953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.85%) |Training time=0.41s (19.00%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.81
+epoch: 0|step: 613|ppo_ep: 1|act_loss: 0.02978515625|cri_loss: 0.021728515625|unsuper_loss: 0.0
+average reward score: -1.361328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.92%) |Training time=0.41s (18.95%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.81
+epoch: 0|step: 614|ppo_ep: 1|act_loss: 0.0789794921875|cri_loss: 0.04876708984375|unsuper_loss: 0.0
+average reward score: -1.2548828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.30s |Gather latency=0.00s (0.00%) |Generate time=1.79s (77.74%) |Training time=0.40s (17.48%) |Others=0.11 (4.78%)|CurSamplesPerSec=13.89 |AvgSamplesPerSec=14.81
+epoch: 0|step: 615|ppo_ep: 1|act_loss: 0.032196044921875|cri_loss: 0.0184326171875|unsuper_loss: 0.0
+average reward score: -0.97265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.91%) |Training time=0.41s (18.95%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.81
+epoch: 0|step: 616|ppo_ep: 1|act_loss: -0.04571533203125|cri_loss: -0.01983642578125|unsuper_loss: 0.0
+average reward score: -0.634765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.65%) |Training time=0.41s (19.20%) |Others=0.11 (5.15%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81
+epoch: 0|step: 617|ppo_ep: 1|act_loss: -0.069580078125|cri_loss: -0.0295562744140625|unsuper_loss: 0.0
+average reward score: -0.221435546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.82%) |Training time=0.41s (19.03%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.81
+epoch: 0|step: 618|ppo_ep: 1|act_loss: 0.048736572265625|cri_loss: 0.02740478515625|unsuper_loss: 0.0
+average reward score: -2.14453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.76%) |Training time=0.41s (19.10%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81
+[2023-04-22 00:01:01,606] [INFO] [logging.py:96:log_dist] [Rank 0] step=620, skipped=9, lr=[3.3576459340197268e-06, 3.3576459340197268e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-22 00:01:01,625] [INFO] [timer.py:199:stop] epoch=0/micro_step=620/global_step=620, RunningAvgSamplesPerSec=129.19904141078314, CurrSamplesPerSec=128.60686758116876, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-22 00:01:01,717] [INFO] [logging.py:96:log_dist] [Rank 0] step=620, skipped=9, lr=[1.7397129191812058e-06, 1.7397129191812058e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 619|ppo_ep: 1|act_loss: -0.07763671875|cri_loss: -0.032867431640625|unsuper_loss: 0.0
+average reward score: -0.50048828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.65%) |Training time=0.41s (19.22%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81
+epoch: 0|step: 620|ppo_ep: 1|act_loss: 0.1051025390625|cri_loss: 0.05865478515625|unsuper_loss: 0.0
+average reward score: -0.99365234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.77%) |Training time=0.41s (19.09%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81
+epoch: 0|step: 621|ppo_ep: 1|act_loss: -0.048065185546875|cri_loss: -0.0211639404296875|unsuper_loss: 0.0
+average reward score: 0.398681640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.91%) |Training time=0.41s (18.95%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81
+epoch: 0|step: 622|ppo_ep: 1|act_loss: 0.07000732421875|cri_loss: 0.042724609375|unsuper_loss: 0.0
+average reward score: -1.185546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.57%) |Training time=0.42s (19.35%) |Others=0.11 (5.08%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.81
+epoch: 0|step: 623|ppo_ep: 1|act_loss: 0.011199951171875|cri_loss: 0.01171875|unsuper_loss: 0.0
+average reward score: -1.3115234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.73s (77.09%) |Training time=0.40s (18.00%) |Others=0.11 (4.91%)|CurSamplesPerSec=14.28 |AvgSamplesPerSec=14.81
+epoch: 0|step: 624|ppo_ep: 1|act_loss: -0.017120361328125|cri_loss: -0.00676727294921875|unsuper_loss: 0.0
+average reward score: -1.40625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.59%) |Training time=0.41s (19.27%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.81
+epoch: 0|step: 625|ppo_ep: 1|act_loss: -0.02935791015625|cri_loss: -0.0076904296875|unsuper_loss: 0.0
+average reward score: -0.83349609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.69%) |Training time=0.41s (19.16%) |Others=0.11 (5.15%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.81
+epoch: 0|step: 626|ppo_ep: 1|act_loss: 0.03070068359375|cri_loss: 0.0194854736328125|unsuper_loss: 0.0
+average reward score: 0.1630859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.57%) |Training time=0.41s (19.29%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.81
+epoch: 0|step: 627|ppo_ep: 1|act_loss: 0.02642822265625|cri_loss: 0.02081298828125|unsuper_loss: 0.0
+average reward score: -0.183349609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.53%) |Training time=0.41s (19.33%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.81
+epoch: 0|step: 628|ppo_ep: 1|act_loss: 0.00371551513671875|cri_loss: 0.006000518798828125|unsuper_loss: 0.0
+average reward score: -0.80078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.71%) |Training time=0.41s (19.10%) |Others=0.11 (5.19%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.81
+[2023-04-22 00:01:23,305] [INFO] [logging.py:96:log_dist] [Rank 0] step=630, skipped=9, lr=[3.1895875574702854e-06, 3.1895875574702854e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-22 00:01:23,324] [INFO] [timer.py:199:stop] epoch=0/micro_step=630/global_step=630, RunningAvgSamplesPerSec=129.2124712085752, CurrSamplesPerSec=132.39765740367173, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-22 00:01:23,416] [INFO] [logging.py:96:log_dist] [Rank 0] step=630, skipped=9, lr=[1.6526360401400443e-06, 1.6526360401400443e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 629|ppo_ep: 1|act_loss: -0.08563232421875|cri_loss: -0.035675048828125|unsuper_loss: 0.0
+average reward score: -1.3447265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.79s (77.71%) |Training time=0.40s (17.54%) |Others=0.11 (4.76%)|CurSamplesPerSec=13.87 |AvgSamplesPerSec=14.81
+epoch: 0|step: 630|ppo_ep: 1|act_loss: -0.03173828125|cri_loss: -0.0081024169921875|unsuper_loss: 0.0
+average reward score: -0.296630859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.60%) |Training time=0.41s (19.25%) |Others=0.11 (5.15%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.81
+epoch: 0|step: 631|ppo_ep: 1|act_loss: -0.0943603515625|cri_loss: -0.040283203125|unsuper_loss: 0.0
+average reward score: -0.37939453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.43%) |Training time=0.42s (19.43%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81
+epoch: 0|step: 632|ppo_ep: 1|act_loss: -0.05322265625|cri_loss: -0.0235595703125|unsuper_loss: 0.0
+average reward score: -0.5087890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.65%) |Training time=0.41s (19.21%) |Others=0.11 (5.15%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.81
+epoch: 0|step: 633|ppo_ep: 1|act_loss: -0.08282470703125|cri_loss: -0.031707763671875|unsuper_loss: 0.0
+average reward score: -0.88720703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.61%) |Training time=0.41s (19.24%) |Others=0.11 (5.15%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.81
+epoch: 0|step: 634|ppo_ep: 1|act_loss: -0.0310821533203125|cri_loss: -0.0131072998046875|unsuper_loss: 0.0
+average reward score: -0.9306640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.62%) |Training time=0.41s (19.23%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.81
+epoch: 0|step: 635|ppo_ep: 1|act_loss: -0.076904296875|cri_loss: -0.0321044921875|unsuper_loss: 0.0
+average reward score: -0.26953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.64%) |Training time=0.41s (19.23%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81
+epoch: 0|step: 636|ppo_ep: 1|act_loss: -0.0421142578125|cri_loss: -0.01532745361328125|unsuper_loss: 0.0
+average reward score: -1.3984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.88%) |Training time=0.41s (18.97%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81
+epoch: 0|step: 637|ppo_ep: 1|act_loss: 0.028472900390625|cri_loss: 0.020843505859375|unsuper_loss: 0.0
+average reward score: -0.171142578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.66%) |Training time=0.41s (19.20%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81
+epoch: 0|step: 638|ppo_ep: 1|act_loss: -0.00150299072265625|cri_loss: 0.001399993896484375|unsuper_loss: 0.0
+average reward score: -0.461669921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.63%) |Training time=0.41s (19.23%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81
+[2023-04-22 00:01:44,720] [INFO] [logging.py:96:log_dist] [Rank 0] step=640, skipped=9, lr=[3.0237420836513046e-06, 3.0237420836513046e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-22 00:01:44,739] [INFO] [timer.py:199:stop] epoch=0/micro_step=640/global_step=640, RunningAvgSamplesPerSec=129.20022608214921, CurrSamplesPerSec=127.59636959272474, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-22 00:01:44,832] [INFO] [logging.py:96:log_dist] [Rank 0] step=640, skipped=9, lr=[1.5667057428245102e-06, 1.5667057428245102e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 639|ppo_ep: 1|act_loss: 0.00522613525390625|cri_loss: 0.006351470947265625|unsuper_loss: 0.0
+average reward score: 0.156005859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.58%) |Training time=0.41s (19.30%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81
+epoch: 0|step: 640|ppo_ep: 1|act_loss: 0.1121826171875|cri_loss: 0.060943603515625|unsuper_loss: 0.0
+average reward score: -0.46044921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.79%) |Training time=0.41s (19.06%) |Others=0.11 (5.15%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81
+epoch: 0|step: 641|ppo_ep: 1|act_loss: 0.050201416015625|cri_loss: 0.03204345703125|unsuper_loss: 0.0
+average reward score: -0.8583984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.77%) |Training time=0.41s (19.08%) |Others=0.11 (5.15%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.81
+epoch: 0|step: 642|ppo_ep: 1|act_loss: -0.024688720703125|cri_loss: -0.00608062744140625|unsuper_loss: 0.0
+average reward score: -1.376953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.52%) |Training time=0.41s (19.35%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81
+epoch: 0|step: 643|ppo_ep: 1|act_loss: 0.06982421875|cri_loss: 0.036041259765625|unsuper_loss: 0.0
+average reward score: -0.19677734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.80%) |Training time=0.41s (19.07%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81
+epoch: 0|step: 644|ppo_ep: 1|act_loss: -0.039306640625|cri_loss: -0.0154571533203125|unsuper_loss: 0.0
+average reward score: -0.955078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.87%) |Training time=0.41s (19.00%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81
+epoch: 0|step: 645|ppo_ep: 1|act_loss: -0.0042266845703125|cri_loss: 0.003551483154296875|unsuper_loss: 0.0
+average reward score: 0.63671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.19%) |Training time=0.46s (20.80%) |Others=0.11 (5.01%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.81
+epoch: 0|step: 646|ppo_ep: 1|act_loss: -0.017852783203125|cri_loss: -0.0043487548828125|unsuper_loss: 0.0
+average reward score: -0.84423828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.86%) |Training time=0.41s (19.01%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81
+epoch: 0|step: 647|ppo_ep: 1|act_loss: 0.103759765625|cri_loss: 0.053558349609375|unsuper_loss: 0.0
+average reward score: -2.94140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.97%) |Training time=0.40s (18.89%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.81
+epoch: 0|step: 648|ppo_ep: 1|act_loss: 0.043792724609375|cri_loss: 0.0270233154296875|unsuper_loss: 0.0
+average reward score: -1.533203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.92%) |Training time=0.41s (18.95%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81
+[2023-04-22 00:02:06,193] [INFO] [logging.py:96:log_dist] [Rank 0] step=650, skipped=9, lr=[2.860333920725707e-06, 2.860333920725707e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-22 00:02:06,211] [INFO] [timer.py:199:stop] epoch=0/micro_step=650/global_step=650, RunningAvgSamplesPerSec=129.208791284019, CurrSamplesPerSec=131.42469047796234, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-22 00:02:06,304] [INFO] [logging.py:96:log_dist] [Rank 0] step=650, skipped=9, lr=[1.482038300894149e-06, 1.482038300894149e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 649|ppo_ep: 1|act_loss: -0.0606689453125|cri_loss: -0.0254974365234375|unsuper_loss: 0.0
+average reward score: 0.1875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.90%) |Training time=0.41s (18.99%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.81
+epoch: 0|step: 650|ppo_ep: 1|act_loss: 0.0689697265625|cri_loss: 0.039520263671875|unsuper_loss: 0.0
+average reward score: 0.5341796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.83%) |Training time=0.41s (19.03%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81
+epoch: 0|step: 651|ppo_ep: 1|act_loss: -0.0679931640625|cri_loss: -0.027801513671875|unsuper_loss: 0.0
+average reward score: -0.249755859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.93%) |Training time=0.41s (18.94%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81
+epoch: 0|step: 652|ppo_ep: 1|act_loss: 0.03314208984375|cri_loss: 0.0246429443359375|unsuper_loss: 0.0
+average reward score: -0.398193359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.38%) |Training time=0.41s (19.16%) |Others=0.12 (5.47%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.81
+epoch: 0|step: 653|ppo_ep: 1|act_loss: -0.047332763671875|cri_loss: -0.0184478759765625|unsuper_loss: 0.0
+average reward score: -0.6884765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.73s (76.99%) |Training time=0.41s (18.10%) |Others=0.11 (4.91%)|CurSamplesPerSec=14.26 |AvgSamplesPerSec=14.81
+epoch: 0|step: 654|ppo_ep: 1|act_loss: -0.01456451416015625|cri_loss: -0.00463104248046875|unsuper_loss: 0.0
+average reward score: -1.439453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.82%) |Training time=0.41s (19.06%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81
+epoch: 0|step: 655|ppo_ep: 1|act_loss: 0.027130126953125|cri_loss: 0.01381683349609375|unsuper_loss: 0.0
+average reward score: -0.607421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.80%) |Training time=0.41s (19.07%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81
+epoch: 0|step: 656|ppo_ep: 1|act_loss: 0.0105743408203125|cri_loss: 0.01410675048828125|unsuper_loss: 0.0
+average reward score: -1.7607421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.80%) |Training time=0.41s (19.06%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81
+epoch: 0|step: 657|ppo_ep: 1|act_loss: -0.0599365234375|cri_loss: -0.0264739990234375|unsuper_loss: 0.0
+average reward score: 0.05181884765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.03%) |Training time=0.40s (18.83%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.81
+epoch: 0|step: 658|ppo_ep: 1|act_loss: 0.0421142578125|cri_loss: 0.025360107421875|unsuper_loss: 0.0
+average reward score: -0.494384765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.89%) |Training time=0.41s (18.97%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81
+[2023-04-22 00:02:27,753] [INFO] [logging.py:96:log_dist] [Rank 0] step=660, skipped=9, lr=[2.6995841788920667e-06, 2.6995841788920667e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-22 00:02:27,771] [INFO] [timer.py:199:stop] epoch=0/micro_step=660/global_step=660, RunningAvgSamplesPerSec=129.23248095416145, CurrSamplesPerSec=129.5397872044721, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-22 00:02:27,864] [INFO] [logging.py:96:log_dist] [Rank 0] step=660, skipped=9, lr=[1.3987482792186873e-06, 1.3987482792186873e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 659|ppo_ep: 1|act_loss: -0.1041259765625|cri_loss: -0.048553466796875|unsuper_loss: 0.0
+average reward score: -0.80615234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.97%) |Training time=0.41s (18.96%) |Others=0.11 (5.06%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.81
+epoch: 0|step: 660|ppo_ep: 1|act_loss: -0.0287628173828125|cri_loss: -0.0077362060546875|unsuper_loss: 0.0
+average reward score: -0.56640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.65s (72.88%) |Training time=0.50s (22.27%) |Others=0.11 (4.85%)|CurSamplesPerSec=14.13 |AvgSamplesPerSec=14.81
+epoch: 0|step: 661|ppo_ep: 1|act_loss: 0.04998779296875|cri_loss: 0.031097412109375|unsuper_loss: 0.0
+average reward score: 0.41943359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.00%) |Training time=0.40s (18.86%) |Others=0.11 (5.15%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.81
+epoch: 0|step: 662|ppo_ep: 1|act_loss: -0.1300048828125|cri_loss: -0.054443359375|unsuper_loss: 0.0
+average reward score: -0.875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.90%) |Training time=0.41s (18.97%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81
+epoch: 0|step: 663|ppo_ep: 1|act_loss: -0.0911865234375|cri_loss: -0.0396728515625|unsuper_loss: 0.0
+average reward score: -0.47216796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.91%) |Training time=0.41s (18.95%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.81
+epoch: 0|step: 664|ppo_ep: 1|act_loss: -0.007312774658203125|cri_loss: -0.00016021728515625|unsuper_loss: 0.0
+average reward score: -0.41162109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.09%) |Training time=0.40s (18.75%) |Others=0.11 (5.16%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.81
+epoch: 0|step: 665|ppo_ep: 1|act_loss: 0.006931304931640625|cri_loss: 0.00670623779296875|unsuper_loss: 0.0
+average reward score: 0.76318359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.82%) |Training time=0.41s (19.05%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81
+epoch: 0|step: 666|ppo_ep: 1|act_loss: 0.05279541015625|cri_loss: 0.030670166015625|unsuper_loss: 0.0
+average reward score: -1.1728515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.91%) |Training time=0.41s (18.96%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81
+epoch: 0|step: 667|ppo_ep: 1|act_loss: -0.04705810546875|cri_loss: -0.018035888671875|unsuper_loss: 0.0
+average reward score: 0.5361328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.90%) |Training time=0.41s (18.97%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81
+epoch: 0|step: 668|ppo_ep: 1|act_loss: 0.03460693359375|cri_loss: 0.0204620361328125|unsuper_loss: 0.0
+average reward score: -0.34765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.84%) |Training time=0.41s (19.03%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81
+[2023-04-22 00:02:49,289] [INFO] [logging.py:96:log_dist] [Rank 0] step=670, skipped=9, lr=[2.5417103711968625e-06, 2.5417103711968625e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-22 00:02:49,307] [INFO] [timer.py:199:stop] epoch=0/micro_step=670/global_step=670, RunningAvgSamplesPerSec=129.22857543246127, CurrSamplesPerSec=129.82911444443477, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-22 00:02:49,400] [INFO] [logging.py:96:log_dist] [Rank 0] step=670, skipped=9, lr=[1.316948378858478e-06, 1.316948378858478e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 669|ppo_ep: 1|act_loss: 0.0263671875|cri_loss: 0.016326904296875|unsuper_loss: 0.0
+average reward score: 0.2373046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.80%) |Training time=0.41s (19.09%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81
+epoch: 0|step: 670|ppo_ep: 1|act_loss: 0.0963134765625|cri_loss: 0.05322265625|unsuper_loss: 0.0
+average reward score: -0.88916015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.53%) |Training time=0.42s (19.35%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.81
+epoch: 0|step: 671|ppo_ep: 1|act_loss: 0.0141448974609375|cri_loss: 0.01035308837890625|unsuper_loss: 0.0
+average reward score: -1.5546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.80%) |Training time=0.41s (19.08%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81
+epoch: 0|step: 672|ppo_ep: 1|act_loss: 0.0086212158203125|cri_loss: 0.0061492919921875|unsuper_loss: 0.0
+average reward score: -1.0087890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.76%) |Training time=0.41s (19.12%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.82
+epoch: 0|step: 673|ppo_ep: 1|act_loss: 0.0135345458984375|cri_loss: 0.01085662841796875|unsuper_loss: 0.0
+average reward score: -0.50830078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.81%) |Training time=0.41s (19.07%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.82
+epoch: 0|step: 674|ppo_ep: 1|act_loss: -0.02301025390625|cri_loss: -0.0066375732421875|unsuper_loss: 0.0
+average reward score: 0.37548828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.83%) |Training time=0.41s (19.04%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.82
+epoch: 0|step: 675|ppo_ep: 1|act_loss: -0.01021575927734375|cri_loss: 0.0|unsuper_loss: 0.0
+average reward score: 0.1141357421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.69%) |Training time=0.41s (19.19%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.82
+epoch: 0|step: 676|ppo_ep: 1|act_loss: -0.041412353515625|cri_loss: -0.0190277099609375|unsuper_loss: 0.0
+average reward score: -0.9560546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.63%) |Training time=0.41s (19.26%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.82
+epoch: 0|step: 677|ppo_ep: 1|act_loss: 0.1141357421875|cri_loss: 0.06134033203125|unsuper_loss: 0.0
+average reward score: -2.740234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.80s (77.80%) |Training time=0.40s (17.33%) |Others=0.11 (4.87%)|CurSamplesPerSec=13.82 |AvgSamplesPerSec=14.81
+epoch: 0|step: 678|ppo_ep: 1|act_loss: 0.0186309814453125|cri_loss: 0.0108795166015625|unsuper_loss: 0.0
+average reward score: -1.8671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.65s (76.08%) |Training time=0.41s (18.83%) |Others=0.11 (5.09%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.81
+[2023-04-22 00:03:10,942] [INFO] [logging.py:96:log_dist] [Rank 0] step=680, skipped=9, lr=[2.386926119214098e-06, 2.386926119214098e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-22 00:03:10,960] [INFO] [timer.py:199:stop] epoch=0/micro_step=680/global_step=680, RunningAvgSamplesPerSec=129.2317742094129, CurrSamplesPerSec=129.0109740964417, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-22 00:03:11,052] [INFO] [logging.py:96:log_dist] [Rank 0] step=680, skipped=9, lr=[1.2367492845668901e-06, 1.2367492845668901e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 679|ppo_ep: 1|act_loss: 0.0460205078125|cri_loss: 0.030242919921875|unsuper_loss: 0.0
+average reward score: 0.059844970703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.75%) |Training time=0.41s (19.14%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81
+epoch: 0|step: 680|ppo_ep: 1|act_loss: 0.075439453125|cri_loss: 0.0390625|unsuper_loss: 0.0
+average reward score: -1.162109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.77%) |Training time=0.41s (19.11%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.81
+epoch: 0|step: 681|ppo_ep: 1|act_loss: 0.0213470458984375|cri_loss: 0.016357421875|unsuper_loss: 0.0
+average reward score: -0.341552734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.86%) |Training time=0.41s (19.01%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81
+epoch: 0|step: 682|ppo_ep: 1|act_loss: 0.05267333984375|cri_loss: 0.0274200439453125|unsuper_loss: 0.0
+average reward score: -1.1796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.78%) |Training time=0.41s (19.14%) |Others=0.11 (5.08%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.81
+epoch: 0|step: 683|ppo_ep: 1|act_loss: 0.0460205078125|cri_loss: 0.030517578125|unsuper_loss: 0.0
+average reward score: -1.4345703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.74s (77.13%) |Training time=0.40s (17.97%) |Others=0.11 (4.90%)|CurSamplesPerSec=14.22 |AvgSamplesPerSec=14.81
+epoch: 0|step: 684|ppo_ep: 1|act_loss: 0.03594970703125|cri_loss: 0.01953125|unsuper_loss: 0.0
+average reward score: -1.2958984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.66%) |Training time=0.41s (19.22%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81
+epoch: 0|step: 685|ppo_ep: 1|act_loss: -0.0040435791015625|cri_loss: 0.002941131591796875|unsuper_loss: 0.0
+average reward score: -0.37841796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.55%) |Training time=0.41s (19.32%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81
+epoch: 0|step: 686|ppo_ep: 1|act_loss: -0.1070556640625|cri_loss: -0.04705810546875|unsuper_loss: 0.0
+average reward score: -0.7021484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.64%) |Training time=0.41s (19.24%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.81
+epoch: 0|step: 687|ppo_ep: 1|act_loss: 0.1385498046875|cri_loss: 0.0823974609375|unsuper_loss: 0.0
+average reward score: 0.66650390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.64%) |Training time=0.41s (19.23%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.81
+epoch: 0|step: 688|ppo_ep: 1|act_loss: -0.055328369140625|cri_loss: -0.022857666015625|unsuper_loss: 0.0
+average reward score: -0.525390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.65%) |Training time=0.41s (19.24%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81
+[2023-04-22 00:03:32,527] [INFO] [logging.py:96:log_dist] [Rank 0] step=690, skipped=9, lr=[2.2354408639905283e-06, 2.2354408639905283e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-22 00:03:32,546] [INFO] [timer.py:199:stop] epoch=0/micro_step=690/global_step=690, RunningAvgSamplesPerSec=129.22223728355166, CurrSamplesPerSec=127.72447132669355, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-22 00:03:32,638] [INFO] [logging.py:96:log_dist] [Rank 0] step=690, skipped=9, lr=[1.158259515020999e-06, 1.158259515020999e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 689|ppo_ep: 1|act_loss: 0.058563232421875|cri_loss: 0.03790283203125|unsuper_loss: 0.0
+average reward score: -0.890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.62%) |Training time=0.41s (19.27%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81
+epoch: 0|step: 690|ppo_ep: 1|act_loss: -0.01146697998046875|cri_loss: -0.001739501953125|unsuper_loss: 0.0
+average reward score: -0.029541015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.57%) |Training time=0.41s (19.30%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81
+epoch: 0|step: 691|ppo_ep: 1|act_loss: 0.06390380859375|cri_loss: 0.039794921875|unsuper_loss: 0.0
+average reward score: -1.775390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.59%) |Training time=0.41s (19.29%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.81
+epoch: 0|step: 692|ppo_ep: 1|act_loss: 0.049163818359375|cri_loss: 0.02947998046875|unsuper_loss: 0.0
+average reward score: -0.57958984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.80s (77.61%) |Training time=0.41s (17.63%) |Others=0.11 (4.76%)|CurSamplesPerSec=13.83 |AvgSamplesPerSec=14.81
+epoch: 0|step: 693|ppo_ep: 1|act_loss: -0.039581298828125|cri_loss: -0.018524169921875|unsuper_loss: 0.0
+average reward score: -1.0068359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.67%) |Training time=0.41s (19.19%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81
+epoch: 0|step: 694|ppo_ep: 1|act_loss: 0.01068878173828125|cri_loss: 0.00983428955078125|unsuper_loss: 0.0
+average reward score: -0.206298828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.55%) |Training time=0.42s (19.34%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.81
+epoch: 0|step: 695|ppo_ep: 1|act_loss: 0.01065826416015625|cri_loss: 0.01073455810546875|unsuper_loss: 0.0
+average reward score: -0.57568359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.51%) |Training time=0.42s (19.35%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.81
+epoch: 0|step: 696|ppo_ep: 1|act_loss: -0.0494384765625|cri_loss: -0.0189971923828125|unsuper_loss: 0.0
+average reward score: 0.024169921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.69%) |Training time=0.41s (19.18%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81
+epoch: 0|step: 697|ppo_ep: 1|act_loss: 0.006175994873046875|cri_loss: 0.005123138427734375|unsuper_loss: 0.0
+average reward score: -0.35400390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.64%) |Training time=0.41s (19.23%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81
+epoch: 0|step: 698|ppo_ep: 1|act_loss: 0.110107421875|cri_loss: 0.06536865234375|unsuper_loss: 0.0
+average reward score: -0.66943359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.71%) |Training time=0.41s (19.16%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81
+[2023-04-22 00:03:54,159] [INFO] [logging.py:96:log_dist] [Rank 0] step=700, skipped=9, lr=[2.087459582647641e-06, 2.087459582647641e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-22 00:03:54,178] [INFO] [timer.py:199:stop] epoch=0/micro_step=700/global_step=700, RunningAvgSamplesPerSec=129.19963900934587, CurrSamplesPerSec=125.25439802307102, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-22 00:03:54,270] [INFO] [logging.py:96:log_dist] [Rank 0] step=700, skipped=9, lr=[1.0815852759832337e-06, 1.0815852759832337e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 699|ppo_ep: 1|act_loss: 0.0067901611328125|cri_loss: 0.00766754150390625|unsuper_loss: 0.0
+average reward score: -1.095703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.42%) |Training time=0.42s (19.48%) |Others=0.11 (5.10%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.81
+epoch: 0|step: 700|ppo_ep: 1|act_loss: 0.0031528472900390625|cri_loss: 0.00409698486328125|unsuper_loss: 0.0
+average reward score: -0.25732421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.42%) |Training time=0.42s (19.45%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81
+epoch: 0|step: 701|ppo_ep: 1|act_loss: -0.05828857421875|cri_loss: -0.0258941650390625|unsuper_loss: 0.0
+average reward score: 0.0322265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.50%) |Training time=0.42s (19.39%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81
+epoch: 0|step: 702|ppo_ep: 1|act_loss: 0.029266357421875|cri_loss: 0.018768310546875|unsuper_loss: 0.0
+average reward score: -1.029296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.44%) |Training time=0.42s (19.43%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81
+epoch: 0|step: 703|ppo_ep: 1|act_loss: -0.07574462890625|cri_loss: -0.0323486328125|unsuper_loss: 0.0
+average reward score: -1.5322265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.50%) |Training time=0.42s (19.37%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81
+epoch: 0|step: 704|ppo_ep: 1|act_loss: 0.091064453125|cri_loss: 0.047698974609375|unsuper_loss: 0.0
+average reward score: -0.6513671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.36%) |Training time=0.42s (19.51%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81
+[2023-04-22 00:04:07,023] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 16384, but hysteresis is 2. Reducing hysteresis to 1
+[2023-04-22 00:04:07,108] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 16384, but hysteresis is 2. Reducing hysteresis to 1
+epoch: 0|step: 705|ppo_ep: 1|act_loss: 0.007541656494140625|cri_loss: 0.006694793701171875|unsuper_loss: 0.0
+average reward score: -0.25390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.11s |Gather latency=0.00s (0.00%) |Generate time=1.62s (76.86%) |Training time=0.39s (18.34%) |Others=0.10 (4.79%)|CurSamplesPerSec=15.18 |AvgSamplesPerSec=14.82
+epoch: 0|step: 706|ppo_ep: 1|act_loss: -0.07000732421875|cri_loss: -0.02325439453125|unsuper_loss: 0.0
+average reward score: 0.00335693359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.81%) |Training time=0.41s (19.10%) |Others=0.11 (5.08%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.82
+[2023-04-22 00:04:11,406] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 16384, reducing to 8192
+epoch: 0|step: 707|ppo_ep: 1|act_loss: -0.03125|cri_loss: -0.01192474365234375|unsuper_loss: 0.0
+average reward score: 0.06884765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.90%) |Training time=0.41s (19.34%) |Others=0.10 (4.76%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.82
+epoch: 0|step: 708|ppo_ep: 1|act_loss: -0.0141754150390625|cri_loss: -0.00247955322265625|unsuper_loss: 0.0
+average reward score: -0.33203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.79s (77.33%) |Training time=0.41s (17.91%) |Others=0.11 (4.76%)|CurSamplesPerSec=13.82 |AvgSamplesPerSec=14.81
+[2023-04-22 00:04:15,753] [INFO] [logging.py:96:log_dist] [Rank 0] step=710, skipped=10, lr=[1.957437924380642e-06, 1.957437924380642e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-22 00:04:15,772] [INFO] [timer.py:199:stop] epoch=0/micro_step=710/global_step=710, RunningAvgSamplesPerSec=129.18221159566644, CurrSamplesPerSec=126.92775908472775, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-22 00:04:15,864] [INFO] [logging.py:96:log_dist] [Rank 0] step=710, skipped=11, lr=[1.0216228722853735e-06, 1.0216228722853735e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 709|ppo_ep: 1|act_loss: 0.0721435546875|cri_loss: 0.04412841796875|unsuper_loss: 0.0
+average reward score: -1.06640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.54%) |Training time=0.42s (19.35%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81
+epoch: 0|step: 710|ppo_ep: 1|act_loss: 0.086669921875|cri_loss: 0.0509033203125|unsuper_loss: 0.0
+average reward score: -0.283447265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.40%) |Training time=0.42s (19.48%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.81
+epoch: 0|step: 711|ppo_ep: 1|act_loss: 0.01175689697265625|cri_loss: 0.01348876953125|unsuper_loss: 0.0
+average reward score: -0.767578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.81%) |Training time=0.41s (19.08%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.81
+epoch: 0|step: 712|ppo_ep: 1|act_loss: -0.018890380859375|cri_loss: -0.00286865234375|unsuper_loss: 0.0
+average reward score: 0.04656982421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.14%) |Training time=0.43s (19.73%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.81
+epoch: 0|step: 713|ppo_ep: 1|act_loss: -0.06451416015625|cri_loss: -0.026153564453125|unsuper_loss: 0.0
+average reward score: -0.8388671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.73s (76.72%) |Training time=0.41s (18.39%) |Others=0.11 (4.90%)|CurSamplesPerSec=14.21 |AvgSamplesPerSec=14.81
+epoch: 0|step: 714|ppo_ep: 1|act_loss: 0.06732177734375|cri_loss: 0.03741455078125|unsuper_loss: 0.0
+average reward score: -0.53125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.58%) |Training time=0.41s (19.28%) |Others=0.11 (5.15%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81
+epoch: 0|step: 715|ppo_ep: 1|act_loss: 0.041107177734375|cri_loss: 0.0279388427734375|unsuper_loss: 0.0
+average reward score: -0.82421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.46%) |Training time=0.42s (19.41%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81
+epoch: 0|step: 716|ppo_ep: 1|act_loss: 0.09375|cri_loss: 0.052093505859375|unsuper_loss: 0.0
+average reward score: -2.0703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.50%) |Training time=0.42s (19.36%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81
+epoch: 0|step: 717|ppo_ep: 1|act_loss: -0.0201416015625|cri_loss: -0.00740814208984375|unsuper_loss: 0.0
+average reward score: -0.42138671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.50%) |Training time=0.42s (19.38%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81
+epoch: 0|step: 718|ppo_ep: 1|act_loss: -0.0660400390625|cri_loss: -0.0230865478515625|unsuper_loss: 0.0
+average reward score: -0.5458984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.52%) |Training time=0.41s (19.28%) |Others=0.11 (5.20%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81
+[2023-04-22 00:04:37,352] [INFO] [logging.py:96:log_dist] [Rank 0] step=720, skipped=10, lr=[1.8166617060316112e-06, 1.8166617060316112e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-22 00:04:37,371] [INFO] [timer.py:199:stop] epoch=0/micro_step=720/global_step=720, RunningAvgSamplesPerSec=129.14318521309033, CurrSamplesPerSec=124.90552173273748, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-22 00:04:37,463] [INFO] [logging.py:96:log_dist] [Rank 0] step=720, skipped=11, lr=[9.48476295431443e-07, 9.48476295431443e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 719|ppo_ep: 1|act_loss: -0.054290771484375|cri_loss: -0.024017333984375|unsuper_loss: 0.0
+average reward score: -0.84033203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.40%) |Training time=0.42s (19.50%) |Others=0.11 (5.10%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.81
+epoch: 0|step: 720|ppo_ep: 1|act_loss: 0.05023193359375|cri_loss: 0.03253173828125|unsuper_loss: 0.0
+average reward score: -0.529296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.45%) |Training time=0.42s (19.42%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81
+epoch: 0|step: 721|ppo_ep: 1|act_loss: -0.054718017578125|cri_loss: -0.020660400390625|unsuper_loss: 0.0
+average reward score: -0.69482421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.54%) |Training time=0.41s (19.34%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81
+epoch: 0|step: 722|ppo_ep: 1|act_loss: -0.01715087890625|cri_loss: -0.00696563720703125|unsuper_loss: 0.0
+average reward score: -1.119140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.48%) |Training time=0.42s (19.40%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81
+epoch: 0|step: 723|ppo_ep: 1|act_loss: 0.02130126953125|cri_loss: 0.0194854736328125|unsuper_loss: 0.0
+average reward score: 0.0269775390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.79s (77.44%) |Training time=0.41s (17.81%) |Others=0.11 (4.76%)|CurSamplesPerSec=13.82 |AvgSamplesPerSec=14.81
+epoch: 0|step: 724|ppo_ep: 1|act_loss: 0.037017822265625|cri_loss: 0.019287109375|unsuper_loss: 0.0
+average reward score: -0.7197265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.55%) |Training time=0.41s (19.33%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81
+epoch: 0|step: 725|ppo_ep: 1|act_loss: -0.0596923828125|cri_loss: -0.023834228515625|unsuper_loss: 0.0
+average reward score: -0.12841796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.50%) |Training time=0.42s (19.38%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81
+epoch: 0|step: 726|ppo_ep: 1|act_loss: 0.05340576171875|cri_loss: 0.03375244140625|unsuper_loss: 0.0
+average reward score: 0.2296142578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.66%) |Training time=0.41s (19.21%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.81
+epoch: 0|step: 727|ppo_ep: 1|act_loss: 0.025787353515625|cri_loss: 0.019775390625|unsuper_loss: 0.0
+average reward score: -0.9541015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.44%) |Training time=0.42s (19.43%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81
+epoch: 0|step: 728|ppo_ep: 1|act_loss: 0.05279541015625|cri_loss: 0.032928466796875|unsuper_loss: 0.0
+average reward score: -0.59716796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.38%) |Training time=0.42s (19.48%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81
+[2023-04-22 00:04:58,968] [INFO] [logging.py:96:log_dist] [Rank 0] step=730, skipped=10, lr=[1.6799561183392554e-06, 1.6799561183392554e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-22 00:04:58,987] [INFO] [timer.py:199:stop] epoch=0/micro_step=730/global_step=730, RunningAvgSamplesPerSec=129.10765431344663, CurrSamplesPerSec=126.06426925724891, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-22 00:04:59,079] [INFO] [logging.py:96:log_dist] [Rank 0] step=730, skipped=11, lr=[8.774291101150409e-07, 8.774291101150409e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 729|ppo_ep: 1|act_loss: -0.039642333984375|cri_loss: -0.01251983642578125|unsuper_loss: 0.0
+average reward score: 0.05914306640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.45%) |Training time=0.42s (19.43%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81
+epoch: 0|step: 730|ppo_ep: 1|act_loss: 0.0360107421875|cri_loss: 0.0208740234375|unsuper_loss: 0.0
+average reward score: -0.266357421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.35%) |Training time=0.42s (19.52%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81
+epoch: 0|step: 731|ppo_ep: 1|act_loss: -0.0072021484375|cri_loss: 0.00186920166015625|unsuper_loss: 0.0
+average reward score: -0.82177734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.31%) |Training time=0.42s (19.56%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81
+epoch: 0|step: 732|ppo_ep: 1|act_loss: 0.0694580078125|cri_loss: 0.04595947265625|unsuper_loss: 0.0
+average reward score: -0.93408203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.24%) |Training time=0.42s (19.64%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81
+epoch: 0|step: 733|ppo_ep: 1|act_loss: -0.025909423828125|cri_loss: -0.01158905029296875|unsuper_loss: 0.0
+average reward score: -1.8427734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.35%) |Training time=0.42s (19.54%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.81
+epoch: 0|step: 734|ppo_ep: 1|act_loss: 0.0379638671875|cri_loss: 0.023162841796875|unsuper_loss: 0.0
+average reward score: -0.4833984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.57%) |Training time=0.42s (19.34%) |Others=0.11 (5.09%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.81
+epoch: 0|step: 735|ppo_ep: 1|act_loss: 0.119140625|cri_loss: 0.0655517578125|unsuper_loss: 0.0
+average reward score: -0.5087890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.14%) |Training time=0.42s (19.74%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.81
+epoch: 0|step: 736|ppo_ep: 1|act_loss: 0.006641387939453125|cri_loss: 0.0052642822265625|unsuper_loss: 0.0
+average reward score: -0.966796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.13%) |Training time=0.42s (19.75%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.81
+epoch: 0|step: 737|ppo_ep: 1|act_loss: -0.04205322265625|cri_loss: -0.0151519775390625|unsuper_loss: 0.0
+average reward score: -0.308837890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.27%) |Training time=0.42s (19.60%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81
+epoch: 0|step: 738|ppo_ep: 1|act_loss: 0.1021728515625|cri_loss: 0.0552978515625|unsuper_loss: 0.0
+average reward score: -0.8564453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.79s (77.30%) |Training time=0.41s (17.93%) |Others=0.11 (4.77%)|CurSamplesPerSec=13.86 |AvgSamplesPerSec=14.81
+[2023-04-22 00:05:20,620] [INFO] [logging.py:96:log_dist] [Rank 0] step=740, skipped=10, lr=[1.5475061398205608e-06, 1.5475061398205608e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-22 00:05:20,638] [INFO] [timer.py:199:stop] epoch=0/micro_step=740/global_step=740, RunningAvgSamplesPerSec=129.03885715382538, CurrSamplesPerSec=123.61775292240925, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-22 00:05:20,731] [INFO] [logging.py:96:log_dist] [Rank 0] step=740, skipped=11, lr=[8.085774514188454e-07, 8.085774514188454e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 739|ppo_ep: 1|act_loss: -0.0238800048828125|cri_loss: -0.0066375732421875|unsuper_loss: 0.0
+average reward score: 0.219970703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.25%) |Training time=0.42s (19.64%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.81
+epoch: 0|step: 740|ppo_ep: 1|act_loss: -0.050323486328125|cri_loss: -0.01593017578125|unsuper_loss: 0.0
+average reward score: -0.78125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.24%) |Training time=0.42s (19.64%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81
+epoch: 0|step: 741|ppo_ep: 1|act_loss: -0.1131591796875|cri_loss: -0.0484619140625|unsuper_loss: 0.0
+average reward score: -0.92578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.49%) |Training time=0.42s (19.41%) |Others=0.11 (5.10%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.81
+epoch: 0|step: 742|ppo_ep: 1|act_loss: 0.0104217529296875|cri_loss: 0.00846099853515625|unsuper_loss: 0.0
+average reward score: -1.607421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.80%) |Training time=0.44s (20.14%) |Others=0.11 (5.06%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.81
+epoch: 0|step: 743|ppo_ep: 1|act_loss: -0.0821533203125|cri_loss: -0.036956787109375|unsuper_loss: 0.0
+average reward score: -1.2998046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.73s (76.94%) |Training time=0.41s (18.17%) |Others=0.11 (4.89%)|CurSamplesPerSec=14.22 |AvgSamplesPerSec=14.81
+epoch: 0|step: 744|ppo_ep: 1|act_loss: -0.048980712890625|cri_loss: -0.0201416015625|unsuper_loss: 0.0
+average reward score: -0.8017578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.46%) |Training time=0.42s (19.42%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.81
+epoch: 0|step: 745|ppo_ep: 1|act_loss: -0.02099609375|cri_loss: -0.0076904296875|unsuper_loss: 0.0
+average reward score: -1.23828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.83%) |Training time=0.41s (19.05%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81
+epoch: 0|step: 746|ppo_ep: 1|act_loss: -0.087158203125|cri_loss: -0.0394287109375|unsuper_loss: 0.0
+average reward score: -0.9384765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.89%) |Training time=0.41s (19.00%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81
+epoch: 0|step: 747|ppo_ep: 1|act_loss: 0.04754638671875|cri_loss: 0.032012939453125|unsuper_loss: 0.0
+average reward score: -0.67919921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.91%) |Training time=0.41s (18.97%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81
+epoch: 0|step: 748|ppo_ep: 1|act_loss: -0.0220489501953125|cri_loss: -0.00676727294921875|unsuper_loss: 0.0
+average reward score: -1.1484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.83%) |Training time=0.41s (19.04%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.81
+[2023-04-22 00:05:42,230] [INFO] [logging.py:96:log_dist] [Rank 0] step=750, skipped=10, lr=[1.4194909906596752e-06, 1.4194909906596752e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-22 00:05:42,248] [INFO] [timer.py:199:stop] epoch=0/micro_step=750/global_step=750, RunningAvgSamplesPerSec=129.0158345602834, CurrSamplesPerSec=132.17691641062004, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-22 00:05:42,584] [INFO] [logging.py:96:log_dist] [Rank 0] step=750, skipped=11, lr=[7.420144836233564e-07, 7.420144836233564e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 749|ppo_ep: 1|act_loss: 0.0299072265625|cri_loss: 0.018341064453125|unsuper_loss: 0.0
+average reward score: -1.2841796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.39s |Gather latency=0.00s (0.00%) |Generate time=1.63s (68.29%) |Training time=0.40s (16.95%) |Others=0.35 (14.75%)|CurSamplesPerSec=13.40 |AvgSamplesPerSec=14.81
+epoch: 0|step: 750|ppo_ep: 1|act_loss: -0.0872802734375|cri_loss: -0.0391845703125|unsuper_loss: 0.0
+average reward score: -0.34130859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.75%) |Training time=0.41s (19.13%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81
+epoch: 0|step: 751|ppo_ep: 1|act_loss: 0.024627685546875|cri_loss: 0.019012451171875|unsuper_loss: 0.0
+average reward score: 0.54443359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.76%) |Training time=0.41s (19.12%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.81
+epoch: 0|step: 752|ppo_ep: 1|act_loss: 0.0195770263671875|cri_loss: 0.01739501953125|unsuper_loss: 0.0
+average reward score: -0.92041015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.84%) |Training time=0.41s (19.04%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81
+epoch: 0|step: 753|ppo_ep: 1|act_loss: 0.068115234375|cri_loss: 0.0450439453125|unsuper_loss: 0.0
+average reward score: -0.34716796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.80s (77.87%) |Training time=0.40s (17.39%) |Others=0.11 (4.74%)|CurSamplesPerSec=13.81 |AvgSamplesPerSec=14.81
+epoch: 0|step: 754|ppo_ep: 1|act_loss: 0.07269287109375|cri_loss: 0.048675537109375|unsuper_loss: 0.0
+average reward score: -0.7353515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.88%) |Training time=0.41s (18.98%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81
+epoch: 0|step: 755|ppo_ep: 1|act_loss: 0.0291748046875|cri_loss: 0.021240234375|unsuper_loss: 0.0
+average reward score: -0.09405517578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.82%) |Training time=0.41s (19.07%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.81
+epoch: 0|step: 756|ppo_ep: 1|act_loss: -0.03216552734375|cri_loss: -0.003326416015625|unsuper_loss: 0.0
+average reward score: -1.314453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.81%) |Training time=0.41s (19.07%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81
+epoch: 0|step: 757|ppo_ep: 1|act_loss: 0.00405120849609375|cri_loss: 0.00621795654296875|unsuper_loss: 0.0
+average reward score: -0.67822265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.86%) |Training time=0.41s (19.01%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81
+epoch: 0|step: 758|ppo_ep: 1|act_loss: 0.05035400390625|cri_loss: 0.027923583984375|unsuper_loss: 0.0
+average reward score: -1.19921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.99%) |Training time=0.40s (18.88%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81
+[2023-04-22 00:06:04,108] [INFO] [logging.py:96:log_dist] [Rank 0] step=760, skipped=10, lr=[1.2960838902022257e-06, 1.2960838902022257e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-22 00:06:04,126] [INFO] [timer.py:199:stop] epoch=0/micro_step=760/global_step=760, RunningAvgSamplesPerSec=129.03406317774622, CurrSamplesPerSec=130.88857704291348, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-22 00:06:04,219] [INFO] [logging.py:96:log_dist] [Rank 0] step=760, skipped=11, lr=[6.778302741448104e-07, 6.778302741448104e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 759|ppo_ep: 1|act_loss: 0.1380615234375|cri_loss: 0.08697509765625|unsuper_loss: 0.0
+average reward score: 0.20556640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.92%) |Training time=0.41s (18.98%) |Others=0.11 (5.10%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81
+epoch: 0|step: 760|ppo_ep: 1|act_loss: -0.0143280029296875|cri_loss: 0.00555419921875|unsuper_loss: 0.0
+average reward score: 0.45263671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.94%) |Training time=0.41s (18.94%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81
+epoch: 0|step: 761|ppo_ep: 1|act_loss: 0.019561767578125|cri_loss: 0.02337646484375|unsuper_loss: 0.0
+average reward score: -0.7080078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.90%) |Training time=0.41s (18.97%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81
+epoch: 0|step: 762|ppo_ep: 1|act_loss: -0.0660400390625|cri_loss: -0.02301025390625|unsuper_loss: 0.0
+average reward score: -0.9970703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.65s (76.11%) |Training time=0.41s (18.79%) |Others=0.11 (5.09%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.81
+epoch: 0|step: 763|ppo_ep: 1|act_loss: 0.0482177734375|cri_loss: 0.0267181396484375|unsuper_loss: 0.0
+average reward score: -0.8984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.77%) |Training time=0.41s (19.12%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.81
+epoch: 0|step: 764|ppo_ep: 1|act_loss: -0.041107177734375|cri_loss: -0.014190673828125|unsuper_loss: 0.0
+average reward score: -0.765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.78%) |Training time=0.41s (19.10%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81
+epoch: 0|step: 765|ppo_ep: 1|act_loss: 0.1265869140625|cri_loss: 0.0743408203125|unsuper_loss: 0.0
+average reward score: -0.548828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.87%) |Training time=0.41s (19.02%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81
+epoch: 0|step: 766|ppo_ep: 1|act_loss: 0.1041259765625|cri_loss: 0.0673828125|unsuper_loss: 0.0
+average reward score: -1.3291015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.62%) |Training time=0.41s (19.26%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.81
+epoch: 0|step: 767|ppo_ep: 1|act_loss: 0.0036773681640625|cri_loss: 0.007312774658203125|unsuper_loss: 0.0
+average reward score: -1.1064453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.87%) |Training time=0.41s (19.02%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81
+epoch: 0|step: 768|ppo_ep: 1|act_loss: 0.05328369140625|cri_loss: 0.034423828125|unsuper_loss: 0.0
+average reward score: -0.29150390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.80s (77.84%) |Training time=0.40s (17.41%) |Others=0.11 (4.75%)|CurSamplesPerSec=13.80 |AvgSamplesPerSec=14.81
+[2023-04-22 00:06:25,760] [INFO] [logging.py:96:log_dist] [Rank 0] step=770, skipped=10, lr=[1.1774518225694339e-06, 1.1774518225694339e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-22 00:06:25,778] [INFO] [timer.py:199:stop] epoch=0/micro_step=770/global_step=770, RunningAvgSamplesPerSec=129.05144542838423, CurrSamplesPerSec=131.80426019163104, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-22 00:06:25,871] [INFO] [logging.py:96:log_dist] [Rank 0] step=770, skipped=11, lr=[6.16111671663524e-07, 6.16111671663524e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 769|ppo_ep: 1|act_loss: -0.07861328125|cri_loss: -0.0296630859375|unsuper_loss: 0.0
+average reward score: -0.77685546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.96%) |Training time=0.41s (18.92%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81
+epoch: 0|step: 770|ppo_ep: 1|act_loss: 0.05126953125|cri_loss: 0.043121337890625|unsuper_loss: 0.0
+average reward score: -0.8037109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.88%) |Training time=0.41s (19.00%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81
+epoch: 0|step: 771|ppo_ep: 1|act_loss: 0.098388671875|cri_loss: 0.064208984375|unsuper_loss: 0.0
+average reward score: -0.7880859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.93%) |Training time=0.41s (18.97%) |Others=0.11 (5.10%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.81
+epoch: 0|step: 772|ppo_ep: 1|act_loss: -0.03338623046875|cri_loss: -0.004425048828125|unsuper_loss: 0.0
+average reward score: -0.031341552734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.66s (76.02%) |Training time=0.41s (18.94%) |Others=0.11 (5.04%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.81
+epoch: 0|step: 773|ppo_ep: 1|act_loss: 0.0093536376953125|cri_loss: 0.01087188720703125|unsuper_loss: 0.0
+average reward score: -0.5390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.74s (77.15%) |Training time=0.40s (17.97%) |Others=0.11 (4.88%)|CurSamplesPerSec=14.23 |AvgSamplesPerSec=14.81
+epoch: 0|step: 774|ppo_ep: 1|act_loss: -0.04620361328125|cri_loss: -0.00244140625|unsuper_loss: 0.0
+average reward score: -1.115234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.90%) |Training time=0.41s (18.97%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.81
+epoch: 0|step: 775|ppo_ep: 1|act_loss: 0.02081298828125|cri_loss: 0.0173797607421875|unsuper_loss: 0.0
+average reward score: -0.59375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.90%) |Training time=0.41s (18.97%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81
+epoch: 0|step: 776|ppo_ep: 1|act_loss: 0.1087646484375|cri_loss: 0.0655517578125|unsuper_loss: 0.0
+average reward score: 0.035125732421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.81%) |Training time=0.41s (19.07%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.81
+epoch: 0|step: 777|ppo_ep: 1|act_loss: 0.06744384765625|cri_loss: 0.0421142578125|unsuper_loss: 0.0
+average reward score: -0.5947265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.89%) |Training time=0.41s (18.99%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.81
+epoch: 0|step: 778|ppo_ep: 1|act_loss: 0.06231689453125|cri_loss: 0.03839111328125|unsuper_loss: 0.0
+average reward score: -0.9296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.82%) |Training time=0.41s (19.06%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.81
+[2023-04-22 00:06:47,377] [INFO] [logging.py:96:log_dist] [Rank 0] step=780, skipped=10, lr=[1.0637553107092514e-06, 1.0637553107092514e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-22 00:06:47,395] [INFO] [timer.py:199:stop] epoch=0/micro_step=780/global_step=780, RunningAvgSamplesPerSec=129.06477217961265, CurrSamplesPerSec=129.224120318531, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-22 00:06:47,487] [INFO] [logging.py:96:log_dist] [Rank 0] step=780, skipped=11, lr=[5.569421886075563e-07, 5.569421886075563e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 779|ppo_ep: 1|act_loss: 0.035858154296875|cri_loss: 0.02398681640625|unsuper_loss: 0.0
+average reward score: -0.007568359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.80%) |Training time=0.41s (19.10%) |Others=0.11 (5.10%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.81
+epoch: 0|step: 780|ppo_ep: 1|act_loss: -0.0109710693359375|cri_loss: 0.0048370361328125|unsuper_loss: 0.0
+average reward score: -0.6044921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.77%) |Training time=0.41s (19.11%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81
+epoch: 0|step: 781|ppo_ep: 1|act_loss: -0.029937744140625|cri_loss: -0.0052337646484375|unsuper_loss: 0.0
+average reward score: -0.9375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.79%) |Training time=0.41s (19.05%) |Others=0.11 (5.16%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.81
+epoch: 0|step: 782|ppo_ep: 1|act_loss: 0.002567291259765625|cri_loss: 0.008209228515625|unsuper_loss: 0.0
+average reward score: 0.55078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.63s (72.64%) |Training time=0.42s (18.90%) |Others=0.19 (8.46%)|CurSamplesPerSec=14.28 |AvgSamplesPerSec=14.81
+epoch: 0|step: 783|ppo_ep: 1|act_loss: -0.0092010498046875|cri_loss: 0.007904052734375|unsuper_loss: 0.0
+average reward score: -0.477783203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.98%) |Training time=0.41s (18.89%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.81
+[2023-04-22 00:06:58,196] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 16384, reducing to 8192
+epoch: 0|step: 784|ppo_ep: 1|act_loss: -0.075927734375|cri_loss: -0.01995849609375|unsuper_loss: 0.0
+average reward score: -1.1171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.12s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.72%) |Training time=0.38s (18.10%) |Others=0.11 (5.18%)|CurSamplesPerSec=15.10 |AvgSamplesPerSec=14.81
+epoch: 0|step: 785|ppo_ep: 1|act_loss: 0.0209197998046875|cri_loss: 0.0208587646484375|unsuper_loss: 0.0
+average reward score: -0.91796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.75%) |Training time=0.41s (19.13%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.81
+epoch: 0|step: 786|ppo_ep: 1|act_loss: 0.06658935546875|cri_loss: 0.0540771484375|unsuper_loss: 0.0
+average reward score: -0.619140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.95%) |Training time=0.41s (18.93%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81
+epoch: 0|step: 787|ppo_ep: 1|act_loss: 0.01556396484375|cri_loss: 0.01529693603515625|unsuper_loss: 0.0
+average reward score: -1.31640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.86%) |Training time=0.41s (19.02%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81
+epoch: 0|step: 788|ppo_ep: 1|act_loss: 0.0511474609375|cri_loss: 0.03466796875|unsuper_loss: 0.0
+average reward score: -0.470703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.77%) |Training time=0.41s (19.12%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.81
+[2023-04-22 00:07:08,913] [INFO] [logging.py:96:log_dist] [Rank 0] step=790, skipped=11, lr=[9.657756441308542e-07, 9.657756441308542e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-22 00:07:08,932] [INFO] [timer.py:199:stop] epoch=0/micro_step=790/global_step=790, RunningAvgSamplesPerSec=129.08285400329538, CurrSamplesPerSec=129.59669852084232, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-22 00:07:09,025] [INFO] [logging.py:96:log_dist] [Rank 0] step=790, skipped=11, lr=[5.004018881507016e-07, 5.004018881507016e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 789|ppo_ep: 1|act_loss: -0.00115966796875|cri_loss: 0.007965087890625|unsuper_loss: 0.0
+average reward score: 0.03558349609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.81%) |Training time=0.41s (19.08%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.81
+epoch: 0|step: 790|ppo_ep: 1|act_loss: 0.02569580078125|cri_loss: 0.0225677490234375|unsuper_loss: 0.0
+average reward score: 0.43408203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.65%) |Training time=0.41s (18.88%) |Others=0.12 (5.47%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.81
+epoch: 0|step: 791|ppo_ep: 1|act_loss: 0.17724609375|cri_loss: 0.1102294921875|unsuper_loss: 0.0
+average reward score: 0.1558837890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.95%) |Training time=0.41s (18.92%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81
+epoch: 0|step: 792|ppo_ep: 1|act_loss: 0.10076904296875|cri_loss: 0.065185546875|unsuper_loss: 0.0
+average reward score: -1.01171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.86%) |Training time=0.41s (19.01%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81
+[2023-04-22 00:07:17,503] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 8192, reducing to 4096
+epoch: 0|step: 793|ppo_ep: 1|act_loss: -0.0789794921875|cri_loss: -0.017547607421875|unsuper_loss: 0.0
+average reward score: -0.6552734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.12s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.82%) |Training time=0.38s (18.01%) |Others=0.11 (5.17%)|CurSamplesPerSec=15.10 |AvgSamplesPerSec=14.81
+epoch: 0|step: 794|ppo_ep: 1|act_loss: 0.01470947265625|cri_loss: 0.01580810546875|unsuper_loss: 0.0
+average reward score: -0.64208984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.90%) |Training time=0.41s (18.98%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81
+epoch: 0|step: 795|ppo_ep: 1|act_loss: 0.08331298828125|cri_loss: 0.0516357421875|unsuper_loss: 0.0
+average reward score: -0.38134765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.84%) |Training time=0.41s (19.03%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81
+epoch: 0|step: 796|ppo_ep: 1|act_loss: 0.1563720703125|cri_loss: 0.0919189453125|unsuper_loss: 0.0
+average reward score: 0.177001953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.92%) |Training time=0.41s (18.95%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81
+epoch: 0|step: 797|ppo_ep: 1|act_loss: 0.157958984375|cri_loss: 0.09356689453125|unsuper_loss: 0.0
+average reward score: -0.78759765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.03%) |Training time=0.40s (18.83%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81
+epoch: 0|step: 798|ppo_ep: 1|act_loss: 0.0290374755859375|cri_loss: 0.021209716796875|unsuper_loss: 0.0
+average reward score: -0.311767578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.82s (78.13%) |Training time=0.40s (17.16%) |Others=0.11 (4.71%)|CurSamplesPerSec=13.72 |AvgSamplesPerSec=14.81
+[2023-04-22 00:07:30,540] [INFO] [logging.py:96:log_dist] [Rank 0] step=800, skipped=12, lr=[8.720258705240782e-07, 8.720258705240782e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-22 00:07:30,558] [INFO] [timer.py:199:stop] epoch=0/micro_step=800/global_step=800, RunningAvgSamplesPerSec=129.1268465720084, CurrSamplesPerSec=130.86203289058668, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-22 00:07:30,651] [INFO] [logging.py:96:log_dist] [Rank 0] step=800, skipped=11, lr=[4.4656727587773506e-07, 4.4656727587773506e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 799|ppo_ep: 1|act_loss: 0.1204833984375|cri_loss: 0.0723876953125|unsuper_loss: 0.0
+average reward score: 0.513671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.89%) |Training time=0.41s (18.99%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81
+epoch: 0|step: 800|ppo_ep: 1|act_loss: -0.03778076171875|cri_loss: -0.01371002197265625|unsuper_loss: 0.0
+average reward score: -1.349609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.60%) |Training time=0.42s (19.29%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.81
+epoch: 0|step: 801|ppo_ep: 1|act_loss: -0.002838134765625|cri_loss: 0.01058197021484375|unsuper_loss: 0.0
+average reward score: 0.189208984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.94%) |Training time=0.41s (18.95%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.81
+epoch: 0|step: 802|ppo_ep: 1|act_loss: -0.017333984375|cri_loss: 0.0048980712890625|unsuper_loss: 0.0
+average reward score: -0.0274658203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.66s (75.61%) |Training time=0.41s (18.74%) |Others=0.12 (5.65%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.81
+epoch: 0|step: 803|ppo_ep: 1|act_loss: -0.00421142578125|cri_loss: 0.00772857666015625|unsuper_loss: 0.0
+average reward score: -0.50634765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.71s (76.84%) |Training time=0.41s (18.23%) |Others=0.11 (4.93%)|CurSamplesPerSec=14.35 |AvgSamplesPerSec=14.81
+epoch: 0|step: 804|ppo_ep: 1|act_loss: -0.0230560302734375|cri_loss: -0.0055389404296875|unsuper_loss: 0.0
+average reward score: -1.28125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.86%) |Training time=0.41s (19.02%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81
+epoch: 0|step: 805|ppo_ep: 1|act_loss: 0.068115234375|cri_loss: 0.04962158203125|unsuper_loss: 0.0
+average reward score: 0.15283203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.88%) |Training time=0.41s (19.00%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.81
+epoch: 0|step: 806|ppo_ep: 1|act_loss: 0.03521728515625|cri_loss: 0.028228759765625|unsuper_loss: 0.0
+average reward score: -1.0
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.92%) |Training time=0.41s (18.96%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81
+epoch: 0|step: 807|ppo_ep: 1|act_loss: 0.0712890625|cri_loss: 0.04302978515625|unsuper_loss: 0.0
+average reward score: 0.01416015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.69%) |Training time=0.41s (19.19%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.81
+epoch: 0|step: 808|ppo_ep: 1|act_loss: 0.130859375|cri_loss: 0.0811767578125|unsuper_loss: 0.0
+average reward score: -0.37646484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.72%) |Training time=0.41s (19.17%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.81
+[2023-04-22 00:07:52,148] [INFO] [logging.py:96:log_dist] [Rank 0] step=810, skipped=12, lr=[7.729452588770968e-07, 7.729452588770968e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-22 00:07:52,166] [INFO] [timer.py:199:stop] epoch=0/micro_step=810/global_step=810, RunningAvgSamplesPerSec=129.13453205726572, CurrSamplesPerSec=132.02700392389087, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-22 00:07:52,259] [INFO] [logging.py:96:log_dist] [Rank 0] step=810, skipped=11, lr=[3.9551119626347693e-07, 3.9551119626347693e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 809|ppo_ep: 1|act_loss: 0.0173797607421875|cri_loss: 0.0125274658203125|unsuper_loss: 0.0
+average reward score: -0.0780029296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.01%) |Training time=0.41s (18.88%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81
+epoch: 0|step: 810|ppo_ep: 1|act_loss: 0.1641845703125|cri_loss: 0.10888671875|unsuper_loss: 0.0
+average reward score: 0.4169921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.69%) |Training time=0.41s (19.20%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.81
+epoch: 0|step: 811|ppo_ep: 1|act_loss: -0.03912353515625|cri_loss: -0.0121917724609375|unsuper_loss: 0.0
+average reward score: 0.18603515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.83%) |Training time=0.41s (18.98%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.81
+epoch: 0|step: 812|ppo_ep: 1|act_loss: 0.04656982421875|cri_loss: 0.03363037109375|unsuper_loss: 0.0
+average reward score: 0.2425537109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.78%) |Training time=0.41s (19.10%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.81
+epoch: 0|step: 813|ppo_ep: 1|act_loss: 0.1826171875|cri_loss: 0.1153564453125|unsuper_loss: 0.0
+average reward score: -0.87109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.91%) |Training time=0.41s (18.95%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81
+epoch: 0|step: 814|ppo_ep: 1|act_loss: -0.044952392578125|cri_loss: -0.014892578125|unsuper_loss: 0.0
+average reward score: -0.431640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.80s (77.87%) |Training time=0.40s (17.36%) |Others=0.11 (4.77%)|CurSamplesPerSec=13.86 |AvgSamplesPerSec=14.81
+epoch: 0|step: 815|ppo_ep: 1|act_loss: -0.0244140625|cri_loss: -0.0008392333984375|unsuper_loss: 0.0
+average reward score: -1.0859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.81%) |Training time=0.41s (19.08%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81
+epoch: 0|step: 816|ppo_ep: 1|act_loss: 0.04351806640625|cri_loss: 0.02813720703125|unsuper_loss: 0.0
+average reward score: -1.40234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.92%) |Training time=0.41s (18.96%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81
+epoch: 0|step: 817|ppo_ep: 1|act_loss: 0.167724609375|cri_loss: 0.0970458984375|unsuper_loss: 0.0
+average reward score: -0.423828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.50%) |Training time=0.42s (19.41%) |Others=0.11 (5.10%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.81
+epoch: 0|step: 818|ppo_ep: 1|act_loss: 0.0150604248046875|cri_loss: 0.01702880859375|unsuper_loss: 0.0
+average reward score: -0.4453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.89%) |Training time=0.41s (18.99%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81
+[2023-04-22 00:08:13,784] [INFO] [logging.py:96:log_dist] [Rank 0] step=820, skipped=12, lr=[6.793475473031601e-07, 6.793475473031601e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-22 00:08:13,802] [INFO] [timer.py:199:stop] epoch=0/micro_step=820/global_step=820, RunningAvgSamplesPerSec=129.15223569083219, CurrSamplesPerSec=129.81329278896325, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-22 00:08:13,895] [INFO] [logging.py:96:log_dist] [Rank 0] step=820, skipped=11, lr=[3.473027341057728e-07, 3.473027341057728e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 819|ppo_ep: 1|act_loss: 0.0097503662109375|cri_loss: 0.01690673828125|unsuper_loss: 0.0
+average reward score: -1.5029296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.81%) |Training time=0.41s (19.09%) |Others=0.11 (5.10%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81
+epoch: 0|step: 820|ppo_ep: 1|act_loss: -0.0237579345703125|cri_loss: 0.0008087158203125|unsuper_loss: 0.0
+average reward score: -0.583984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.69%) |Training time=0.41s (19.18%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81
+epoch: 0|step: 821|ppo_ep: 1|act_loss: -0.08392333984375|cri_loss: -0.0306854248046875|unsuper_loss: 0.0
+average reward score: 0.0511474609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.91%) |Training time=0.41s (18.96%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81
+epoch: 0|step: 822|ppo_ep: 1|act_loss: 0.1055908203125|cri_loss: 0.06280517578125|unsuper_loss: 0.0
+average reward score: 0.37451171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.86%) |Training time=0.41s (19.02%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81
+epoch: 0|step: 823|ppo_ep: 1|act_loss: 0.07537841796875|cri_loss: 0.0511474609375|unsuper_loss: 0.0
+average reward score: -0.4658203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.56%) |Training time=0.42s (19.34%) |Others=0.11 (5.10%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.81
+epoch: 0|step: 824|ppo_ep: 1|act_loss: -0.12249755859375|cri_loss: -0.0528564453125|unsuper_loss: 0.0
+average reward score: 0.42626953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.64s (76.35%) |Training time=0.40s (18.53%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.81
+epoch: 0|step: 825|ppo_ep: 1|act_loss: 0.03851318359375|cri_loss: 0.034942626953125|unsuper_loss: 0.0
+average reward score: -0.4189453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.92%) |Training time=0.41s (18.97%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81
+epoch: 0|step: 826|ppo_ep: 1|act_loss: 0.00485992431640625|cri_loss: 0.0159912109375|unsuper_loss: 0.0
+average reward score: -1.5087890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.75%) |Training time=0.41s (19.13%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.81
+epoch: 0|step: 827|ppo_ep: 1|act_loss: -0.03228759765625|cri_loss: -0.0021820068359375|unsuper_loss: 0.0
+average reward score: -1.2861328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.83%) |Training time=0.41s (19.05%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81
+epoch: 0|step: 828|ppo_ep: 1|act_loss: 0.060791015625|cri_loss: 0.051513671875|unsuper_loss: 0.0
+average reward score: -0.923828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.85%) |Training time=0.41s (19.03%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81
+[2023-04-22 00:08:35,408] [INFO] [logging.py:96:log_dist] [Rank 0] step=830, skipped=12, lr=[5.913593843626703e-07, 5.913593843626703e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-22 00:08:35,427] [INFO] [timer.py:199:stop] epoch=0/micro_step=830/global_step=830, RunningAvgSamplesPerSec=129.1667907504843, CurrSamplesPerSec=131.88378073237908, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-22 00:08:35,519] [INFO] [logging.py:96:log_dist] [Rank 0] step=830, skipped=11, lr=[3.020071210457451e-07, 3.020071210457451e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 829|ppo_ep: 1|act_loss: -0.0938720703125|cri_loss: -0.020263671875|unsuper_loss: 0.0
+average reward score: -1.02734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.79s (77.66%) |Training time=0.41s (17.59%) |Others=0.11 (4.75%)|CurSamplesPerSec=13.88 |AvgSamplesPerSec=14.81
+epoch: 0|step: 830|ppo_ep: 1|act_loss: 0.071044921875|cri_loss: 0.046875|unsuper_loss: 0.0
+average reward score: -0.0699462890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.79%) |Training time=0.41s (19.09%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81
+epoch: 0|step: 831|ppo_ep: 1|act_loss: 0.03875732421875|cri_loss: 0.031890869140625|unsuper_loss: 0.0
+average reward score: -1.3203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.56%) |Training time=0.42s (19.34%) |Others=0.11 (5.10%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.81
+epoch: 0|step: 832|ppo_ep: 1|act_loss: -0.0081939697265625|cri_loss: 0.00579071044921875|unsuper_loss: 0.0
+average reward score: -0.0244140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.66s (73.88%) |Training time=0.42s (18.78%) |Others=0.16 (7.34%)|CurSamplesPerSec=14.28 |AvgSamplesPerSec=14.81
+epoch: 0|step: 833|ppo_ep: 1|act_loss: 0.06817626953125|cri_loss: 0.047027587890625|unsuper_loss: 0.0
+average reward score: 0.08837890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.77%) |Training time=0.41s (19.10%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81
+epoch: 0|step: 834|ppo_ep: 1|act_loss: 0.066162109375|cri_loss: 0.048828125|unsuper_loss: 0.0
+average reward score: -1.515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.68%) |Training time=0.41s (19.21%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81
+epoch: 0|step: 835|ppo_ep: 1|act_loss: 0.0377197265625|cri_loss: 0.0400390625|unsuper_loss: 0.0
+average reward score: -1.193359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.56%) |Training time=0.42s (19.33%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.81
+epoch: 0|step: 836|ppo_ep: 1|act_loss: 0.009918212890625|cri_loss: 0.01409912109375|unsuper_loss: 0.0
+average reward score: -0.8408203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.66%) |Training time=0.41s (19.22%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81
+epoch: 0|step: 837|ppo_ep: 1|act_loss: -0.0616455078125|cri_loss: -0.023040771484375|unsuper_loss: 0.0
+average reward score: -1.115234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.90%) |Training time=0.41s (18.97%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81
+epoch: 0|step: 838|ppo_ep: 1|act_loss: 0.008087158203125|cri_loss: 0.025665283203125|unsuper_loss: 0.0
+average reward score: -1.2861328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.75%) |Training time=0.41s (19.13%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81
+[2023-04-22 00:08:56,969] [INFO] [logging.py:96:log_dist] [Rank 0] step=840, skipped=12, lr=[5.090998282460625e-07, 5.090998282460625e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-22 00:08:56,988] [INFO] [timer.py:199:stop] epoch=0/micro_step=840/global_step=840, RunningAvgSamplesPerSec=129.1591471189204, CurrSamplesPerSec=133.80120484930413, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-22 00:08:57,080] [INFO] [logging.py:96:log_dist] [Rank 0] step=840, skipped=11, lr=[2.596856473018208e-07, 2.596856473018208e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 839|ppo_ep: 1|act_loss: -0.03125|cri_loss: -0.0068511962890625|unsuper_loss: 0.0
+average reward score: -0.381591796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.14%) |Training time=0.40s (18.75%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81
+epoch: 0|step: 840|ppo_ep: 1|act_loss: 0.076416015625|cri_loss: 0.044891357421875|unsuper_loss: 0.0
+average reward score: 0.3984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.62%) |Training time=0.41s (19.26%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81
+epoch: 0|step: 841|ppo_ep: 1|act_loss: 0.097900390625|cri_loss: 0.06298828125|unsuper_loss: 0.0
+average reward score: -0.48681640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.75%) |Training time=0.41s (19.14%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81
+epoch: 0|step: 842|ppo_ep: 1|act_loss: 0.03302001953125|cri_loss: 0.0269775390625|unsuper_loss: 0.0
+average reward score: -0.4970703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.68%) |Training time=0.41s (19.20%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81
+epoch: 0|step: 843|ppo_ep: 1|act_loss: -0.0022125244140625|cri_loss: 0.0120697021484375|unsuper_loss: 0.0
+average reward score: -0.336669921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.73%) |Training time=0.41s (19.15%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81
+epoch: 0|step: 844|ppo_ep: 1|act_loss: 0.0711669921875|cri_loss: 0.047119140625|unsuper_loss: 0.0
+average reward score: -0.3251953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.70%) |Training time=0.41s (19.19%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81
+epoch: 0|step: 845|ppo_ep: 1|act_loss: 0.16259765625|cri_loss: 0.09173583984375|unsuper_loss: 0.0
+average reward score: -0.65576171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.81s (77.82%) |Training time=0.41s (17.46%) |Others=0.11 (4.72%)|CurSamplesPerSec=13.73 |AvgSamplesPerSec=14.81
+epoch: 0|step: 846|ppo_ep: 1|act_loss: 0.011627197265625|cri_loss: 0.01849365234375|unsuper_loss: 0.0
+average reward score: -0.126220703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.75%) |Training time=0.41s (19.11%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81
+epoch: 0|step: 847|ppo_ep: 1|act_loss: -0.0119171142578125|cri_loss: 0.0048828125|unsuper_loss: 0.0
+average reward score: -0.80712890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.74%) |Training time=0.41s (19.13%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81
+epoch: 0|step: 848|ppo_ep: 1|act_loss: -0.033416748046875|cri_loss: -0.0008544921875|unsuper_loss: 0.0
+average reward score: -0.140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.87%) |Training time=0.41s (19.01%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81
+[2023-04-22 00:09:18,602] [INFO] [logging.py:96:log_dist] [Rank 0] step=850, skipped=12, lr=[4.326801856742557e-07, 4.326801856742557e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-22 00:09:18,620] [INFO] [timer.py:199:stop] epoch=0/micro_step=850/global_step=850, RunningAvgSamplesPerSec=129.16070369009825, CurrSamplesPerSec=130.51692071317785, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-22 00:09:18,713] [INFO] [logging.py:96:log_dist] [Rank 0] step=850, skipped=11, lr=[2.203955787369519e-07, 2.203955787369519e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 849|ppo_ep: 1|act_loss: 0.022552490234375|cri_loss: 0.0177764892578125|unsuper_loss: 0.0
+average reward score: -0.142822265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.82%) |Training time=0.41s (19.06%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81
+epoch: 0|step: 850|ppo_ep: 1|act_loss: 0.14013671875|cri_loss: 0.08056640625|unsuper_loss: 0.0
+average reward score: -0.61767578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.89%) |Training time=0.41s (18.98%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.81
+epoch: 0|step: 851|ppo_ep: 1|act_loss: -0.00830078125|cri_loss: 0.00661468505859375|unsuper_loss: 0.0
+average reward score: 0.05120849609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.71%) |Training time=0.41s (19.17%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81
+epoch: 0|step: 852|ppo_ep: 1|act_loss: 0.037384033203125|cri_loss: 0.03314208984375|unsuper_loss: 0.0
+average reward score: -0.572265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.89%) |Training time=0.41s (18.98%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81
+epoch: 0|step: 853|ppo_ep: 1|act_loss: -0.075439453125|cri_loss: -0.0286407470703125|unsuper_loss: 0.0
+average reward score: -0.8759765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.93%) |Training time=0.41s (18.94%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81
+epoch: 0|step: 854|ppo_ep: 1|act_loss: -0.169921875|cri_loss: -0.076171875|unsuper_loss: 0.0
+average reward score: 0.0458984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.72%) |Training time=0.41s (19.15%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81
+epoch: 0|step: 855|ppo_ep: 1|act_loss: 0.03857421875|cri_loss: 0.034423828125|unsuper_loss: 0.0
+average reward score: 0.153564453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.88%) |Training time=0.41s (19.00%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81
+epoch: 0|step: 856|ppo_ep: 1|act_loss: 0.09478759765625|cri_loss: 0.061431884765625|unsuper_loss: 0.0
+average reward score: -0.1632080078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.91%) |Training time=0.41s (18.96%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.81
+epoch: 0|step: 857|ppo_ep: 1|act_loss: -0.04522705078125|cri_loss: -0.01202392578125|unsuper_loss: 0.0
+average reward score: 0.33740234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.83%) |Training time=0.41s (19.04%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81
+epoch: 0|step: 858|ppo_ep: 1|act_loss: -0.04248046875|cri_loss: -0.0122222900390625|unsuper_loss: 0.0
+average reward score: 0.302490234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.84%) |Training time=0.41s (19.03%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81
+[2023-04-22 00:09:40,030] [INFO] [logging.py:96:log_dist] [Rank 0] step=860, skipped=12, lr=[3.6220386128776603e-07, 3.6220386128776603e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-22 00:09:40,048] [INFO] [timer.py:199:stop] epoch=0/micro_step=860/global_step=860, RunningAvgSamplesPerSec=129.1763337152809, CurrSamplesPerSec=130.2929145354487, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-22 00:09:40,140] [INFO] [logging.py:96:log_dist] [Rank 0] step=860, skipped=11, lr=[1.8419007937126254e-07, 1.8419007937126254e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 859|ppo_ep: 1|act_loss: 0.102783203125|cri_loss: 0.072265625|unsuper_loss: 0.0
+average reward score: -0.0703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.83%) |Training time=0.41s (19.07%) |Others=0.11 (5.10%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81
+epoch: 0|step: 860|ppo_ep: 1|act_loss: 0.06964111328125|cri_loss: 0.040771484375|unsuper_loss: 0.0
+average reward score: 0.228515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.73%) |Training time=0.41s (19.14%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81
+epoch: 0|step: 861|ppo_ep: 1|act_loss: -0.10516357421875|cri_loss: -0.0374755859375|unsuper_loss: 0.0
+average reward score: -0.98193359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.80s (77.50%) |Training time=0.41s (17.76%) |Others=0.11 (4.75%)|CurSamplesPerSec=13.79 |AvgSamplesPerSec=14.81
+epoch: 0|step: 862|ppo_ep: 1|act_loss: 0.1717529296875|cri_loss: 0.10546875|unsuper_loss: 0.0
+average reward score: -0.66259765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.66s (74.20%) |Training time=0.43s (19.01%) |Others=0.15 (6.79%)|CurSamplesPerSec=14.31 |AvgSamplesPerSec=14.81
+epoch: 0|step: 863|ppo_ep: 1|act_loss: -0.00164794921875|cri_loss: 0.0216827392578125|unsuper_loss: 0.0
+average reward score: -1.15234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.77%) |Training time=0.41s (19.09%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81
+epoch: 0|step: 864|ppo_ep: 1|act_loss: 0.0867919921875|cri_loss: 0.052520751953125|unsuper_loss: 0.0
+average reward score: -0.98779296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.70%) |Training time=0.41s (19.18%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81
+epoch: 0|step: 865|ppo_ep: 1|act_loss: -0.004730224609375|cri_loss: 0.00838470458984375|unsuper_loss: 0.0
+average reward score: -0.396240234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.78%) |Training time=0.41s (19.10%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81
+epoch: 0|step: 866|ppo_ep: 1|act_loss: 0.14013671875|cri_loss: 0.08197021484375|unsuper_loss: 0.0
+average reward score: -0.88330078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.68%) |Training time=0.41s (19.21%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81
+epoch: 0|step: 867|ppo_ep: 1|act_loss: 0.128173828125|cri_loss: 0.08990478515625|unsuper_loss: 0.0
+average reward score: -0.408935546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.66%) |Training time=0.41s (19.22%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81
+epoch: 0|step: 868|ppo_ep: 1|act_loss: 0.029205322265625|cri_loss: 0.020660400390625|unsuper_loss: 0.0
+average reward score: -0.28662109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.51%) |Training time=0.42s (19.38%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.81
+[2023-04-22 00:10:01,761] [INFO] [logging.py:96:log_dist] [Rank 0] step=870, skipped=12, lr=[2.9776621772821655e-07, 2.9776621772821655e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-22 00:10:01,779] [INFO] [timer.py:199:stop] epoch=0/micro_step=870/global_step=870, RunningAvgSamplesPerSec=129.1611288792264, CurrSamplesPerSec=128.51771050373821, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-22 00:10:01,872] [INFO] [logging.py:96:log_dist] [Rank 0] step=870, skipped=11, lr=[1.511181394449654e-07, 1.511181394449654e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 869|ppo_ep: 1|act_loss: 0.173583984375|cri_loss: 0.09844970703125|unsuper_loss: 0.0
+average reward score: -0.521484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.71%) |Training time=0.41s (19.18%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81
+epoch: 0|step: 870|ppo_ep: 1|act_loss: 0.2822265625|cri_loss: 0.16162109375|unsuper_loss: 0.0
+average reward score: -0.126953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.70%) |Training time=0.41s (19.18%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81
+epoch: 0|step: 871|ppo_ep: 1|act_loss: 0.01552581787109375|cri_loss: 0.01537322998046875|unsuper_loss: 0.0
+average reward score: -0.005615234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.63%) |Training time=0.41s (19.26%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.81
+epoch: 0|step: 872|ppo_ep: 1|act_loss: -0.024993896484375|cri_loss: -0.00213623046875|unsuper_loss: 0.0
+average reward score: -1.083984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.62%) |Training time=0.41s (19.27%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.81
+epoch: 0|step: 873|ppo_ep: 1|act_loss: 0.0127716064453125|cri_loss: 0.0162506103515625|unsuper_loss: 0.0
+average reward score: -0.2098388671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.99%) |Training time=0.41s (18.93%) |Others=0.11 (5.08%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.81
+epoch: 0|step: 874|ppo_ep: 1|act_loss: 0.11865234375|cri_loss: 0.072265625|unsuper_loss: 0.0
+average reward score: 0.4326171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.71%) |Training time=0.41s (19.17%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81
+epoch: 0|step: 875|ppo_ep: 1|act_loss: 0.08056640625|cri_loss: 0.05224609375|unsuper_loss: 0.0
+average reward score: 0.36376953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.91%) |Training time=0.41s (18.98%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.81
+epoch: 0|step: 876|ppo_ep: 1|act_loss: 0.048492431640625|cri_loss: 0.032745361328125|unsuper_loss: 0.0
+average reward score: -0.90234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.80s (77.60%) |Training time=0.41s (17.66%) |Others=0.11 (4.74%)|CurSamplesPerSec=13.81 |AvgSamplesPerSec=14.81
+epoch: 0|step: 877|ppo_ep: 1|act_loss: -0.0577392578125|cri_loss: -0.0191802978515625|unsuper_loss: 0.0
+average reward score: 0.088134765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.71%) |Training time=0.41s (19.17%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81
+epoch: 0|step: 878|ppo_ep: 1|act_loss: -0.00150299072265625|cri_loss: 0.004222869873046875|unsuper_loss: 0.0
+average reward score: 0.05859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.80%) |Training time=0.41s (19.07%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81
+[2023-04-22 00:10:23,418] [INFO] [logging.py:96:log_dist] [Rank 0] step=880, skipped=12, lr=[2.3945444660163493e-07, 2.3945444660163493e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-22 00:10:23,436] [INFO] [timer.py:199:stop] epoch=0/micro_step=880/global_step=880, RunningAvgSamplesPerSec=129.1579314652433, CurrSamplesPerSec=129.09262182108125, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-22 00:10:23,529] [INFO] [logging.py:96:log_dist] [Rank 0] step=880, skipped=11, lr=[1.212245091288794e-07, 1.212245091288794e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 879|ppo_ep: 1|act_loss: 0.032440185546875|cri_loss: 0.0249786376953125|unsuper_loss: 0.0
+average reward score: 0.188232421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.77%) |Training time=0.41s (19.12%) |Others=0.11 (5.10%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.81
+epoch: 0|step: 880|ppo_ep: 1|act_loss: -0.044830322265625|cri_loss: -0.00360107421875|unsuper_loss: 0.0
+average reward score: -0.5458984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.57%) |Training time=0.42s (19.32%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.81
+epoch: 0|step: 881|ppo_ep: 1|act_loss: 0.06884765625|cri_loss: 0.04541015625|unsuper_loss: 0.0
+average reward score: -0.12353515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.74%) |Training time=0.41s (19.13%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81
+epoch: 0|step: 882|ppo_ep: 1|act_loss: 0.0084991455078125|cri_loss: 0.0186309814453125|unsuper_loss: 0.0
+average reward score: -0.8642578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.68%) |Training time=0.41s (19.20%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81
+epoch: 0|step: 883|ppo_ep: 1|act_loss: 0.0063629150390625|cri_loss: 0.01184844970703125|unsuper_loss: 0.0
+average reward score: -0.37451171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.68%) |Training time=0.41s (19.20%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81
+epoch: 0|step: 884|ppo_ep: 1|act_loss: -0.11187744140625|cri_loss: -0.04296875|unsuper_loss: 0.0
+average reward score: 0.63037109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.73%) |Training time=0.41s (19.14%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81
+epoch: 0|step: 885|ppo_ep: 1|act_loss: -0.03643798828125|cri_loss: -0.005706787109375|unsuper_loss: 0.0
+average reward score: -0.31201171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.80%) |Training time=0.41s (19.07%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81
+epoch: 0|step: 886|ppo_ep: 1|act_loss: 0.0257110595703125|cri_loss: 0.023284912109375|unsuper_loss: 0.0
+average reward score: -0.473388671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.73%) |Training time=0.41s (19.14%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81
+epoch: 0|step: 887|ppo_ep: 1|act_loss: 0.05303955078125|cri_loss: 0.03485107421875|unsuper_loss: 0.0
+average reward score: -0.199462890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.91%) |Training time=0.41s (18.97%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81
+epoch: 0|step: 888|ppo_ep: 1|act_loss: 0.092041015625|cri_loss: 0.056671142578125|unsuper_loss: 0.0
+average reward score: 0.57763671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.56%) |Training time=0.42s (19.34%) |Others=0.11 (5.10%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.81
+[2023-04-22 00:10:44,882] [INFO] [logging.py:96:log_dist] [Rank 0] step=890, skipped=12, lr=[1.8734745049808622e-07, 1.8734745049808622e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-22 00:10:44,900] [INFO] [timer.py:199:stop] epoch=0/micro_step=890/global_step=890, RunningAvgSamplesPerSec=129.1510880560782, CurrSamplesPerSec=128.60169210574225, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-22 00:10:44,993] [INFO] [logging.py:96:log_dist] [Rank 0] step=890, skipped=11, lr=[9.45496379722624e-08, 9.45496379722624e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 889|ppo_ep: 1|act_loss: -0.0263671875|cri_loss: 0.00555419921875|unsuper_loss: 0.0
+average reward score: -0.40380859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.72%) |Training time=0.41s (19.18%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81
+epoch: 0|step: 890|ppo_ep: 1|act_loss: 0.180419921875|cri_loss: 0.099853515625|unsuper_loss: 0.0
+average reward score: -0.42724609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.72%) |Training time=0.41s (19.14%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81
+epoch: 0|step: 891|ppo_ep: 1|act_loss: 0.07855224609375|cri_loss: 0.04998779296875|unsuper_loss: 0.0
+average reward score: -0.72265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.80s (77.39%) |Training time=0.42s (17.89%) |Others=0.11 (4.72%)|CurSamplesPerSec=13.75 |AvgSamplesPerSec=14.81
+epoch: 0|step: 892|ppo_ep: 1|act_loss: 0.00174713134765625|cri_loss: 0.005886077880859375|unsuper_loss: 0.0
+average reward score: 0.552734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.66s (75.12%) |Training time=0.44s (19.79%) |Others=0.11 (5.09%)|CurSamplesPerSec=14.52 |AvgSamplesPerSec=14.81
+epoch: 0|step: 893|ppo_ep: 1|act_loss: -0.004913330078125|cri_loss: 0.0141754150390625|unsuper_loss: 0.0
+average reward score: 0.6435546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.76%) |Training time=0.41s (19.12%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.81
+epoch: 0|step: 894|ppo_ep: 1|act_loss: -0.0146484375|cri_loss: -0.00069427490234375|unsuper_loss: 0.0
+average reward score: 0.0802001953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.74%) |Training time=0.41s (19.14%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81
+epoch: 0|step: 895|ppo_ep: 1|act_loss: 0.022796630859375|cri_loss: 0.02679443359375|unsuper_loss: 0.0
+average reward score: -0.583984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.69%) |Training time=0.41s (19.20%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.81
+epoch: 0|step: 896|ppo_ep: 1|act_loss: 0.04742431640625|cri_loss: 0.0323486328125|unsuper_loss: 0.0
+average reward score: -0.271484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.68%) |Training time=0.41s (19.21%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.81
+epoch: 0|step: 897|ppo_ep: 1|act_loss: 0.006755828857421875|cri_loss: 0.0107879638671875|unsuper_loss: 0.0
+average reward score: -0.2408447265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.78%) |Training time=0.41s (19.09%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81
+epoch: 0|step: 898|ppo_ep: 1|act_loss: -0.0697021484375|cri_loss: -0.013885498046875|unsuper_loss: 0.0
+average reward score: 0.20166015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.79%) |Training time=0.41s (19.08%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81
+[2023-04-22 00:11:06,576] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 8192, but hysteresis is 2. Reducing hysteresis to 1
+[2023-04-22 00:11:06,577] [INFO] [logging.py:96:log_dist] [Rank 0] step=900, skipped=13, lr=[1.458146855771019e-07, 1.458146855771019e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-22 00:11:06,577] [INFO] [timer.py:199:stop] epoch=0/micro_step=900/global_step=900, RunningAvgSamplesPerSec=129.14527369687394, CurrSamplesPerSec=144.12734793459072, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-22 00:11:06,669] [INFO] [logging.py:96:log_dist] [Rank 0] step=900, skipped=11, lr=[7.112962016987635e-08, 7.112962016987635e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 899|ppo_ep: 1|act_loss: 0.03289794921875|cri_loss: 0.022064208984375|unsuper_loss: 0.0
+average reward score: -1.609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.12s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.71%) |Training time=0.38s (18.13%) |Others=0.11 (5.16%)|CurSamplesPerSec=15.10 |AvgSamplesPerSec=14.81
+epoch: 0|step: 900|ppo_ep: 1|act_loss: 0.035003662109375|cri_loss: 0.036163330078125|unsuper_loss: 0.0
+average reward score: -0.07470703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.91%) |Training time=0.41s (18.96%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.81
+epoch: 0|step: 901|ppo_ep: 1|act_loss: 0.045989990234375|cri_loss: 0.037445068359375|unsuper_loss: 0.0
+average reward score: -0.1923828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.81%) |Training time=0.41s (19.13%) |Others=0.11 (5.07%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.81
+epoch: 0|step: 902|ppo_ep: 1|act_loss: 0.055419921875|cri_loss: 0.0379638671875|unsuper_loss: 0.0
+average reward score: -0.10693359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.60%) |Training time=0.41s (19.28%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.81
+epoch: 0|step: 903|ppo_ep: 1|act_loss: 0.180908203125|cri_loss: 0.1085205078125|unsuper_loss: 0.0
+average reward score: -0.662109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.65%) |Training time=0.41s (19.24%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81
+epoch: 0|step: 904|ppo_ep: 1|act_loss: -0.04833984375|cri_loss: -0.013946533203125|unsuper_loss: 0.0
+average reward score: 0.10009765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.54%) |Training time=0.42s (19.35%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.81
+epoch: 0|step: 905|ppo_ep: 1|act_loss: -0.0296783447265625|cri_loss: -0.001434326171875|unsuper_loss: 0.0
+average reward score: -0.2890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.76%) |Training time=0.41s (19.11%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81
+epoch: 0|step: 906|ppo_ep: 1|act_loss: -0.1195068359375|cri_loss: -0.0462646484375|unsuper_loss: 0.0
+average reward score: 0.04644775390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.80s (77.68%) |Training time=0.41s (17.58%) |Others=0.11 (4.74%)|CurSamplesPerSec=13.82 |AvgSamplesPerSec=14.81
+epoch: 0|step: 907|ppo_ep: 1|act_loss: -0.059722900390625|cri_loss: -0.022705078125|unsuper_loss: 0.0
+average reward score: -0.9794921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.67%) |Training time=0.41s (19.21%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81
+epoch: 0|step: 908|ppo_ep: 1|act_loss: -0.06658935546875|cri_loss: -0.022247314453125|unsuper_loss: 0.0
+average reward score: 0.41015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.65%) |Training time=0.41s (19.24%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.81
+[2023-04-22 00:11:28,216] [INFO] [logging.py:96:log_dist] [Rank 0] step=910, skipped=13, lr=[1.0568399210089959e-07, 1.0568399210089959e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-22 00:11:28,234] [INFO] [timer.py:199:stop] epoch=0/micro_step=910/global_step=910, RunningAvgSamplesPerSec=129.13780240885887, CurrSamplesPerSec=129.61697352473936, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-22 00:11:28,327] [INFO] [logging.py:96:log_dist] [Rank 0] step=910, skipped=11, lr=[5.099614572235623e-08, 5.099614572235623e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 909|ppo_ep: 1|act_loss: -0.007720947265625|cri_loss: 0.00485992431640625|unsuper_loss: 0.0
+average reward score: -1.40234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.78%) |Training time=0.41s (19.11%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81
+[2023-04-22 00:11:30,351] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 8192, reducing to 4096
+epoch: 0|step: 910|ppo_ep: 1|act_loss: -0.0655517578125|cri_loss: -0.0225067138671875|unsuper_loss: 0.0
+average reward score: -0.63720703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.12s |Gather latency=0.00s (0.00%) |Generate time=1.62s (76.66%) |Training time=0.38s (18.16%) |Others=0.11 (5.18%)|CurSamplesPerSec=15.11 |AvgSamplesPerSec=14.81
+epoch: 0|step: 911|ppo_ep: 1|act_loss: -0.044464111328125|cri_loss: -0.009521484375|unsuper_loss: 0.0
+average reward score: 0.269287109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.79%) |Training time=0.41s (19.09%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81
+[2023-04-22 00:11:34,728] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, but hysteresis is 2. Reducing hysteresis to 1
+epoch: 0|step: 912|ppo_ep: 1|act_loss: 0.08880615234375|cri_loss: 0.0516357421875|unsuper_loss: 0.0
+average reward score: -0.25634765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.45%) |Training time=0.40s (18.79%) |Others=0.10 (4.76%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.81
+epoch: 0|step: 913|ppo_ep: 1|act_loss: -0.008819580078125|cri_loss: 0.01165771484375|unsuper_loss: 0.0
+average reward score: -0.296630859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.49%) |Training time=0.42s (19.40%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.81
+epoch: 0|step: 914|ppo_ep: 1|act_loss: 0.11029052734375|cri_loss: 0.063232421875|unsuper_loss: 0.0
+average reward score: -0.11328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.77%) |Training time=0.41s (19.11%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81
+epoch: 0|step: 915|ppo_ep: 1|act_loss: 0.11956787109375|cri_loss: 0.07684326171875|unsuper_loss: 0.0
+average reward score: -0.09716796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.84%) |Training time=0.41s (19.04%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81
+epoch: 0|step: 916|ppo_ep: 1|act_loss: 0.0509033203125|cri_loss: 0.036712646484375|unsuper_loss: 0.0
+average reward score: -0.67822265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.61%) |Training time=0.41s (19.27%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.81
+epoch: 0|step: 917|ppo_ep: 1|act_loss: 0.015533447265625|cri_loss: 0.01422882080078125|unsuper_loss: 0.0
+average reward score: -0.1829833984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.79%) |Training time=0.41s (19.08%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81
+epoch: 0|step: 918|ppo_ep: 1|act_loss: -0.02410888671875|cri_loss: 0.00177001953125|unsuper_loss: 0.0
+average reward score: -0.1407470703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.66%) |Training time=0.41s (19.23%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.81
+[2023-04-22 00:11:49,644] [INFO] [logging.py:96:log_dist] [Rank 0] step=920, skipped=14, lr=[7.502484219857415e-08, 7.502484219857415e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-22 00:11:49,662] [INFO] [timer.py:199:stop] epoch=0/micro_step=920/global_step=920, RunningAvgSamplesPerSec=129.15088417010656, CurrSamplesPerSec=128.13918927820959, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-22 00:11:49,755] [INFO] [logging.py:96:log_dist] [Rank 0] step=920, skipped=12, lr=[3.5708622066224494e-08, 3.5708622066224494e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 919|ppo_ep: 1|act_loss: -0.07293701171875|cri_loss: -0.019866943359375|unsuper_loss: 0.0
+average reward score: 0.2802734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.66%) |Training time=0.41s (19.23%) |Others=0.11 (5.10%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81
+epoch: 0|step: 920|ppo_ep: 1|act_loss: -0.0058746337890625|cri_loss: 0.0059814453125|unsuper_loss: 0.0
+average reward score: -0.5361328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.75%) |Training time=0.41s (19.13%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81
+epoch: 0|step: 921|ppo_ep: 1|act_loss: 0.1065673828125|cri_loss: 0.06158447265625|unsuper_loss: 0.0
+average reward score: -0.45849609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.72s (76.16%) |Training time=0.42s (18.75%) |Others=0.12 (5.09%)|CurSamplesPerSec=14.15 |AvgSamplesPerSec=14.81
+epoch: 0|step: 922|ppo_ep: 1|act_loss: 0.068603515625|cri_loss: 0.05145263671875|unsuper_loss: 0.0
+average reward score: -0.859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.50%) |Training time=0.42s (19.44%) |Others=0.11 (5.06%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.81
+epoch: 0|step: 923|ppo_ep: 1|act_loss: 0.06939697265625|cri_loss: 0.046966552734375|unsuper_loss: 0.0
+average reward score: -0.028076171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.77%) |Training time=0.41s (19.11%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81
+[2023-04-22 00:12:00,630] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, reducing to 16384
+epoch: 0|step: 924|ppo_ep: 1|act_loss: 0.0167083740234375|cri_loss: 0.019012451171875|unsuper_loss: 0.0
+average reward score: 0.87451171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.09%) |Training time=0.41s (19.17%) |Others=0.10 (4.74%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.81
+epoch: 0|step: 925|ppo_ep: 1|act_loss: -0.1307373046875|cri_loss: -0.046844482421875|unsuper_loss: 0.0
+average reward score: -0.41357421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.66%) |Training time=0.41s (19.22%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.81
+epoch: 0|step: 926|ppo_ep: 1|act_loss: 0.15087890625|cri_loss: 0.085693359375|unsuper_loss: 0.0
+average reward score: -0.346923828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.92%) |Training time=0.41s (18.95%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81
+epoch: 0|step: 927|ppo_ep: 1|act_loss: -0.0184173583984375|cri_loss: 0.0004119873046875|unsuper_loss: 0.0
+average reward score: -0.0367431640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.98%) |Training time=0.40s (18.90%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81
+epoch: 0|step: 928|ppo_ep: 1|act_loss: -0.04833984375|cri_loss: -0.001373291015625|unsuper_loss: 0.0
+average reward score: -0.9365234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.73%) |Training time=0.41s (19.16%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.81
+[2023-04-22 00:12:11,261] [INFO] [logging.py:96:log_dist] [Rank 0] step=930, skipped=14, lr=[4.7066422011146386e-08, 4.7066422011146386e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-22 00:12:11,279] [INFO] [timer.py:199:stop] epoch=0/micro_step=930/global_step=930, RunningAvgSamplesPerSec=129.16257215861955, CurrSamplesPerSec=131.13961886597778, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-22 00:12:11,372] [INFO] [logging.py:96:log_dist] [Rank 0] step=930, skipped=13, lr=[2.312207866372762e-08, 2.312207866372762e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 929|ppo_ep: 1|act_loss: -0.0010528564453125|cri_loss: 0.0113983154296875|unsuper_loss: 0.0
+average reward score: -0.236328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (76.09%) |Training time=0.41s (18.84%) |Others=0.11 (5.07%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.81
+epoch: 0|step: 930|ppo_ep: 1|act_loss: -0.048583984375|cri_loss: -0.008270263671875|unsuper_loss: 0.0
+average reward score: -1.8974609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.61%) |Training time=0.41s (19.27%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.81
+epoch: 0|step: 931|ppo_ep: 1|act_loss: 0.043243408203125|cri_loss: 0.0360107421875|unsuper_loss: 0.0
+average reward score: 0.132568359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.64%) |Training time=0.41s (19.24%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.81
+epoch: 0|step: 932|ppo_ep: 1|act_loss: -0.022979736328125|cri_loss: 0.005218505859375|unsuper_loss: 0.0
+average reward score: 0.273681640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.75%) |Training time=0.41s (19.13%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81
+epoch: 0|step: 933|ppo_ep: 1|act_loss: -0.030517578125|cri_loss: -0.0006256103515625|unsuper_loss: 0.0
+average reward score: -0.388427734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.77%) |Training time=0.41s (19.10%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.81
+epoch: 0|step: 934|ppo_ep: 1|act_loss: 0.05572509765625|cri_loss: 0.0394287109375|unsuper_loss: 0.0
+average reward score: 0.1448974609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.77%) |Training time=0.41s (19.11%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81
+epoch: 0|step: 935|ppo_ep: 1|act_loss: -0.040863037109375|cri_loss: -0.01328277587890625|unsuper_loss: 0.0
+average reward score: 0.3115234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.73%) |Training time=0.41s (19.15%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81
+epoch: 0|step: 936|ppo_ep: 1|act_loss: 0.136962890625|cri_loss: 0.0850830078125|unsuper_loss: 0.0
+average reward score: 0.4033203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.87%) |Training time=0.41s (19.00%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81
+epoch: 0|step: 937|ppo_ep: 1|act_loss: 0.1923828125|cri_loss: 0.11328125|unsuper_loss: 0.0
+average reward score: -0.1240234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.82s (78.02%) |Training time=0.40s (17.27%) |Others=0.11 (4.71%)|CurSamplesPerSec=13.69 |AvgSamplesPerSec=14.81
+epoch: 0|step: 938|ppo_ep: 1|act_loss: -0.1480712890625|cri_loss: -0.05029296875|unsuper_loss: 0.0
+average reward score: 0.0418701171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.71%) |Training time=0.41s (19.18%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81
+[2023-04-22 00:12:32,909] [INFO] [logging.py:96:log_dist] [Rank 0] step=940, skipped=14, lr=[2.55731001596078e-08, 2.55731001596078e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-22 00:12:32,927] [INFO] [timer.py:199:stop] epoch=0/micro_step=940/global_step=940, RunningAvgSamplesPerSec=129.16296815177412, CurrSamplesPerSec=128.4635870106203, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-22 00:12:33,020] [INFO] [logging.py:96:log_dist] [Rank 0] step=940, skipped=13, lr=[1.2321525434263737e-08, 1.2321525434263737e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 939|ppo_ep: 1|act_loss: 0.126953125|cri_loss: 0.080322265625|unsuper_loss: 0.0
+average reward score: -0.37646484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.68%) |Training time=0.41s (19.21%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81
+epoch: 0|step: 940|ppo_ep: 1|act_loss: 0.120361328125|cri_loss: 0.07830810546875|unsuper_loss: 0.0
+average reward score: 0.11474609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.83%) |Training time=0.41s (19.03%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81
+epoch: 0|step: 941|ppo_ep: 1|act_loss: 0.080078125|cri_loss: 0.045806884765625|unsuper_loss: 0.0
+average reward score: 0.0250244140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.89%) |Training time=0.41s (18.98%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.81
+epoch: 0|step: 942|ppo_ep: 1|act_loss: 0.041839599609375|cri_loss: 0.029327392578125|unsuper_loss: 0.0
+average reward score: -1.208984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.86%) |Training time=0.41s (19.01%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.81
+[2023-04-22 00:12:41,583] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 16384, reducing to 8192
+epoch: 0|step: 943|ppo_ep: 1|act_loss: 0.0704345703125|cri_loss: 0.0430908203125|unsuper_loss: 0.0
+average reward score: -0.189697265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (76.01%) |Training time=0.41s (19.23%) |Others=0.10 (4.76%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.81
+epoch: 0|step: 944|ppo_ep: 1|act_loss: 0.1177978515625|cri_loss: 0.0665283203125|unsuper_loss: 0.0
+average reward score: -0.91162109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.70%) |Training time=0.41s (19.19%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.81
+epoch: 0|step: 945|ppo_ep: 1|act_loss: 0.030120849609375|cri_loss: 0.035247802734375|unsuper_loss: 0.0
+average reward score: -1.4453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.57%) |Training time=0.42s (19.31%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.81
+epoch: 0|step: 946|ppo_ep: 1|act_loss: -0.0516357421875|cri_loss: -0.0136260986328125|unsuper_loss: 0.0
+average reward score: -0.309814453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.00%) |Training time=0.40s (18.88%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81
+epoch: 0|step: 947|ppo_ep: 1|act_loss: 0.08056640625|cri_loss: 0.049224853515625|unsuper_loss: 0.0
+average reward score: 0.128662109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.77%) |Training time=0.41s (19.10%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81
+epoch: 0|step: 948|ppo_ep: 1|act_loss: 0.0703125|cri_loss: 0.0458984375|unsuper_loss: 0.0
+average reward score: 0.0665283203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.78%) |Training time=0.41s (19.09%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.81
+[2023-04-22 00:12:54,348] [INFO] [logging.py:96:log_dist] [Rank 0] step=950, skipped=14, lr=[1.0573959601489598e-08, 1.0573959601489598e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-22 00:12:54,366] [INFO] [timer.py:199:stop] epoch=0/micro_step=950/global_step=950, RunningAvgSamplesPerSec=129.16878047990338, CurrSamplesPerSec=129.97294169711395, MemAllocated=12.06GB, MaxMemAllocated=21.83GB
+[2023-04-22 00:12:54,459] [INFO] [logging.py:96:log_dist] [Rank 0] step=950, skipped=14, lr=[5.478735544813263e-09, 5.478735544813263e-09], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 949|ppo_ep: 1|act_loss: 0.058837890625|cri_loss: 0.0377197265625|unsuper_loss: 0.0
+average reward score: -0.6123046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.82%) |Training time=0.41s (19.07%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81
+epoch: 0|step: 950|ppo_ep: 1|act_loss: 0.00640869140625|cri_loss: 0.0272064208984375|unsuper_loss: 0.0
+average reward score: -0.7890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.77%) |Training time=0.41s (19.10%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.81
+epoch: 0|step: 951|ppo_ep: 1|act_loss: 0.0693359375|cri_loss: 0.04315185546875|unsuper_loss: 0.0
+average reward score: -0.7109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.70%) |Training time=0.41s (19.18%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.81
+epoch: 0|step: 952|ppo_ep: 1|act_loss: 0.00537872314453125|cri_loss: 0.00829315185546875|unsuper_loss: 0.0
+average reward score: 0.279296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.84s (77.65%) |Training time=0.42s (17.66%) |Others=0.11 (4.70%)|CurSamplesPerSec=13.53 |AvgSamplesPerSec=14.81
+[2023-04-22 00:13:02,885] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 4096, reducing to 2048
+epoch: 0|step: 953|ppo_ep: 1|act_loss: 0.1746826171875|cri_loss: 0.09466552734375|unsuper_loss: 0.0
+average reward score: -2.0
 -------------------------------------------------------------------------------------
+saving model ...saving model ...
+saving model ...saving model ...saving model ...saving model ...saving model ...
 saving model ...
-saving model ...saving model ...saving model ...
 
 
-saving model ...saving model ...
-saving model ...saving model ...
 
 
-[2023-04-14 13:49:56,880] [INFO] [launch.py:460:main] Process 412204 exits successfully.
-[2023-04-14 13:49:57,881] [INFO] [launch.py:460:main] Process 412199 exits successfully.
-[2023-04-14 13:49:57,881] [INFO] [launch.py:460:main] Process 412200 exits successfully.
-[2023-04-14 13:49:57,881] [INFO] [launch.py:460:main] Process 412205 exits successfully.
-[2023-04-14 13:49:57,881] [INFO] [launch.py:460:main] Process 412201 exits successfully.
-[2023-04-14 13:49:58,883] [INFO] [launch.py:460:main] Process 412202 exits successfully.
-[2023-04-14 13:49:58,883] [INFO] [launch.py:460:main] Process 412203 exits successfully.
-[2023-04-14 13:50:01,886] [INFO] [launch.py:460:main] Process 412198 exits successfully.
+
+[2023-04-22 00:13:06,284] [INFO] [launch.py:460:main] Process 2910018 exits successfully.
+[2023-04-22 00:13:07,285] [INFO] [launch.py:460:main] Process 2910014 exits successfully.
+[2023-04-22 00:13:08,287] [INFO] [launch.py:460:main] Process 2910016 exits successfully.
+[2023-04-22 00:13:08,287] [INFO] [launch.py:460:main] Process 2910013 exits successfully.
+[2023-04-22 00:13:08,287] [INFO] [launch.py:460:main] Process 2910017 exits successfully.
+[2023-04-22 00:13:08,287] [INFO] [launch.py:460:main] Process 2910015 exits successfully.
+[2023-04-22 00:13:08,287] [INFO] [launch.py:460:main] Process 2910012 exits successfully.
+[2023-04-22 00:13:16,295] [INFO] [launch.py:460:main] Process 2910011 exits successfully.