ZhihongDeng's picture
First Push
5bcdc41
{
"name": "root",
"gauges": {
"SoccerTwos.Policy.Entropy.mean": {
"value": 0.5361879467964172,
"min": 0.5062462091445923,
"max": 3.2957143783569336,
"count": 10000
},
"SoccerTwos.Policy.Entropy.sum": {
"value": 10792.390625,
"min": 631.1412353515625,
"max": 390950.0625,
"count": 10000
},
"SoccerTwos.Environment.EpisodeLength.mean": {
"value": 64.55263157894737,
"min": 34.75,
"max": 999.0,
"count": 10000
},
"SoccerTwos.Environment.EpisodeLength.sum": {
"value": 19624.0,
"min": 9744.0,
"max": 27944.0,
"count": 10000
},
"SoccerTwos.Self-play.ELO.mean": {
"value": 1655.243002041564,
"min": 1187.9343140936849,
"max": 1763.0418844163403,
"count": 9914
},
"SoccerTwos.Self-play.ELO.sum": {
"value": 251596.93631031775,
"min": 2375.8686281873697,
"max": 451934.74502955977,
"count": 9914
},
"SoccerTwos.Step.mean": {
"value": 99999944.0,
"min": 9718.0,
"max": 99999944.0,
"count": 10000
},
"SoccerTwos.Step.sum": {
"value": 99999944.0,
"min": 9718.0,
"max": 99999944.0,
"count": 10000
},
"SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": {
"value": -0.03534884750843048,
"min": -0.1369738131761551,
"max": 0.2670484185218811,
"count": 10000
},
"SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": {
"value": -5.408373832702637,
"min": -25.899005889892578,
"max": 37.65174865722656,
"count": 10000
},
"SoccerTwos.Policy.ExtrinsicValueEstimate.mean": {
"value": -0.0353168249130249,
"min": -0.1389307826757431,
"max": 0.26651522517204285,
"count": 10000
},
"SoccerTwos.Policy.ExtrinsicValueEstimate.sum": {
"value": -5.4034743309021,
"min": -25.871084213256836,
"max": 37.80921173095703,
"count": 10000
},
"SoccerTwos.Environment.CumulativeReward.mean": {
"value": 0.0,
"min": 0.0,
"max": 0.0,
"count": 10000
},
"SoccerTwos.Environment.CumulativeReward.sum": {
"value": 0.0,
"min": 0.0,
"max": 0.0,
"count": 10000
},
"SoccerTwos.Policy.ExtrinsicReward.mean": {
"value": -0.20054117565840676,
"min": -0.5881272716955706,
"max": 0.6285795958674684,
"count": 10000
},
"SoccerTwos.Policy.ExtrinsicReward.sum": {
"value": -30.682799875736237,
"min": -80.2020001411438,
"max": 82.94919991493225,
"count": 10000
},
"SoccerTwos.Environment.GroupCumulativeReward.mean": {
"value": -0.20054117565840676,
"min": -0.5881272716955706,
"max": 0.6285795958674684,
"count": 10000
},
"SoccerTwos.Environment.GroupCumulativeReward.sum": {
"value": -30.682799875736237,
"min": -80.2020001411438,
"max": 82.94919991493225,
"count": 10000
},
"SoccerTwos.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 10000
},
"SoccerTwos.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 10000
},
"SoccerTwos.Losses.PolicyLoss.mean": {
"value": 0.02380004380441581,
"min": 0.00802251725399401,
"max": 0.026490185890967646,
"count": 4848
},
"SoccerTwos.Losses.PolicyLoss.sum": {
"value": 0.02380004380441581,
"min": 0.00802251725399401,
"max": 0.026490185890967646,
"count": 4848
},
"SoccerTwos.Losses.ValueLoss.mean": {
"value": 0.10786419784029325,
"min": 2.5130218242945073e-06,
"max": 0.1378081552684307,
"count": 4848
},
"SoccerTwos.Losses.ValueLoss.sum": {
"value": 0.10786419784029325,
"min": 2.5130218242945073e-06,
"max": 0.1378081552684307,
"count": 4848
},
"SoccerTwos.Losses.BaselineLoss.mean": {
"value": 0.10805333033204079,
"min": 2.2917931763307325e-06,
"max": 0.14002898931503296,
"count": 4848
},
"SoccerTwos.Losses.BaselineLoss.sum": {
"value": 0.10805333033204079,
"min": 2.2917931763307325e-06,
"max": 0.14002898931503296,
"count": 4848
},
"SoccerTwos.Policy.LearningRate.mean": {
"value": 3.98439867519971e-08,
"min": 3.98439867519971e-08,
"max": 0.000299931636022788,
"count": 4848
},
"SoccerTwos.Policy.LearningRate.sum": {
"value": 3.98439867519971e-08,
"min": 3.98439867519971e-08,
"max": 0.000299931636022788,
"count": 4848
},
"SoccerTwos.Policy.Epsilon.mean": {
"value": 0.10001324799999996,
"min": 0.10001324799999996,
"max": 0.19997721200000002,
"count": 4848
},
"SoccerTwos.Policy.Epsilon.sum": {
"value": 0.10001324799999996,
"min": 0.10001324799999996,
"max": 0.19997721200000002,
"count": 4848
},
"SoccerTwos.Policy.Beta.mean": {
"value": 1.0661075199999953e-05,
"min": 1.0661075199999953e-05,
"max": 0.0049988628788,
"count": 4848
},
"SoccerTwos.Policy.Beta.sum": {
"value": 1.0661075199999953e-05,
"min": 1.0661075199999953e-05,
"max": 0.0049988628788,
"count": 4848
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1676882850",
"python_version": "3.9.16 (main, Jan 11 2023, 16:05:54) \n[GCC 11.2.0]",
"command_line_arguments": "/home/zhihdeng/anaconda3/envs/HuggingFaceDRL/bin/mlagents-learn ./config/poca/SoccerTwos.yaml --no-graphics --run-id=SoccerTwosMLP",
"mlagents_version": "0.31.0.dev0",
"mlagents_envs_version": "0.31.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "1.11.0+cu102",
"numpy_version": "1.21.2",
"end_time_seconds": "1676971223"
},
"total": 88373.85427070409,
"count": 1,
"self": 0.5767905004322529,
"children": {
"run_training.setup": {
"total": 0.0484183095395565,
"count": 1,
"self": 0.0484183095395565
},
"TrainerController.start_learning": {
"total": 88373.22906189412,
"count": 1,
"self": 88.28237944841385,
"children": {
"TrainerController._reset_env": {
"total": 71.24583523720503,
"count": 500,
"self": 71.24583523720503
},
"TrainerController.advance": {
"total": 88213.365519315,
"count": 3946472,
"self": 86.39167696610093,
"children": {
"env_step": {
"total": 43111.556869752705,
"count": 3946472,
"self": 13688.833719231188,
"children": {
"SubprocessEnvManager._take_step": {
"total": 29358.235021069646,
"count": 7046286,
"self": 1046.72575295344,
"children": {
"TorchPolicy.evaluate": {
"total": 28311.509268116206,
"count": 12770724,
"self": 28311.509268116206
}
}
},
"workers": {
"total": 64.48812945187092,
"count": 3946472,
"self": 0.0,
"children": {
"worker_root": {
"total": 354218.49521360174,
"count": 7045365,
"is_parallel": true,
"self": 277056.83259246126,
"children": {
"run_training.setup": {
"total": 0.193673238158226,
"count": 4,
"is_parallel": true,
"self": 0.08062634989619255,
"children": {
"steps_from_proto": {
"total": 0.011169001460075378,
"count": 8,
"is_parallel": true,
"self": 0.003017112612724304,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.008151888847351074,
"count": 32,
"is_parallel": true,
"self": 0.008151888847351074
}
}
},
"UnityEnvironment.step": {
"total": 0.10187788680195808,
"count": 4,
"is_parallel": true,
"self": 0.0025879032909870148,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 0.001802206039428711,
"count": 4,
"is_parallel": true,
"self": 0.001802206039428711
},
"communicator.exchange": {
"total": 0.08945160359144211,
"count": 4,
"is_parallel": true,
"self": 0.08945160359144211
},
"steps_from_proto": {
"total": 0.00803617388010025,
"count": 8,
"is_parallel": true,
"self": 0.0016578957438468933,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.006378278136253357,
"count": 32,
"is_parallel": true,
"self": 0.006378278136253357
}
}
}
}
}
}
},
"UnityEnvironment.step": {
"total": 77157.02932818234,
"count": 7045357,
"is_parallel": true,
"self": 5271.69396962598,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 2848.0237276628613,
"count": 7045357,
"is_parallel": true,
"self": 2848.0237276628613
},
"communicator.exchange": {
"total": 55228.5793344304,
"count": 7045357,
"is_parallel": true,
"self": 55228.5793344304
},
"steps_from_proto": {
"total": 13808.732296463102,
"count": 14090714,
"is_parallel": true,
"self": 2671.1489330865443,
"children": {
"_process_rank_one_or_two_observation": {
"total": 11137.583363376558,
"count": 56362856,
"is_parallel": true,
"self": 11137.583363376558
}
}
}
}
},
"steps_from_proto": {
"total": 4.439619719982147,
"count": 3992,
"is_parallel": true,
"self": 0.8451655693352222,
"children": {
"_process_rank_one_or_two_observation": {
"total": 3.594454150646925,
"count": 15968,
"is_parallel": true,
"self": 3.594454150646925
}
}
},
"TrainerController.start_learning": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"TrainerController._reset_env": {
"total": 147.0211791843176,
"count": 8,
"is_parallel": true,
"self": 147.0211791843176
},
"TrainerController.advance": {
"total": 884.1772038340569,
"count": 39316,
"is_parallel": true,
"self": 0.8210047781467438,
"children": {
"env_step": {
"total": 511.9983355551958,
"count": 39316,
"is_parallel": true,
"self": 163.43248023465276,
"children": {
"SubprocessEnvManager._take_step": {
"total": 347.6746134161949,
"count": 76720,
"is_parallel": true,
"self": 11.412553071975708,
"children": {
"TorchPolicy.evaluate": {
"total": 336.2620603442192,
"count": 152664,
"is_parallel": true,
"self": 336.2620603442192
}
}
},
"workers": {
"total": 0.743629664182663,
"count": 39316,
"is_parallel": true,
"self": 0.0,
"children": {
"worker_root": {
"total": 4125.666426867247,
"count": 76696,
"is_parallel": true,
"self": 3296.758002460003,
"children": {
"run_training.setup": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"steps_from_proto": {
"total": 0.044676005840301514,
"count": 32,
"is_parallel": true,
"self": 0.012068450450897217,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0326075553894043,
"count": 128,
"is_parallel": true,
"self": 0.0326075553894043
}
}
},
"UnityEnvironment.step": {
"total": 0.40751154720783234,
"count": 16,
"is_parallel": true,
"self": 0.010351613163948059,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 0.007208824157714844,
"count": 16,
"is_parallel": true,
"self": 0.007208824157714844
},
"communicator.exchange": {
"total": 0.35780641436576843,
"count": 16,
"is_parallel": true,
"self": 0.35780641436576843
},
"steps_from_proto": {
"total": 0.032144695520401,
"count": 32,
"is_parallel": true,
"self": 0.006631582975387573,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.025513112545013428,
"count": 128,
"is_parallel": true,
"self": 0.025513112545013428
}
}
}
}
}
}
},
"UnityEnvironment.step": {
"total": 828.8764188885689,
"count": 76680,
"is_parallel": true,
"self": 46.903960809111595,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 30.588029012084007,
"count": 76680,
"is_parallel": true,
"self": 30.588029012084007
},
"communicator.exchange": {
"total": 609.1386138498783,
"count": 76680,
"is_parallel": true,
"self": 609.1386138498783
},
"steps_from_proto": {
"total": 142.24581521749496,
"count": 153360,
"is_parallel": true,
"self": 28.042156845331192,
"children": {
"_process_rank_one_or_two_observation": {
"total": 114.20365837216377,
"count": 613440,
"is_parallel": true,
"self": 114.20365837216377
}
}
}
}
},
"steps_from_proto": {
"total": 0.032005518674850464,
"count": 32,
"is_parallel": true,
"self": 0.0062446147203445435,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.02576090395450592,
"count": 128,
"is_parallel": true,
"self": 0.02576090395450592
}
}
}
}
}
}
},
"steps_from_proto": {
"total": 0.019269373267889023,
"count": 8,
"is_parallel": true,
"self": 0.003829497843980789,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.015439875423908234,
"count": 32,
"is_parallel": true,
"self": 0.015439875423908234
}
}
},
"UnityEnvironment.step": {
"total": 0.128342866897583,
"count": 4,
"is_parallel": true,
"self": 0.007858805358409882,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 0.004416607320308685,
"count": 4,
"is_parallel": true,
"self": 0.004416607320308685
},
"communicator.exchange": {
"total": 0.10227135568857193,
"count": 4,
"is_parallel": true,
"self": 0.10227135568857193
},
"steps_from_proto": {
"total": 0.013796098530292511,
"count": 8,
"is_parallel": true,
"self": 0.0021491684019565582,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.011646930128335953,
"count": 32,
"is_parallel": true,
"self": 0.011646930128335953
}
}
}
}
}
}
},
"trainer_advance": {
"total": 371.3578635007143,
"count": 39316,
"is_parallel": true,
"self": 13.995508208870888,
"children": {
"process_trajectory": {
"total": 101.5431860089302,
"count": 39316,
"is_parallel": true,
"self": 101.5431860089302
},
"_update_policy": {
"total": 255.8191692829132,
"count": 44,
"is_parallel": true,
"self": 165.65164488554,
"children": {
"TorchPOCAOptimizer.update": {
"total": 90.1675243973732,
"count": 1392,
"is_parallel": true,
"self": 90.1675243973732
}
}
}
}
}
}
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 45015.4169725962,
"count": 3946472,
"self": 1186.358848668635,
"children": {
"process_trajectory": {
"total": 19193.84821567312,
"count": 3946472,
"self": 19120.074692908674,
"children": {
"RLTrainer._checkpoint": {
"total": 73.77352276444435,
"count": 200,
"self": 73.77352276444435
}
}
},
"_update_policy": {
"total": 24635.209908254445,
"count": 4848,
"self": 16051.347006946802,
"children": {
"TorchPOCAOptimizer.update": {
"total": 8583.862901307642,
"count": 145476,
"self": 8583.862901307642
}
}
}
}
}
}
},
"trainer_threads": {
"total": 1.1771917343139648e-06,
"count": 1,
"self": 1.1771917343139648e-06
},
"TrainerController._save_models": {
"total": 0.3353267163038254,
"count": 1,
"self": 0.009651627391576767,
"children": {
"RLTrainer._checkpoint": {
"total": 0.3256750889122486,
"count": 1,
"self": 0.3256750889122486
}
}
}
}
}
}
}