{ "name": "root", "gauges": { "SoccerTwos.Policy.Entropy.mean": { "value": 0.8469312191009521, "min": 0.8232175707817078, "max": 3.295804023742676, "count": 1695 }, "SoccerTwos.Policy.Entropy.sum": { "value": 9187.509765625, "min": 2003.845703125, "max": 105465.6875, "count": 1695 }, "SoccerTwos.Environment.EpisodeLength.mean": { "value": 64.5, "min": 40.152542372881356, "max": 999.0, "count": 1695 }, "SoccerTwos.Environment.EpisodeLength.sum": { "value": 10320.0, "min": 2732.0, "max": 28832.0, "count": 1695 }, "SoccerTwos.Self-play.ELO.mean": { "value": 1763.6273213154295, "min": 1189.8904352353843, "max": 1771.968606144515, "count": 1664 }, "SoccerTwos.Self-play.ELO.sum": { "value": 141090.18570523435, "min": 2381.0377987029947, "max": 202012.20693031192, "count": 1664 }, "SoccerTwos.Step.mean": { "value": 8479958.0, "min": 4432.0, "max": 8479958.0, "count": 1696 }, "SoccerTwos.Step.sum": { "value": 8479958.0, "min": 4432.0, "max": 8479958.0, "count": 1696 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": { "value": -0.040951985865831375, "min": -0.19922977685928345, "max": 0.2191123515367508, "count": 1696 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": { "value": -3.2761588096618652, "min": -13.849787712097168, "max": 17.74810028076172, "count": 1696 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.mean": { "value": -0.04459637776017189, "min": -0.19301573932170868, "max": 0.2158556580543518, "count": 1696 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.sum": { "value": -3.5677101612091064, "min": -14.456816673278809, "max": 17.48430824279785, "count": 1696 }, "SoccerTwos.Environment.CumulativeReward.mean": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 1696 }, "SoccerTwos.Environment.CumulativeReward.sum": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 1696 }, "SoccerTwos.Policy.ExtrinsicReward.mean": { "value": -0.22534499913454056, "min": -1.0, "max": 0.7153999871677823, "count": 1696 }, "SoccerTwos.Policy.ExtrinsicReward.sum": { "value": -18.027599930763245, "min": -41.74519991874695, "max": 40.31680017709732, "count": 1696 }, "SoccerTwos.Environment.GroupCumulativeReward.mean": { "value": -0.22534499913454056, "min": -1.0, "max": 0.7153999871677823, "count": 1696 }, "SoccerTwos.Environment.GroupCumulativeReward.sum": { "value": -18.027599930763245, "min": -41.74519991874695, "max": 40.31680017709732, "count": 1696 }, "SoccerTwos.IsTraining.mean": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 1696 }, "SoccerTwos.IsTraining.sum": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 1696 }, "SoccerTwos.Losses.PolicyLoss.mean": { "value": 0.012244973257184029, "min": 0.010488120992304175, "max": 0.016860684389248492, "count": 205 }, "SoccerTwos.Losses.PolicyLoss.sum": { "value": 0.012244973257184029, "min": 0.010488120992304175, "max": 0.016860684389248492, "count": 205 }, "SoccerTwos.Losses.ValueLoss.mean": { "value": 0.11452542960643769, "min": 0.0012547600199468434, "max": 0.12134995341300964, "count": 205 }, "SoccerTwos.Losses.ValueLoss.sum": { "value": 0.11452542960643769, "min": 0.0012547600199468434, "max": 0.12134995341300964, "count": 205 }, "SoccerTwos.Losses.BaselineLoss.mean": { "value": 0.1195459270477295, "min": 0.001435923029202968, "max": 0.12682862743735313, "count": 205 }, "SoccerTwos.Losses.BaselineLoss.sum": { "value": 0.1195459270477295, "min": 0.001435923029202968, "max": 0.12682862743735313, "count": 205 }, "SoccerTwos.Policy.LearningRate.mean": { "value": 8.311875688126e-05, "min": 8.311875688126e-05, "max": 9.9915052084948e-05, "count": 205 }, "SoccerTwos.Policy.LearningRate.sum": { "value": 8.311875688126e-05, "min": 8.311875688126e-05, "max": 9.9915052084948e-05, "count": 205 }, "SoccerTwos.Policy.Epsilon.mean": { "value": 0.162339055, "min": 0.162339055, "max": 0.17493628899999997, "count": 205 }, "SoccerTwos.Policy.Epsilon.sum": { "value": 0.162339055, "min": 0.162339055, "max": 0.17493628899999997, "count": 205 }, "SoccerTwos.Policy.Beta.mean": { "value": 0.0008328755259999999, "min": 0.0008328755259999999, "max": 0.0009991590148, "count": 205 }, "SoccerTwos.Policy.Beta.sum": { "value": 0.0008328755259999999, "min": 0.0008328755259999999, "max": 0.0009991590148, "count": 205 } }, "metadata": { "timer_format_version": "0.1.0", "start_time_seconds": "1734530482", "python_version": "3.10.12 | packaged by Anaconda, Inc. | (main, Jul 5 2023, 19:01:18) [MSC v.1916 64 bit (AMD64)]", "command_line_arguments": "\\\\?\\C:\\Users\\Edu\\miniconda3\\envs\\rl\\Scripts\\mlagents-learn ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/SoccerTwos.exe --run-id=SoccerTwos --no-graphics", "mlagents_version": "1.2.0.dev0", "mlagents_envs_version": "1.2.0.dev0", "communication_protocol_version": "1.5.0", "pytorch_version": "2.5.1+cpu", "numpy_version": "1.23.5", "end_time_seconds": "1734592781" }, "total": 62297.521005999995, "count": 1, "self": 0.010206600010860711, "children": { "run_training.setup": { "total": 0.09557769999082666, "count": 1, "self": 0.09557769999082666 }, "TrainerController.start_learning": { "total": 62297.415221699994, "count": 1, "self": 14.6314888002089, "children": { "TrainerController._reset_env": { "total": 15.382453300029738, "count": 85, "self": 15.382453300029738 }, "TrainerController.advance": { "total": 62267.04685259974, "count": 585681, "self": 14.093730005988618, "children": { "env_step": { "total": 10437.005436695807, "count": 585681, "self": 7298.554581102231, "children": { "SubprocessEnvManager._take_step": { "total": 3129.628976492575, "count": 585681, "self": 85.2579285860993, "children": { "TorchPolicy.evaluate": { "total": 3044.3710479064757, "count": 1075560, "self": 3044.3710479064757 } } }, "workers": { "total": 8.821879101000377, "count": 585681, "self": 0.0, "children": { "worker_root": { "total": 62246.82085750632, "count": 585681, "is_parallel": true, "self": 56644.32421551256, "children": { "steps_from_proto": { "total": 0.145146500057308, "count": 170, "is_parallel": true, "self": 0.02975290002359543, "children": { "_process_rank_one_or_two_observation": { "total": 0.11539360003371257, "count": 680, "is_parallel": true, "self": 0.11539360003371257 } } }, "UnityEnvironment.step": { "total": 5602.351495493698, "count": 585681, "is_parallel": true, "self": 316.47514569666237, "children": { "UnityEnvironment._generate_step_input": { "total": 278.7953514022229, "count": 585681, "is_parallel": true, "self": 278.7953514022229 }, "communicator.exchange": { "total": 3961.8991054948565, "count": 585681, "is_parallel": true, "self": 3961.8991054948565 }, "steps_from_proto": { "total": 1045.1818928999564, "count": 1171362, "is_parallel": true, "self": 209.20651879774232, "children": { "_process_rank_one_or_two_observation": { "total": 835.9753741022141, "count": 4685448, "is_parallel": true, "self": 835.9753741022141 } } } } } } } } } } }, "trainer_advance": { "total": 51815.94768589795, "count": 585681, "self": 99.25371239797096, "children": { "process_trajectory": { "total": 4604.251587100065, "count": 585681, "self": 4599.446594900117, "children": { "RLTrainer._checkpoint": { "total": 4.804992199948174, "count": 16, "self": 4.804992199948174 } } }, "_update_policy": { "total": 47112.44238639991, "count": 206, "self": 1881.7498688001651, "children": { "TorchPOCAOptimizer.update": { "total": 45230.692517599746, "count": 10253, "self": 45230.692517599746 } } } } } } }, "trainer_threads": { "total": 1.300009898841381e-06, "count": 1, "self": 1.300009898841381e-06 }, "TrainerController._save_models": { "total": 0.3544257000030484, "count": 1, "self": 0.006995100004132837, "children": { "RLTrainer._checkpoint": { "total": 0.34743059999891557, "count": 1, "self": 0.34743059999891557 } } } } } } }