{ "best_metric": null, "best_model_checkpoint": null, "epoch": 40.0, "eval_steps": 500, "global_step": 106560, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 9.384384384384385e-09, "loss": 30.0191, "step": 1 }, { "epoch": 0.2, "learning_rate": 5.001876876876877e-06, "loss": 28.4983, "step": 533 }, { "epoch": 0.4, "learning_rate": 1.0003753753753754e-05, "loss": 21.5164, "step": 1066 }, { "epoch": 0.6, "learning_rate": 1.5005630630630632e-05, "loss": 7.132, "step": 1599 }, { "epoch": 0.8, "learning_rate": 2.000750750750751e-05, "loss": 0.8541, "step": 2132 }, { "epoch": 1.0, "eval_loss": 0.3403850197792053, "eval_max_distance": 1, "eval_mean_distance": 0, "eval_runtime": 1.1154, "eval_samples_per_second": 320.968, "eval_steps_per_second": 21.517, "step": 2664 }, { "epoch": 1.0, "learning_rate": 2.500938438438439e-05, "loss": 0.3348, "step": 2665 }, { "epoch": 1.2, "learning_rate": 3.0011261261261263e-05, "loss": 0.5279, "step": 3198 }, { "epoch": 1.4, "learning_rate": 3.501313813813814e-05, "loss": 0.2294, "step": 3731 }, { "epoch": 1.6, "learning_rate": 4.001501501501502e-05, "loss": 0.0605, "step": 4264 }, { "epoch": 1.8, "learning_rate": 4.5016891891891895e-05, "loss": 0.0451, "step": 4797 }, { "epoch": 2.0, "eval_loss": 0.060470160096883774, "eval_max_distance": 1, "eval_mean_distance": 0, "eval_runtime": 0.9182, "eval_samples_per_second": 389.888, "eval_steps_per_second": 26.138, "step": 5328 }, { "epoch": 2.0, "learning_rate": 5.001876876876878e-05, "loss": 0.0605, "step": 5330 }, { "epoch": 2.2, "learning_rate": 5.502064564564565e-05, "loss": 0.025, "step": 5863 }, { "epoch": 2.4, "learning_rate": 6.0022522522522526e-05, "loss": 0.0193, "step": 6396 }, { "epoch": 2.6, "learning_rate": 6.502439939939941e-05, "loss": 0.0153, "step": 6929 }, { "epoch": 2.8, "learning_rate": 7.002627627627628e-05, "loss": 0.0112, "step": 7462 }, { "epoch": 3.0, "eval_loss": 0.04112406447529793, "eval_max_distance": 1, "eval_mean_distance": 0, "eval_runtime": 0.9146, "eval_samples_per_second": 391.418, "eval_steps_per_second": 26.24, "step": 7992 }, { "epoch": 3.0, "learning_rate": 7.502815315315315e-05, "loss": 0.0109, "step": 7995 }, { "epoch": 3.2, "learning_rate": 8.003003003003004e-05, "loss": 0.0095, "step": 8528 }, { "epoch": 3.4, "learning_rate": 8.50319069069069e-05, "loss": 0.012, "step": 9061 }, { "epoch": 3.6, "learning_rate": 9.003378378378379e-05, "loss": 0.0223, "step": 9594 }, { "epoch": 3.8, "learning_rate": 9.503566066066066e-05, "loss": 0.0068, "step": 10127 }, { "epoch": 4.0, "eval_loss": 0.020507752895355225, "eval_max_distance": 1, "eval_mean_distance": 0, "eval_runtime": 0.9253, "eval_samples_per_second": 386.912, "eval_steps_per_second": 25.938, "step": 10656 }, { "epoch": 4.0, "learning_rate": 9.999582916249583e-05, "loss": 0.0077, "step": 10660 }, { "epoch": 4.2, "learning_rate": 9.944006506506507e-05, "loss": 0.0058, "step": 11193 }, { "epoch": 4.4, "learning_rate": 9.88843009676343e-05, "loss": 0.0072, "step": 11726 }, { "epoch": 4.6, "learning_rate": 9.832853687020355e-05, "loss": 0.0097, "step": 12259 }, { "epoch": 4.8, "learning_rate": 9.777277277277279e-05, "loss": 0.007, "step": 12792 }, { "epoch": 5.0, "eval_loss": 0.02420434169471264, "eval_max_distance": 1, "eval_mean_distance": 0, "eval_runtime": 0.9141, "eval_samples_per_second": 391.658, "eval_steps_per_second": 26.256, "step": 13320 }, { "epoch": 5.0, "learning_rate": 9.721700867534201e-05, "loss": 0.0049, "step": 13325 }, { "epoch": 5.2, "learning_rate": 9.666124457791124e-05, "loss": 0.005, "step": 13858 }, { "epoch": 5.4, "learning_rate": 9.610548048048048e-05, "loss": 0.0035, "step": 14391 }, { "epoch": 5.6, "learning_rate": 9.554971638304973e-05, "loss": 0.0061, "step": 14924 }, { "epoch": 5.8, "learning_rate": 9.499395228561896e-05, "loss": 0.0022, "step": 15457 }, { "epoch": 6.0, "eval_loss": 0.027173461392521858, "eval_max_distance": 1, "eval_mean_distance": 0, "eval_runtime": 0.9101, "eval_samples_per_second": 393.378, "eval_steps_per_second": 26.372, "step": 15984 }, { "epoch": 6.0, "learning_rate": 9.44381881881882e-05, "loss": 0.0048, "step": 15990 }, { "epoch": 6.2, "learning_rate": 9.388242409075743e-05, "loss": 0.0053, "step": 16523 }, { "epoch": 6.4, "learning_rate": 9.332665999332665e-05, "loss": 0.005, "step": 17056 }, { "epoch": 6.6, "learning_rate": 9.27708958958959e-05, "loss": 0.0034, "step": 17589 }, { "epoch": 6.8, "learning_rate": 9.221513179846514e-05, "loss": 0.0054, "step": 18122 }, { "epoch": 7.0, "eval_loss": 0.008011276833713055, "eval_max_distance": 1, "eval_mean_distance": 0, "eval_runtime": 0.9187, "eval_samples_per_second": 389.697, "eval_steps_per_second": 26.125, "step": 18648 }, { "epoch": 7.0, "learning_rate": 9.165936770103437e-05, "loss": 0.0036, "step": 18655 }, { "epoch": 7.2, "learning_rate": 9.110360360360361e-05, "loss": 0.002, "step": 19188 }, { "epoch": 7.4, "learning_rate": 9.054783950617284e-05, "loss": 0.0029, "step": 19721 }, { "epoch": 7.6, "learning_rate": 8.999207540874208e-05, "loss": 0.0031, "step": 20254 }, { "epoch": 7.8, "learning_rate": 8.943631131131131e-05, "loss": 0.0036, "step": 20787 }, { "epoch": 8.0, "eval_loss": 0.025212394073605537, "eval_max_distance": 1, "eval_mean_distance": 0, "eval_runtime": 0.9136, "eval_samples_per_second": 391.872, "eval_steps_per_second": 26.271, "step": 21312 }, { "epoch": 8.0, "learning_rate": 8.888054721388055e-05, "loss": 0.0022, "step": 21320 }, { "epoch": 8.2, "learning_rate": 8.832478311644978e-05, "loss": 0.0021, "step": 21853 }, { "epoch": 8.4, "learning_rate": 8.776901901901903e-05, "loss": 0.0016, "step": 22386 }, { "epoch": 8.6, "learning_rate": 8.721325492158827e-05, "loss": 0.0021, "step": 22919 }, { "epoch": 8.8, "learning_rate": 8.665749082415749e-05, "loss": 0.0039, "step": 23452 }, { "epoch": 9.0, "eval_loss": 0.020978303626179695, "eval_max_distance": 1, "eval_mean_distance": 0, "eval_runtime": 0.9126, "eval_samples_per_second": 392.285, "eval_steps_per_second": 26.298, "step": 23976 }, { "epoch": 9.0, "learning_rate": 8.610172672672672e-05, "loss": 0.0032, "step": 23985 }, { "epoch": 9.2, "learning_rate": 8.554596262929596e-05, "loss": 0.0027, "step": 24518 }, { "epoch": 9.4, "learning_rate": 8.499019853186521e-05, "loss": 0.0022, "step": 25051 }, { "epoch": 9.6, "learning_rate": 8.443443443443444e-05, "loss": 0.002, "step": 25584 }, { "epoch": 9.8, "learning_rate": 8.387867033700368e-05, "loss": 0.0026, "step": 26117 }, { "epoch": 10.0, "eval_loss": 0.017031751573085785, "eval_max_distance": 1, "eval_mean_distance": 0, "eval_runtime": 0.9145, "eval_samples_per_second": 391.481, "eval_steps_per_second": 26.245, "step": 26640 }, { "epoch": 10.0, "learning_rate": 8.332290623957291e-05, "loss": 0.0014, "step": 26650 }, { "epoch": 10.2, "learning_rate": 8.276714214214215e-05, "loss": 0.0016, "step": 27183 }, { "epoch": 10.4, "learning_rate": 8.221137804471138e-05, "loss": 0.0045, "step": 27716 }, { "epoch": 10.6, "learning_rate": 8.165561394728062e-05, "loss": 0.0017, "step": 28249 }, { "epoch": 10.8, "learning_rate": 8.109984984984985e-05, "loss": 0.0026, "step": 28782 }, { "epoch": 11.0, "eval_loss": 0.004335461650043726, "eval_max_distance": 1, "eval_mean_distance": 0, "eval_runtime": 0.9104, "eval_samples_per_second": 393.245, "eval_steps_per_second": 26.363, "step": 29304 }, { "epoch": 11.0, "learning_rate": 8.054408575241909e-05, "loss": 0.0016, "step": 29315 }, { "epoch": 11.2, "learning_rate": 7.998832165498832e-05, "loss": 0.0006, "step": 29848 }, { "epoch": 11.4, "learning_rate": 7.943255755755756e-05, "loss": 0.0011, "step": 30381 }, { "epoch": 11.6, "learning_rate": 7.88767934601268e-05, "loss": 0.0009, "step": 30914 }, { "epoch": 11.8, "learning_rate": 7.832102936269603e-05, "loss": 0.0029, "step": 31447 }, { "epoch": 12.0, "eval_loss": 0.013542454689741135, "eval_max_distance": 1, "eval_mean_distance": 0, "eval_runtime": 0.9125, "eval_samples_per_second": 392.323, "eval_steps_per_second": 26.301, "step": 31968 }, { "epoch": 12.0, "learning_rate": 7.776526526526526e-05, "loss": 0.0029, "step": 31980 }, { "epoch": 12.2, "learning_rate": 7.720950116783451e-05, "loss": 0.0008, "step": 32513 }, { "epoch": 12.4, "learning_rate": 7.665373707040375e-05, "loss": 0.0027, "step": 33046 }, { "epoch": 12.6, "learning_rate": 7.609797297297297e-05, "loss": 0.0016, "step": 33579 }, { "epoch": 12.8, "learning_rate": 7.55422088755422e-05, "loss": 0.0011, "step": 34112 }, { "epoch": 13.0, "eval_loss": 0.03128792718052864, "eval_max_distance": 1, "eval_mean_distance": 0, "eval_runtime": 0.9117, "eval_samples_per_second": 392.662, "eval_steps_per_second": 26.324, "step": 34632 }, { "epoch": 13.0, "learning_rate": 7.498644477811145e-05, "loss": 0.0007, "step": 34645 }, { "epoch": 13.2, "learning_rate": 7.443068068068069e-05, "loss": 0.0014, "step": 35178 }, { "epoch": 13.41, "learning_rate": 7.387491658324992e-05, "loss": 0.0067, "step": 35711 }, { "epoch": 13.61, "learning_rate": 7.331915248581916e-05, "loss": 0.0008, "step": 36244 }, { "epoch": 13.81, "learning_rate": 7.27633883883884e-05, "loss": 0.0017, "step": 36777 }, { "epoch": 14.0, "eval_loss": 0.03530227765440941, "eval_max_distance": 1, "eval_mean_distance": 0, "eval_runtime": 0.9178, "eval_samples_per_second": 390.083, "eval_steps_per_second": 26.151, "step": 37296 }, { "epoch": 14.01, "learning_rate": 7.220762429095763e-05, "loss": 0.001, "step": 37310 }, { "epoch": 14.21, "learning_rate": 7.165186019352686e-05, "loss": 0.001, "step": 37843 }, { "epoch": 14.41, "learning_rate": 7.10960960960961e-05, "loss": 0.0012, "step": 38376 }, { "epoch": 14.61, "learning_rate": 7.054033199866533e-05, "loss": 0.0011, "step": 38909 }, { "epoch": 14.81, "learning_rate": 6.998456790123458e-05, "loss": 0.0014, "step": 39442 }, { "epoch": 15.0, "eval_loss": 0.011675473302602768, "eval_max_distance": 1, "eval_mean_distance": 0, "eval_runtime": 0.916, "eval_samples_per_second": 390.823, "eval_steps_per_second": 26.2, "step": 39960 }, { "epoch": 15.01, "learning_rate": 6.94288038038038e-05, "loss": 0.001, "step": 39975 }, { "epoch": 15.21, "learning_rate": 6.887303970637304e-05, "loss": 0.0003, "step": 40508 }, { "epoch": 15.41, "learning_rate": 6.831727560894227e-05, "loss": 0.002, "step": 41041 }, { "epoch": 15.61, "learning_rate": 6.776151151151151e-05, "loss": 0.001, "step": 41574 }, { "epoch": 15.81, "learning_rate": 6.720574741408076e-05, "loss": 0.0014, "step": 42107 }, { "epoch": 16.0, "eval_loss": 0.0139808664098382, "eval_max_distance": 1, "eval_mean_distance": 0, "eval_runtime": 0.9131, "eval_samples_per_second": 392.074, "eval_steps_per_second": 26.284, "step": 42624 }, { "epoch": 16.01, "learning_rate": 6.664998331665e-05, "loss": 0.0007, "step": 42640 }, { "epoch": 16.21, "learning_rate": 6.609421921921923e-05, "loss": 0.0004, "step": 43173 }, { "epoch": 16.41, "learning_rate": 6.553845512178845e-05, "loss": 0.0006, "step": 43706 }, { "epoch": 16.61, "learning_rate": 6.498269102435769e-05, "loss": 0.0017, "step": 44239 }, { "epoch": 16.81, "learning_rate": 6.442692692692693e-05, "loss": 0.0013, "step": 44772 }, { "epoch": 17.0, "eval_loss": 0.022025227546691895, "eval_max_distance": 1, "eval_mean_distance": 0, "eval_runtime": 0.9176, "eval_samples_per_second": 390.127, "eval_steps_per_second": 26.154, "step": 45288 }, { "epoch": 17.01, "learning_rate": 6.387116282949617e-05, "loss": 0.0013, "step": 45305 }, { "epoch": 17.21, "learning_rate": 6.33153987320654e-05, "loss": 0.0003, "step": 45838 }, { "epoch": 17.41, "learning_rate": 6.275963463463464e-05, "loss": 0.0011, "step": 46371 }, { "epoch": 17.61, "learning_rate": 6.220387053720387e-05, "loss": 0.0004, "step": 46904 }, { "epoch": 17.81, "learning_rate": 6.164810643977311e-05, "loss": 0.0009, "step": 47437 }, { "epoch": 18.0, "eval_loss": 0.024678541347384453, "eval_max_distance": 1, "eval_mean_distance": 0, "eval_runtime": 0.908, "eval_samples_per_second": 394.28, "eval_steps_per_second": 26.432, "step": 47952 }, { "epoch": 18.01, "learning_rate": 6.109234234234234e-05, "loss": 0.0023, "step": 47970 }, { "epoch": 18.21, "learning_rate": 6.053657824491158e-05, "loss": 0.0007, "step": 48503 }, { "epoch": 18.41, "learning_rate": 5.9980814147480815e-05, "loss": 0.0015, "step": 49036 }, { "epoch": 18.61, "learning_rate": 5.9425050050050057e-05, "loss": 0.0004, "step": 49569 }, { "epoch": 18.81, "learning_rate": 5.886928595261929e-05, "loss": 0.0017, "step": 50102 }, { "epoch": 19.0, "eval_loss": 0.03220739960670471, "eval_max_distance": 1, "eval_mean_distance": 0, "eval_runtime": 0.9151, "eval_samples_per_second": 391.2, "eval_steps_per_second": 26.226, "step": 50616 }, { "epoch": 19.01, "learning_rate": 5.831352185518853e-05, "loss": 0.0011, "step": 50635 }, { "epoch": 19.21, "learning_rate": 5.7757757757757755e-05, "loss": 0.0009, "step": 51168 }, { "epoch": 19.41, "learning_rate": 5.720199366032699e-05, "loss": 0.0005, "step": 51701 }, { "epoch": 19.61, "learning_rate": 5.664622956289624e-05, "loss": 0.0011, "step": 52234 }, { "epoch": 19.81, "learning_rate": 5.609046546546547e-05, "loss": 0.0022, "step": 52767 }, { "epoch": 20.0, "eval_loss": 0.0314439982175827, "eval_max_distance": 1, "eval_mean_distance": 0, "eval_runtime": 0.9074, "eval_samples_per_second": 394.526, "eval_steps_per_second": 26.449, "step": 53280 }, { "epoch": 20.01, "learning_rate": 5.55347013680347e-05, "loss": 0.001, "step": 53300 }, { "epoch": 20.21, "learning_rate": 5.497893727060394e-05, "loss": 0.001, "step": 53833 }, { "epoch": 20.41, "learning_rate": 5.442317317317318e-05, "loss": 0.0, "step": 54366 }, { "epoch": 20.61, "learning_rate": 5.3867409075742415e-05, "loss": 0.0015, "step": 54899 }, { "epoch": 20.81, "learning_rate": 5.331164497831165e-05, "loss": 0.0006, "step": 55432 }, { "epoch": 21.0, "eval_loss": 0.030524656176567078, "eval_max_distance": 1, "eval_mean_distance": 0, "eval_runtime": 0.9097, "eval_samples_per_second": 393.555, "eval_steps_per_second": 26.384, "step": 55944 }, { "epoch": 21.01, "learning_rate": 5.275588088088088e-05, "loss": 0.0005, "step": 55965 }, { "epoch": 21.21, "learning_rate": 5.220011678345011e-05, "loss": 0.0006, "step": 56498 }, { "epoch": 21.41, "learning_rate": 5.164435268601936e-05, "loss": 0.0012, "step": 57031 }, { "epoch": 21.61, "learning_rate": 5.108858858858859e-05, "loss": 0.0005, "step": 57564 }, { "epoch": 21.81, "learning_rate": 5.0532824491157825e-05, "loss": 0.001, "step": 58097 }, { "epoch": 22.0, "eval_loss": 0.029209736734628677, "eval_max_distance": 1, "eval_mean_distance": 0, "eval_runtime": 0.9114, "eval_samples_per_second": 392.814, "eval_steps_per_second": 26.334, "step": 58608 }, { "epoch": 22.01, "learning_rate": 4.997706039372707e-05, "loss": 0.0007, "step": 58630 }, { "epoch": 22.21, "learning_rate": 4.94212962962963e-05, "loss": 0.0011, "step": 59163 }, { "epoch": 22.41, "learning_rate": 4.886553219886553e-05, "loss": 0.0004, "step": 59696 }, { "epoch": 22.61, "learning_rate": 4.830976810143477e-05, "loss": 0.001, "step": 60229 }, { "epoch": 22.81, "learning_rate": 4.775400400400401e-05, "loss": 0.0008, "step": 60762 }, { "epoch": 23.0, "eval_loss": 0.03728558123111725, "eval_max_distance": 1, "eval_mean_distance": 0, "eval_runtime": 0.9095, "eval_samples_per_second": 393.613, "eval_steps_per_second": 26.387, "step": 61272 }, { "epoch": 23.01, "learning_rate": 4.719823990657324e-05, "loss": 0.0001, "step": 61295 }, { "epoch": 23.21, "learning_rate": 4.664247580914248e-05, "loss": 0.0006, "step": 61828 }, { "epoch": 23.41, "learning_rate": 4.608671171171172e-05, "loss": 0.0002, "step": 62361 }, { "epoch": 23.61, "learning_rate": 4.553094761428095e-05, "loss": 0.0009, "step": 62894 }, { "epoch": 23.81, "learning_rate": 4.497518351685018e-05, "loss": 0.0008, "step": 63427 }, { "epoch": 24.0, "eval_loss": 0.030942877754569054, "eval_max_distance": 1, "eval_mean_distance": 0, "eval_runtime": 0.9149, "eval_samples_per_second": 391.282, "eval_steps_per_second": 26.231, "step": 63936 }, { "epoch": 24.01, "learning_rate": 4.4419419419419425e-05, "loss": 0.0003, "step": 63960 }, { "epoch": 24.21, "learning_rate": 4.386365532198865e-05, "loss": 0.0007, "step": 64493 }, { "epoch": 24.41, "learning_rate": 4.3307891224557895e-05, "loss": 0.0004, "step": 65026 }, { "epoch": 24.61, "learning_rate": 4.275212712712713e-05, "loss": 0.0001, "step": 65559 }, { "epoch": 24.81, "learning_rate": 4.2196363029696365e-05, "loss": 0.0008, "step": 66092 }, { "epoch": 25.0, "eval_loss": 0.038451410830020905, "eval_max_distance": 1, "eval_mean_distance": 0, "eval_runtime": 0.9087, "eval_samples_per_second": 393.969, "eval_steps_per_second": 26.411, "step": 66600 }, { "epoch": 25.01, "learning_rate": 4.16405989322656e-05, "loss": 0.0006, "step": 66625 }, { "epoch": 25.21, "learning_rate": 4.1084834834834836e-05, "loss": 0.0, "step": 67158 }, { "epoch": 25.41, "learning_rate": 4.052907073740407e-05, "loss": 0.0013, "step": 67691 }, { "epoch": 25.61, "learning_rate": 3.9973306639973306e-05, "loss": 0.0001, "step": 68224 }, { "epoch": 25.81, "learning_rate": 3.941754254254255e-05, "loss": 0.0014, "step": 68757 }, { "epoch": 26.0, "eval_loss": 0.0133729362860322, "eval_max_distance": 1, "eval_mean_distance": 0, "eval_runtime": 0.9209, "eval_samples_per_second": 388.755, "eval_steps_per_second": 26.062, "step": 69264 }, { "epoch": 26.01, "learning_rate": 3.8861778445111776e-05, "loss": 0.0001, "step": 69290 }, { "epoch": 26.21, "learning_rate": 3.830601434768102e-05, "loss": 0.0008, "step": 69823 }, { "epoch": 26.41, "learning_rate": 3.775025025025025e-05, "loss": 0.0008, "step": 70356 }, { "epoch": 26.61, "learning_rate": 3.719448615281949e-05, "loss": 0.0002, "step": 70889 }, { "epoch": 26.81, "learning_rate": 3.663872205538872e-05, "loss": 0.0004, "step": 71422 }, { "epoch": 27.0, "eval_loss": 0.023867754265666008, "eval_max_distance": 1, "eval_mean_distance": 0, "eval_runtime": 0.9071, "eval_samples_per_second": 394.647, "eval_steps_per_second": 26.457, "step": 71928 }, { "epoch": 27.01, "learning_rate": 3.608295795795796e-05, "loss": 0.0009, "step": 71955 }, { "epoch": 27.21, "learning_rate": 3.55271938605272e-05, "loss": 0.0005, "step": 72488 }, { "epoch": 27.41, "learning_rate": 3.497142976309643e-05, "loss": 0.0007, "step": 73021 }, { "epoch": 27.61, "learning_rate": 3.441566566566567e-05, "loss": 0.0001, "step": 73554 }, { "epoch": 27.81, "learning_rate": 3.3859901568234906e-05, "loss": 0.0011, "step": 74087 }, { "epoch": 28.0, "eval_loss": 0.01642591878771782, "eval_max_distance": 1, "eval_mean_distance": 0, "eval_runtime": 0.9124, "eval_samples_per_second": 392.371, "eval_steps_per_second": 26.304, "step": 74592 }, { "epoch": 28.01, "learning_rate": 3.3304137470804134e-05, "loss": 0.0006, "step": 74620 }, { "epoch": 28.21, "learning_rate": 3.2748373373373376e-05, "loss": 0.0008, "step": 75153 }, { "epoch": 28.41, "learning_rate": 3.219260927594261e-05, "loss": 0.0005, "step": 75686 }, { "epoch": 28.61, "learning_rate": 3.1636845178511846e-05, "loss": 0.0002, "step": 76219 }, { "epoch": 28.81, "learning_rate": 3.108108108108108e-05, "loss": 0.0002, "step": 76752 }, { "epoch": 29.0, "eval_loss": 0.018625039607286453, "eval_max_distance": 1, "eval_mean_distance": 0, "eval_runtime": 0.9144, "eval_samples_per_second": 391.524, "eval_steps_per_second": 26.247, "step": 77256 }, { "epoch": 29.01, "learning_rate": 3.052531698365032e-05, "loss": 0.0003, "step": 77285 }, { "epoch": 29.21, "learning_rate": 2.9969552886219555e-05, "loss": 0.0002, "step": 77818 }, { "epoch": 29.41, "learning_rate": 2.9413788788788787e-05, "loss": 0.0001, "step": 78351 }, { "epoch": 29.61, "learning_rate": 2.8858024691358025e-05, "loss": 0.0013, "step": 78884 }, { "epoch": 29.81, "learning_rate": 2.830226059392726e-05, "loss": 0.0001, "step": 79417 }, { "epoch": 30.0, "eval_loss": 0.029812639579176903, "eval_max_distance": 1, "eval_mean_distance": 0, "eval_runtime": 0.9075, "eval_samples_per_second": 394.481, "eval_steps_per_second": 26.446, "step": 79920 }, { "epoch": 30.01, "learning_rate": 2.77464964964965e-05, "loss": 0.0012, "step": 79950 }, { "epoch": 30.21, "learning_rate": 2.7190732399065734e-05, "loss": 0.0003, "step": 80483 }, { "epoch": 30.41, "learning_rate": 2.6634968301634972e-05, "loss": 0.0001, "step": 81016 }, { "epoch": 30.61, "learning_rate": 2.6079204204204204e-05, "loss": 0.0004, "step": 81549 }, { "epoch": 30.81, "learning_rate": 2.5523440106773443e-05, "loss": 0.0008, "step": 82082 }, { "epoch": 31.0, "eval_loss": 0.027695728465914726, "eval_max_distance": 1, "eval_mean_distance": 0, "eval_runtime": 0.9096, "eval_samples_per_second": 393.574, "eval_steps_per_second": 26.385, "step": 82584 }, { "epoch": 31.01, "learning_rate": 2.4967676009342678e-05, "loss": 0.0008, "step": 82615 }, { "epoch": 31.21, "learning_rate": 2.4411911911911913e-05, "loss": 0.0003, "step": 83148 }, { "epoch": 31.41, "learning_rate": 2.385614781448115e-05, "loss": 0.0003, "step": 83681 }, { "epoch": 31.61, "learning_rate": 2.3300383717050383e-05, "loss": 0.0002, "step": 84214 }, { "epoch": 31.81, "learning_rate": 2.2744619619619618e-05, "loss": 0.0003, "step": 84747 }, { "epoch": 32.0, "eval_loss": 0.03773302584886551, "eval_max_distance": 1, "eval_mean_distance": 0, "eval_runtime": 0.9096, "eval_samples_per_second": 393.585, "eval_steps_per_second": 26.386, "step": 85248 }, { "epoch": 32.01, "learning_rate": 2.2188855522188857e-05, "loss": 0.0007, "step": 85280 }, { "epoch": 32.21, "learning_rate": 2.1633091424758092e-05, "loss": 0.001, "step": 85813 }, { "epoch": 32.41, "learning_rate": 2.107732732732733e-05, "loss": 0.0002, "step": 86346 }, { "epoch": 32.61, "learning_rate": 2.0521563229896565e-05, "loss": 0.0002, "step": 86879 }, { "epoch": 32.81, "learning_rate": 1.99657991324658e-05, "loss": 0.0003, "step": 87412 }, { "epoch": 33.0, "eval_loss": 0.03536462038755417, "eval_max_distance": 1, "eval_mean_distance": 0, "eval_runtime": 0.9089, "eval_samples_per_second": 393.87, "eval_steps_per_second": 26.405, "step": 87912 }, { "epoch": 33.01, "learning_rate": 1.9410035035035036e-05, "loss": 0.0006, "step": 87945 }, { "epoch": 33.21, "learning_rate": 1.885427093760427e-05, "loss": 0.0003, "step": 88478 }, { "epoch": 33.41, "learning_rate": 1.8298506840173506e-05, "loss": 0.0003, "step": 89011 }, { "epoch": 33.61, "learning_rate": 1.7742742742742744e-05, "loss": 0.0006, "step": 89544 }, { "epoch": 33.81, "learning_rate": 1.718697864531198e-05, "loss": 0.0007, "step": 90077 }, { "epoch": 34.0, "eval_loss": 0.05854496732354164, "eval_max_distance": 1, "eval_mean_distance": 0, "eval_runtime": 0.9101, "eval_samples_per_second": 393.356, "eval_steps_per_second": 26.37, "step": 90576 }, { "epoch": 34.01, "learning_rate": 1.6631214547881215e-05, "loss": 0.0002, "step": 90610 }, { "epoch": 34.21, "learning_rate": 1.6075450450450453e-05, "loss": 0.0006, "step": 91143 }, { "epoch": 34.41, "learning_rate": 1.5519686353019688e-05, "loss": 0.0003, "step": 91676 }, { "epoch": 34.61, "learning_rate": 1.4963922255588922e-05, "loss": 0.0006, "step": 92209 }, { "epoch": 34.81, "learning_rate": 1.4408158158158158e-05, "loss": 0.0005, "step": 92742 }, { "epoch": 35.0, "eval_loss": 0.05680559575557709, "eval_max_distance": 1, "eval_mean_distance": 0, "eval_runtime": 0.9087, "eval_samples_per_second": 393.956, "eval_steps_per_second": 26.41, "step": 93240 }, { "epoch": 35.01, "learning_rate": 1.3852394060727395e-05, "loss": 0.0, "step": 93275 }, { "epoch": 35.21, "learning_rate": 1.329662996329663e-05, "loss": 0.0005, "step": 93808 }, { "epoch": 35.41, "learning_rate": 1.2740865865865867e-05, "loss": 0.0002, "step": 94341 }, { "epoch": 35.61, "learning_rate": 1.2185101768435102e-05, "loss": 0.0007, "step": 94874 }, { "epoch": 35.81, "learning_rate": 1.1629337671004337e-05, "loss": 0.0001, "step": 95407 }, { "epoch": 36.0, "eval_loss": 0.05670797452330589, "eval_max_distance": 1, "eval_mean_distance": 0, "eval_runtime": 0.9196, "eval_samples_per_second": 389.279, "eval_steps_per_second": 26.097, "step": 95904 }, { "epoch": 36.01, "learning_rate": 1.1073573573573574e-05, "loss": 0.0003, "step": 95940 }, { "epoch": 36.21, "learning_rate": 1.0517809476142811e-05, "loss": 0.0004, "step": 96473 }, { "epoch": 36.41, "learning_rate": 9.962045378712046e-06, "loss": 0.0002, "step": 97006 }, { "epoch": 36.61, "learning_rate": 9.406281281281281e-06, "loss": 0.0006, "step": 97539 }, { "epoch": 36.81, "learning_rate": 8.850517183850518e-06, "loss": 0.0009, "step": 98072 }, { "epoch": 37.0, "eval_loss": 0.060491062700748444, "eval_max_distance": 1, "eval_mean_distance": 0, "eval_runtime": 0.9347, "eval_samples_per_second": 383.022, "eval_steps_per_second": 25.677, "step": 98568 }, { "epoch": 37.01, "learning_rate": 8.294753086419753e-06, "loss": 0.0002, "step": 98605 }, { "epoch": 37.21, "learning_rate": 7.738988988988988e-06, "loss": 0.001, "step": 99138 }, { "epoch": 37.41, "learning_rate": 7.183224891558225e-06, "loss": 0.0003, "step": 99671 }, { "epoch": 37.61, "learning_rate": 6.627460794127462e-06, "loss": 0.0001, "step": 100204 }, { "epoch": 37.81, "learning_rate": 6.071696696696697e-06, "loss": 0.0002, "step": 100737 }, { "epoch": 38.0, "eval_loss": 0.06128498166799545, "eval_max_distance": 1, "eval_mean_distance": 0, "eval_runtime": 0.9116, "eval_samples_per_second": 392.708, "eval_steps_per_second": 26.327, "step": 101232 }, { "epoch": 38.01, "learning_rate": 5.515932599265933e-06, "loss": 0.0002, "step": 101270 }, { "epoch": 38.21, "learning_rate": 4.960168501835169e-06, "loss": 0.0002, "step": 101803 }, { "epoch": 38.41, "learning_rate": 4.404404404404405e-06, "loss": 0.0001, "step": 102336 }, { "epoch": 38.61, "learning_rate": 3.848640306973641e-06, "loss": 0.0006, "step": 102869 }, { "epoch": 38.81, "learning_rate": 3.2928762095428764e-06, "loss": 0.0002, "step": 103402 }, { "epoch": 39.0, "eval_loss": 0.05631242319941521, "eval_max_distance": 1, "eval_mean_distance": 0, "eval_runtime": 0.9146, "eval_samples_per_second": 391.433, "eval_steps_per_second": 26.241, "step": 103896 }, { "epoch": 39.01, "learning_rate": 2.7371121121121123e-06, "loss": 0.0004, "step": 103935 }, { "epoch": 39.21, "learning_rate": 2.1813480146813483e-06, "loss": 0.0004, "step": 104468 }, { "epoch": 39.41, "learning_rate": 1.625583917250584e-06, "loss": 0.0001, "step": 105001 }, { "epoch": 39.61, "learning_rate": 1.0698198198198198e-06, "loss": 0.0013, "step": 105534 }, { "epoch": 39.81, "learning_rate": 5.140557223890558e-07, "loss": 0.0002, "step": 106067 }, { "epoch": 40.0, "eval_loss": 0.06320372968912125, "eval_max_distance": 1, "eval_mean_distance": 0, "eval_runtime": 0.9155, "eval_samples_per_second": 391.055, "eval_steps_per_second": 26.216, "step": 106560 }, { "epoch": 40.0, "step": 106560, "total_flos": 6.005678715251712e+16, "train_loss": 0.2983123329788039, "train_runtime": 9788.1362, "train_samples_per_second": 163.251, "train_steps_per_second": 10.887 } ], "logging_steps": 533, "max_steps": 106560, "num_train_epochs": 40, "save_steps": 1066, "total_flos": 6.005678715251712e+16, "trial_name": null, "trial_params": null }