mentalrobertoo / trainer_state.json
crabone123's picture
Upload 9 files
e9bbd87 verified
raw
history blame
160 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"global_step": 468696,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 4.967996313175278e-05,
"loss": 2.3652,
"step": 1000
},
{
"epoch": 0.01,
"eval_loss": 2.5228402614593506,
"eval_runtime": 19.0224,
"eval_samples_per_second": 105.139,
"eval_steps_per_second": 1.682,
"step": 1000
},
{
"epoch": 0.01,
"learning_rate": 4.935992626350556e-05,
"loss": 2.3257,
"step": 2000
},
{
"epoch": 0.01,
"eval_loss": 2.4702858924865723,
"eval_runtime": 19.2339,
"eval_samples_per_second": 103.983,
"eval_steps_per_second": 1.664,
"step": 2000
},
{
"epoch": 0.02,
"learning_rate": 4.903988939525834e-05,
"loss": 2.3123,
"step": 3000
},
{
"epoch": 0.02,
"eval_loss": 2.5149893760681152,
"eval_runtime": 18.8526,
"eval_samples_per_second": 106.086,
"eval_steps_per_second": 1.697,
"step": 3000
},
{
"epoch": 0.03,
"learning_rate": 4.871985252701111e-05,
"loss": 2.3068,
"step": 4000
},
{
"epoch": 0.03,
"eval_loss": 2.4867441654205322,
"eval_runtime": 18.7454,
"eval_samples_per_second": 106.693,
"eval_steps_per_second": 1.707,
"step": 4000
},
{
"epoch": 0.03,
"learning_rate": 4.8399815658763894e-05,
"loss": 2.2906,
"step": 5000
},
{
"epoch": 0.03,
"eval_loss": 2.4581546783447266,
"eval_runtime": 19.2143,
"eval_samples_per_second": 104.089,
"eval_steps_per_second": 1.665,
"step": 5000
},
{
"epoch": 0.04,
"learning_rate": 4.807977879051667e-05,
"loss": 2.2817,
"step": 6000
},
{
"epoch": 0.04,
"eval_loss": 2.477738380432129,
"eval_runtime": 18.9443,
"eval_samples_per_second": 105.573,
"eval_steps_per_second": 1.689,
"step": 6000
},
{
"epoch": 0.04,
"learning_rate": 4.7759741922269444e-05,
"loss": 2.2713,
"step": 7000
},
{
"epoch": 0.04,
"eval_loss": 2.490509271621704,
"eval_runtime": 19.0513,
"eval_samples_per_second": 104.98,
"eval_steps_per_second": 1.68,
"step": 7000
},
{
"epoch": 0.05,
"learning_rate": 4.743970505402223e-05,
"loss": 2.2653,
"step": 8000
},
{
"epoch": 0.05,
"eval_loss": 2.472813129425049,
"eval_runtime": 19.2806,
"eval_samples_per_second": 103.731,
"eval_steps_per_second": 1.66,
"step": 8000
},
{
"epoch": 0.06,
"learning_rate": 4.7119668185775e-05,
"loss": 2.2581,
"step": 9000
},
{
"epoch": 0.06,
"eval_loss": 2.4772017002105713,
"eval_runtime": 19.5407,
"eval_samples_per_second": 102.351,
"eval_steps_per_second": 1.638,
"step": 9000
},
{
"epoch": 0.06,
"learning_rate": 4.679963131752778e-05,
"loss": 2.2687,
"step": 10000
},
{
"epoch": 0.06,
"eval_loss": 2.4584801197052,
"eval_runtime": 18.8505,
"eval_samples_per_second": 106.098,
"eval_steps_per_second": 1.698,
"step": 10000
},
{
"epoch": 0.07,
"learning_rate": 4.6479594449280565e-05,
"loss": 2.2473,
"step": 11000
},
{
"epoch": 0.07,
"eval_loss": 2.450211524963379,
"eval_runtime": 19.0047,
"eval_samples_per_second": 105.237,
"eval_steps_per_second": 1.684,
"step": 11000
},
{
"epoch": 0.08,
"learning_rate": 4.6159557581033336e-05,
"loss": 2.2536,
"step": 12000
},
{
"epoch": 0.08,
"eval_loss": 2.402937650680542,
"eval_runtime": 19.1856,
"eval_samples_per_second": 104.245,
"eval_steps_per_second": 1.668,
"step": 12000
},
{
"epoch": 0.08,
"learning_rate": 4.5839520712786115e-05,
"loss": 2.2355,
"step": 13000
},
{
"epoch": 0.08,
"eval_loss": 2.5034797191619873,
"eval_runtime": 18.9351,
"eval_samples_per_second": 105.624,
"eval_steps_per_second": 1.69,
"step": 13000
},
{
"epoch": 0.09,
"learning_rate": 4.551948384453889e-05,
"loss": 2.2356,
"step": 14000
},
{
"epoch": 0.09,
"eval_loss": 2.443594217300415,
"eval_runtime": 19.1979,
"eval_samples_per_second": 104.178,
"eval_steps_per_second": 1.667,
"step": 14000
},
{
"epoch": 0.1,
"learning_rate": 4.519944697629167e-05,
"loss": 2.2385,
"step": 15000
},
{
"epoch": 0.1,
"eval_loss": 2.4230918884277344,
"eval_runtime": 19.1941,
"eval_samples_per_second": 104.199,
"eval_steps_per_second": 1.667,
"step": 15000
},
{
"epoch": 0.1,
"learning_rate": 4.487941010804445e-05,
"loss": 2.229,
"step": 16000
},
{
"epoch": 0.1,
"eval_loss": 2.435939073562622,
"eval_runtime": 18.7132,
"eval_samples_per_second": 106.876,
"eval_steps_per_second": 1.71,
"step": 16000
},
{
"epoch": 0.11,
"learning_rate": 4.455937323979723e-05,
"loss": 2.2308,
"step": 17000
},
{
"epoch": 0.11,
"eval_loss": 2.379002094268799,
"eval_runtime": 18.8323,
"eval_samples_per_second": 106.2,
"eval_steps_per_second": 1.699,
"step": 17000
},
{
"epoch": 0.12,
"learning_rate": 4.4239336371550006e-05,
"loss": 2.2247,
"step": 18000
},
{
"epoch": 0.12,
"eval_loss": 2.440680742263794,
"eval_runtime": 18.8124,
"eval_samples_per_second": 106.313,
"eval_steps_per_second": 1.701,
"step": 18000
},
{
"epoch": 0.12,
"learning_rate": 4.391929950330278e-05,
"loss": 2.2262,
"step": 19000
},
{
"epoch": 0.12,
"eval_loss": 2.401104211807251,
"eval_runtime": 18.8589,
"eval_samples_per_second": 106.051,
"eval_steps_per_second": 1.697,
"step": 19000
},
{
"epoch": 0.13,
"learning_rate": 4.359926263505556e-05,
"loss": 2.2074,
"step": 20000
},
{
"epoch": 0.13,
"eval_loss": 2.382688522338867,
"eval_runtime": 18.7139,
"eval_samples_per_second": 106.872,
"eval_steps_per_second": 1.71,
"step": 20000
},
{
"epoch": 0.13,
"learning_rate": 4.327922576680834e-05,
"loss": 2.2204,
"step": 21000
},
{
"epoch": 0.13,
"eval_loss": 2.421189308166504,
"eval_runtime": 18.9386,
"eval_samples_per_second": 105.604,
"eval_steps_per_second": 1.69,
"step": 21000
},
{
"epoch": 0.14,
"learning_rate": 4.295918889856111e-05,
"loss": 2.2123,
"step": 22000
},
{
"epoch": 0.14,
"eval_loss": 2.4362740516662598,
"eval_runtime": 18.9745,
"eval_samples_per_second": 105.405,
"eval_steps_per_second": 1.686,
"step": 22000
},
{
"epoch": 0.15,
"learning_rate": 4.263915203031389e-05,
"loss": 2.2225,
"step": 23000
},
{
"epoch": 0.15,
"eval_loss": 2.426682710647583,
"eval_runtime": 19.1794,
"eval_samples_per_second": 104.278,
"eval_steps_per_second": 1.668,
"step": 23000
},
{
"epoch": 0.15,
"learning_rate": 4.231911516206668e-05,
"loss": 2.2137,
"step": 24000
},
{
"epoch": 0.15,
"eval_loss": 2.4169669151306152,
"eval_runtime": 18.8197,
"eval_samples_per_second": 106.272,
"eval_steps_per_second": 1.7,
"step": 24000
},
{
"epoch": 0.16,
"learning_rate": 4.199907829381945e-05,
"loss": 2.2143,
"step": 25000
},
{
"epoch": 0.16,
"eval_loss": 2.4082441329956055,
"eval_runtime": 18.9737,
"eval_samples_per_second": 105.409,
"eval_steps_per_second": 1.687,
"step": 25000
},
{
"epoch": 0.17,
"learning_rate": 4.167904142557223e-05,
"loss": 2.2131,
"step": 26000
},
{
"epoch": 0.17,
"eval_loss": 2.4836766719818115,
"eval_runtime": 19.0574,
"eval_samples_per_second": 104.946,
"eval_steps_per_second": 1.679,
"step": 26000
},
{
"epoch": 0.17,
"learning_rate": 4.1359004557325005e-05,
"loss": 2.1954,
"step": 27000
},
{
"epoch": 0.17,
"eval_loss": 2.43381404876709,
"eval_runtime": 18.8859,
"eval_samples_per_second": 105.899,
"eval_steps_per_second": 1.694,
"step": 27000
},
{
"epoch": 0.18,
"learning_rate": 4.1038967689077783e-05,
"loss": 2.1934,
"step": 28000
},
{
"epoch": 0.18,
"eval_loss": 2.4075064659118652,
"eval_runtime": 18.689,
"eval_samples_per_second": 107.015,
"eval_steps_per_second": 1.712,
"step": 28000
},
{
"epoch": 0.19,
"learning_rate": 4.071893082083056e-05,
"loss": 2.1943,
"step": 29000
},
{
"epoch": 0.19,
"eval_loss": 2.383098602294922,
"eval_runtime": 18.8218,
"eval_samples_per_second": 106.26,
"eval_steps_per_second": 1.7,
"step": 29000
},
{
"epoch": 0.19,
"learning_rate": 4.039889395258334e-05,
"loss": 2.1944,
"step": 30000
},
{
"epoch": 0.19,
"eval_loss": 2.3953185081481934,
"eval_runtime": 18.9451,
"eval_samples_per_second": 105.568,
"eval_steps_per_second": 1.689,
"step": 30000
},
{
"epoch": 0.2,
"learning_rate": 4.007885708433612e-05,
"loss": 2.1914,
"step": 31000
},
{
"epoch": 0.2,
"eval_loss": 2.411050796508789,
"eval_runtime": 18.7128,
"eval_samples_per_second": 106.878,
"eval_steps_per_second": 1.71,
"step": 31000
},
{
"epoch": 0.2,
"learning_rate": 3.975882021608889e-05,
"loss": 2.1865,
"step": 32000
},
{
"epoch": 0.2,
"eval_loss": 2.390427827835083,
"eval_runtime": 18.9045,
"eval_samples_per_second": 105.795,
"eval_steps_per_second": 1.693,
"step": 32000
},
{
"epoch": 0.21,
"learning_rate": 3.9438783347841675e-05,
"loss": 2.1871,
"step": 33000
},
{
"epoch": 0.21,
"eval_loss": 2.401388168334961,
"eval_runtime": 18.7096,
"eval_samples_per_second": 106.897,
"eval_steps_per_second": 1.71,
"step": 33000
},
{
"epoch": 0.22,
"learning_rate": 3.9118746479594454e-05,
"loss": 2.1792,
"step": 34000
},
{
"epoch": 0.22,
"eval_loss": 2.4562745094299316,
"eval_runtime": 18.8567,
"eval_samples_per_second": 106.063,
"eval_steps_per_second": 1.697,
"step": 34000
},
{
"epoch": 0.22,
"learning_rate": 3.8798709611347225e-05,
"loss": 2.1921,
"step": 35000
},
{
"epoch": 0.22,
"eval_loss": 2.399921417236328,
"eval_runtime": 18.7883,
"eval_samples_per_second": 106.449,
"eval_steps_per_second": 1.703,
"step": 35000
},
{
"epoch": 0.23,
"learning_rate": 3.847867274310001e-05,
"loss": 2.1831,
"step": 36000
},
{
"epoch": 0.23,
"eval_loss": 2.3935768604278564,
"eval_runtime": 18.8237,
"eval_samples_per_second": 106.249,
"eval_steps_per_second": 1.7,
"step": 36000
},
{
"epoch": 0.24,
"learning_rate": 3.815863587485278e-05,
"loss": 2.169,
"step": 37000
},
{
"epoch": 0.24,
"eval_loss": 2.385082960128784,
"eval_runtime": 18.9677,
"eval_samples_per_second": 105.442,
"eval_steps_per_second": 1.687,
"step": 37000
},
{
"epoch": 0.24,
"learning_rate": 3.783859900660556e-05,
"loss": 2.1619,
"step": 38000
},
{
"epoch": 0.24,
"eval_loss": 2.3289620876312256,
"eval_runtime": 19.0182,
"eval_samples_per_second": 105.162,
"eval_steps_per_second": 1.683,
"step": 38000
},
{
"epoch": 0.25,
"learning_rate": 3.7518562138358346e-05,
"loss": 2.1651,
"step": 39000
},
{
"epoch": 0.25,
"eval_loss": 2.3818867206573486,
"eval_runtime": 18.9593,
"eval_samples_per_second": 105.489,
"eval_steps_per_second": 1.688,
"step": 39000
},
{
"epoch": 0.26,
"learning_rate": 3.719852527011112e-05,
"loss": 2.1704,
"step": 40000
},
{
"epoch": 0.26,
"eval_loss": 2.3583953380584717,
"eval_runtime": 18.8577,
"eval_samples_per_second": 106.057,
"eval_steps_per_second": 1.697,
"step": 40000
},
{
"epoch": 0.26,
"learning_rate": 3.6878488401863896e-05,
"loss": 2.1601,
"step": 41000
},
{
"epoch": 0.26,
"eval_loss": 2.3705227375030518,
"eval_runtime": 19.0038,
"eval_samples_per_second": 105.242,
"eval_steps_per_second": 1.684,
"step": 41000
},
{
"epoch": 0.27,
"learning_rate": 3.6558451533616674e-05,
"loss": 2.1819,
"step": 42000
},
{
"epoch": 0.27,
"eval_loss": 2.3806064128875732,
"eval_runtime": 19.09,
"eval_samples_per_second": 104.767,
"eval_steps_per_second": 1.676,
"step": 42000
},
{
"epoch": 0.28,
"learning_rate": 3.623841466536945e-05,
"loss": 2.1666,
"step": 43000
},
{
"epoch": 0.28,
"eval_loss": 2.3670589923858643,
"eval_runtime": 18.9485,
"eval_samples_per_second": 105.549,
"eval_steps_per_second": 1.689,
"step": 43000
},
{
"epoch": 0.28,
"learning_rate": 3.591837779712223e-05,
"loss": 2.1718,
"step": 44000
},
{
"epoch": 0.28,
"eval_loss": 2.364011764526367,
"eval_runtime": 18.7665,
"eval_samples_per_second": 106.573,
"eval_steps_per_second": 1.705,
"step": 44000
},
{
"epoch": 0.29,
"learning_rate": 3.559834092887501e-05,
"loss": 2.1521,
"step": 45000
},
{
"epoch": 0.29,
"eval_loss": 2.373670816421509,
"eval_runtime": 18.9014,
"eval_samples_per_second": 105.812,
"eval_steps_per_second": 1.693,
"step": 45000
},
{
"epoch": 0.29,
"learning_rate": 3.527830406062779e-05,
"loss": 2.148,
"step": 46000
},
{
"epoch": 0.29,
"eval_loss": 2.377063035964966,
"eval_runtime": 19.012,
"eval_samples_per_second": 105.197,
"eval_steps_per_second": 1.683,
"step": 46000
},
{
"epoch": 0.3,
"learning_rate": 3.495826719238056e-05,
"loss": 2.1438,
"step": 47000
},
{
"epoch": 0.3,
"eval_loss": 2.3637826442718506,
"eval_runtime": 18.982,
"eval_samples_per_second": 105.363,
"eval_steps_per_second": 1.686,
"step": 47000
},
{
"epoch": 0.31,
"learning_rate": 3.4638230324133344e-05,
"loss": 2.1536,
"step": 48000
},
{
"epoch": 0.31,
"eval_loss": 2.3571810722351074,
"eval_runtime": 18.8471,
"eval_samples_per_second": 106.117,
"eval_steps_per_second": 1.698,
"step": 48000
},
{
"epoch": 0.31,
"learning_rate": 3.431819345588612e-05,
"loss": 2.1505,
"step": 49000
},
{
"epoch": 0.31,
"eval_loss": 2.3516790866851807,
"eval_runtime": 18.8575,
"eval_samples_per_second": 106.059,
"eval_steps_per_second": 1.697,
"step": 49000
},
{
"epoch": 0.32,
"learning_rate": 3.3998156587638894e-05,
"loss": 2.1319,
"step": 50000
},
{
"epoch": 0.32,
"eval_loss": 2.3615307807922363,
"eval_runtime": 19.1166,
"eval_samples_per_second": 104.621,
"eval_steps_per_second": 1.674,
"step": 50000
},
{
"epoch": 0.33,
"learning_rate": 3.367811971939168e-05,
"loss": 2.123,
"step": 51000
},
{
"epoch": 0.33,
"eval_loss": 2.3522212505340576,
"eval_runtime": 19.1501,
"eval_samples_per_second": 104.438,
"eval_steps_per_second": 1.671,
"step": 51000
},
{
"epoch": 0.33,
"learning_rate": 3.335808285114445e-05,
"loss": 2.1513,
"step": 52000
},
{
"epoch": 0.33,
"eval_loss": 2.388401746749878,
"eval_runtime": 18.8344,
"eval_samples_per_second": 106.189,
"eval_steps_per_second": 1.699,
"step": 52000
},
{
"epoch": 0.34,
"learning_rate": 3.303804598289723e-05,
"loss": 2.1419,
"step": 53000
},
{
"epoch": 0.34,
"eval_loss": 2.32639479637146,
"eval_runtime": 18.8162,
"eval_samples_per_second": 106.292,
"eval_steps_per_second": 1.701,
"step": 53000
},
{
"epoch": 0.35,
"learning_rate": 3.271800911465001e-05,
"loss": 2.1404,
"step": 54000
},
{
"epoch": 0.35,
"eval_loss": 2.3595259189605713,
"eval_runtime": 18.8272,
"eval_samples_per_second": 106.229,
"eval_steps_per_second": 1.7,
"step": 54000
},
{
"epoch": 0.35,
"learning_rate": 3.2397972246402786e-05,
"loss": 2.128,
"step": 55000
},
{
"epoch": 0.35,
"eval_loss": 2.3471484184265137,
"eval_runtime": 18.9594,
"eval_samples_per_second": 105.489,
"eval_steps_per_second": 1.688,
"step": 55000
},
{
"epoch": 0.36,
"learning_rate": 3.2077935378155565e-05,
"loss": 2.1287,
"step": 56000
},
{
"epoch": 0.36,
"eval_loss": 2.347370147705078,
"eval_runtime": 18.9278,
"eval_samples_per_second": 105.665,
"eval_steps_per_second": 1.691,
"step": 56000
},
{
"epoch": 0.36,
"learning_rate": 3.175789850990834e-05,
"loss": 2.1372,
"step": 57000
},
{
"epoch": 0.36,
"eval_loss": 2.3139336109161377,
"eval_runtime": 19.0473,
"eval_samples_per_second": 105.002,
"eval_steps_per_second": 1.68,
"step": 57000
},
{
"epoch": 0.37,
"learning_rate": 3.143786164166112e-05,
"loss": 2.1301,
"step": 58000
},
{
"epoch": 0.37,
"eval_loss": 2.3145127296447754,
"eval_runtime": 18.831,
"eval_samples_per_second": 106.208,
"eval_steps_per_second": 1.699,
"step": 58000
},
{
"epoch": 0.38,
"learning_rate": 3.11178247734139e-05,
"loss": 2.128,
"step": 59000
},
{
"epoch": 0.38,
"eval_loss": 2.3634743690490723,
"eval_runtime": 19.0052,
"eval_samples_per_second": 105.234,
"eval_steps_per_second": 1.684,
"step": 59000
},
{
"epoch": 0.38,
"learning_rate": 3.079778790516668e-05,
"loss": 2.1088,
"step": 60000
},
{
"epoch": 0.38,
"eval_loss": 2.3068251609802246,
"eval_runtime": 18.9935,
"eval_samples_per_second": 105.299,
"eval_steps_per_second": 1.685,
"step": 60000
},
{
"epoch": 0.39,
"learning_rate": 3.0477751036919456e-05,
"loss": 2.122,
"step": 61000
},
{
"epoch": 0.39,
"eval_loss": 2.312502145767212,
"eval_runtime": 18.8963,
"eval_samples_per_second": 105.841,
"eval_steps_per_second": 1.693,
"step": 61000
},
{
"epoch": 0.4,
"learning_rate": 3.015771416867223e-05,
"loss": 2.1113,
"step": 62000
},
{
"epoch": 0.4,
"eval_loss": 2.3446314334869385,
"eval_runtime": 18.8671,
"eval_samples_per_second": 106.005,
"eval_steps_per_second": 1.696,
"step": 62000
},
{
"epoch": 0.4,
"learning_rate": 2.983767730042501e-05,
"loss": 2.1108,
"step": 63000
},
{
"epoch": 0.4,
"eval_loss": 2.3173420429229736,
"eval_runtime": 18.7418,
"eval_samples_per_second": 106.713,
"eval_steps_per_second": 1.707,
"step": 63000
},
{
"epoch": 0.41,
"learning_rate": 2.951764043217779e-05,
"loss": 2.125,
"step": 64000
},
{
"epoch": 0.41,
"eval_loss": 2.363111734390259,
"eval_runtime": 18.789,
"eval_samples_per_second": 106.445,
"eval_steps_per_second": 1.703,
"step": 64000
},
{
"epoch": 0.42,
"learning_rate": 2.9197603563930563e-05,
"loss": 2.1106,
"step": 65000
},
{
"epoch": 0.42,
"eval_loss": 2.331869602203369,
"eval_runtime": 18.9057,
"eval_samples_per_second": 105.788,
"eval_steps_per_second": 1.693,
"step": 65000
},
{
"epoch": 0.42,
"learning_rate": 2.8877566695683345e-05,
"loss": 2.1143,
"step": 66000
},
{
"epoch": 0.42,
"eval_loss": 2.300299882888794,
"eval_runtime": 18.7948,
"eval_samples_per_second": 106.413,
"eval_steps_per_second": 1.703,
"step": 66000
},
{
"epoch": 0.43,
"learning_rate": 2.8557529827436123e-05,
"loss": 2.0982,
"step": 67000
},
{
"epoch": 0.43,
"eval_loss": 2.3044443130493164,
"eval_runtime": 19.1803,
"eval_samples_per_second": 104.273,
"eval_steps_per_second": 1.668,
"step": 67000
},
{
"epoch": 0.44,
"learning_rate": 2.82374929591889e-05,
"loss": 2.1026,
"step": 68000
},
{
"epoch": 0.44,
"eval_loss": 2.305398464202881,
"eval_runtime": 18.9121,
"eval_samples_per_second": 105.752,
"eval_steps_per_second": 1.692,
"step": 68000
},
{
"epoch": 0.44,
"learning_rate": 2.791745609094168e-05,
"loss": 2.0995,
"step": 69000
},
{
"epoch": 0.44,
"eval_loss": 2.3068206310272217,
"eval_runtime": 18.8989,
"eval_samples_per_second": 105.826,
"eval_steps_per_second": 1.693,
"step": 69000
},
{
"epoch": 0.45,
"learning_rate": 2.7597419222694455e-05,
"loss": 2.0844,
"step": 70000
},
{
"epoch": 0.45,
"eval_loss": 2.3477184772491455,
"eval_runtime": 19.0274,
"eval_samples_per_second": 105.111,
"eval_steps_per_second": 1.682,
"step": 70000
},
{
"epoch": 0.45,
"learning_rate": 2.7277382354447233e-05,
"loss": 2.1008,
"step": 71000
},
{
"epoch": 0.45,
"eval_loss": 2.339860439300537,
"eval_runtime": 18.7939,
"eval_samples_per_second": 106.418,
"eval_steps_per_second": 1.703,
"step": 71000
},
{
"epoch": 0.46,
"learning_rate": 2.6957345486200015e-05,
"loss": 2.092,
"step": 72000
},
{
"epoch": 0.46,
"eval_loss": 2.3236074447631836,
"eval_runtime": 18.7746,
"eval_samples_per_second": 106.527,
"eval_steps_per_second": 1.704,
"step": 72000
},
{
"epoch": 0.47,
"learning_rate": 2.663730861795279e-05,
"loss": 2.09,
"step": 73000
},
{
"epoch": 0.47,
"eval_loss": 2.3070333003997803,
"eval_runtime": 19.3882,
"eval_samples_per_second": 103.155,
"eval_steps_per_second": 1.65,
"step": 73000
},
{
"epoch": 0.47,
"learning_rate": 2.631727174970557e-05,
"loss": 2.0984,
"step": 74000
},
{
"epoch": 0.47,
"eval_loss": 2.31845760345459,
"eval_runtime": 19.5362,
"eval_samples_per_second": 102.374,
"eval_steps_per_second": 1.638,
"step": 74000
},
{
"epoch": 0.48,
"learning_rate": 2.5997234881458344e-05,
"loss": 2.0965,
"step": 75000
},
{
"epoch": 0.48,
"eval_loss": 2.306812047958374,
"eval_runtime": 19.4702,
"eval_samples_per_second": 102.721,
"eval_steps_per_second": 1.644,
"step": 75000
},
{
"epoch": 0.49,
"learning_rate": 2.5677198013211122e-05,
"loss": 2.081,
"step": 76000
},
{
"epoch": 0.49,
"eval_loss": 2.274367570877075,
"eval_runtime": 19.5806,
"eval_samples_per_second": 102.142,
"eval_steps_per_second": 1.634,
"step": 76000
},
{
"epoch": 0.49,
"learning_rate": 2.5357161144963904e-05,
"loss": 2.0871,
"step": 77000
},
{
"epoch": 0.49,
"eval_loss": 2.254237651824951,
"eval_runtime": 19.7552,
"eval_samples_per_second": 101.239,
"eval_steps_per_second": 1.62,
"step": 77000
},
{
"epoch": 0.5,
"learning_rate": 2.503712427671668e-05,
"loss": 2.0751,
"step": 78000
},
{
"epoch": 0.5,
"eval_loss": 2.2817015647888184,
"eval_runtime": 19.5765,
"eval_samples_per_second": 102.163,
"eval_steps_per_second": 1.635,
"step": 78000
},
{
"epoch": 0.51,
"learning_rate": 2.4717087408469457e-05,
"loss": 2.0875,
"step": 79000
},
{
"epoch": 0.51,
"eval_loss": 2.288637161254883,
"eval_runtime": 19.6173,
"eval_samples_per_second": 101.951,
"eval_steps_per_second": 1.631,
"step": 79000
},
{
"epoch": 0.51,
"learning_rate": 2.4397050540222236e-05,
"loss": 2.0847,
"step": 80000
},
{
"epoch": 0.51,
"eval_loss": 2.3093936443328857,
"eval_runtime": 19.3962,
"eval_samples_per_second": 103.113,
"eval_steps_per_second": 1.65,
"step": 80000
},
{
"epoch": 0.52,
"learning_rate": 2.4077013671975014e-05,
"loss": 2.0861,
"step": 81000
},
{
"epoch": 0.52,
"eval_loss": 2.294950246810913,
"eval_runtime": 19.5483,
"eval_samples_per_second": 102.311,
"eval_steps_per_second": 1.637,
"step": 81000
},
{
"epoch": 0.52,
"learning_rate": 2.375697680372779e-05,
"loss": 2.0689,
"step": 82000
},
{
"epoch": 0.52,
"eval_loss": 2.293389320373535,
"eval_runtime": 19.51,
"eval_samples_per_second": 102.512,
"eval_steps_per_second": 1.64,
"step": 82000
},
{
"epoch": 0.53,
"learning_rate": 2.3436939935480567e-05,
"loss": 2.0767,
"step": 83000
},
{
"epoch": 0.53,
"eval_loss": 2.304983615875244,
"eval_runtime": 19.26,
"eval_samples_per_second": 103.842,
"eval_steps_per_second": 1.661,
"step": 83000
},
{
"epoch": 0.54,
"learning_rate": 2.311690306723335e-05,
"loss": 2.0711,
"step": 84000
},
{
"epoch": 0.54,
"eval_loss": 2.2823355197906494,
"eval_runtime": 20.4429,
"eval_samples_per_second": 97.834,
"eval_steps_per_second": 1.565,
"step": 84000
},
{
"epoch": 0.54,
"learning_rate": 2.2796866198986124e-05,
"loss": 2.0654,
"step": 85000
},
{
"epoch": 0.54,
"eval_loss": 2.280226469039917,
"eval_runtime": 19.5501,
"eval_samples_per_second": 102.301,
"eval_steps_per_second": 1.637,
"step": 85000
},
{
"epoch": 0.55,
"learning_rate": 2.2476829330738902e-05,
"loss": 2.0627,
"step": 86000
},
{
"epoch": 0.55,
"eval_loss": 2.2770543098449707,
"eval_runtime": 19.4549,
"eval_samples_per_second": 102.802,
"eval_steps_per_second": 1.645,
"step": 86000
},
{
"epoch": 0.56,
"learning_rate": 2.215679246249168e-05,
"loss": 2.0656,
"step": 87000
},
{
"epoch": 0.56,
"eval_loss": 2.2922134399414062,
"eval_runtime": 19.3407,
"eval_samples_per_second": 103.409,
"eval_steps_per_second": 1.655,
"step": 87000
},
{
"epoch": 0.56,
"learning_rate": 2.1836755594244456e-05,
"loss": 2.07,
"step": 88000
},
{
"epoch": 0.56,
"eval_loss": 2.268709897994995,
"eval_runtime": 19.4551,
"eval_samples_per_second": 102.801,
"eval_steps_per_second": 1.645,
"step": 88000
},
{
"epoch": 0.57,
"learning_rate": 2.1516718725997238e-05,
"loss": 2.0661,
"step": 89000
},
{
"epoch": 0.57,
"eval_loss": 2.247802972793579,
"eval_runtime": 19.273,
"eval_samples_per_second": 103.772,
"eval_steps_per_second": 1.66,
"step": 89000
},
{
"epoch": 0.58,
"learning_rate": 2.1196681857750016e-05,
"loss": 2.0511,
"step": 90000
},
{
"epoch": 0.58,
"eval_loss": 2.3074941635131836,
"eval_runtime": 19.2075,
"eval_samples_per_second": 104.126,
"eval_steps_per_second": 1.666,
"step": 90000
},
{
"epoch": 0.58,
"learning_rate": 2.087664498950279e-05,
"loss": 2.0582,
"step": 91000
},
{
"epoch": 0.58,
"eval_loss": 2.248690605163574,
"eval_runtime": 19.2432,
"eval_samples_per_second": 103.933,
"eval_steps_per_second": 1.663,
"step": 91000
},
{
"epoch": 0.59,
"learning_rate": 2.055660812125557e-05,
"loss": 2.0626,
"step": 92000
},
{
"epoch": 0.59,
"eval_loss": 2.2588484287261963,
"eval_runtime": 19.4441,
"eval_samples_per_second": 102.859,
"eval_steps_per_second": 1.646,
"step": 92000
},
{
"epoch": 0.6,
"learning_rate": 2.0236571253008348e-05,
"loss": 2.0562,
"step": 93000
},
{
"epoch": 0.6,
"eval_loss": 2.274319887161255,
"eval_runtime": 19.4979,
"eval_samples_per_second": 102.575,
"eval_steps_per_second": 1.641,
"step": 93000
},
{
"epoch": 0.6,
"learning_rate": 1.9916534384761126e-05,
"loss": 2.0511,
"step": 94000
},
{
"epoch": 0.6,
"eval_loss": 2.276171922683716,
"eval_runtime": 19.331,
"eval_samples_per_second": 103.461,
"eval_steps_per_second": 1.655,
"step": 94000
},
{
"epoch": 0.61,
"learning_rate": 1.9596497516513904e-05,
"loss": 2.0413,
"step": 95000
},
{
"epoch": 0.61,
"eval_loss": 2.2398881912231445,
"eval_runtime": 19.5099,
"eval_samples_per_second": 102.512,
"eval_steps_per_second": 1.64,
"step": 95000
},
{
"epoch": 0.61,
"learning_rate": 1.927646064826668e-05,
"loss": 2.0496,
"step": 96000
},
{
"epoch": 0.61,
"eval_loss": 2.271150588989258,
"eval_runtime": 19.317,
"eval_samples_per_second": 103.536,
"eval_steps_per_second": 1.657,
"step": 96000
},
{
"epoch": 0.62,
"learning_rate": 1.8956423780019458e-05,
"loss": 2.0564,
"step": 97000
},
{
"epoch": 0.62,
"eval_loss": 2.2770469188690186,
"eval_runtime": 19.1141,
"eval_samples_per_second": 104.635,
"eval_steps_per_second": 1.674,
"step": 97000
},
{
"epoch": 0.63,
"learning_rate": 1.863638691177224e-05,
"loss": 2.0505,
"step": 98000
},
{
"epoch": 0.63,
"eval_loss": 2.2885847091674805,
"eval_runtime": 19.6608,
"eval_samples_per_second": 101.725,
"eval_steps_per_second": 1.628,
"step": 98000
},
{
"epoch": 0.63,
"learning_rate": 1.8316350043525015e-05,
"loss": 2.0504,
"step": 99000
},
{
"epoch": 0.63,
"eval_loss": 2.3180038928985596,
"eval_runtime": 19.4021,
"eval_samples_per_second": 103.082,
"eval_steps_per_second": 1.649,
"step": 99000
},
{
"epoch": 0.64,
"learning_rate": 1.7996313175277793e-05,
"loss": 2.0439,
"step": 100000
},
{
"epoch": 0.64,
"eval_loss": 2.2651731967926025,
"eval_runtime": 19.3214,
"eval_samples_per_second": 103.512,
"eval_steps_per_second": 1.656,
"step": 100000
},
{
"epoch": 0.65,
"learning_rate": 1.767627630703057e-05,
"loss": 2.0461,
"step": 101000
},
{
"epoch": 0.65,
"eval_loss": 2.222968101501465,
"eval_runtime": 19.2774,
"eval_samples_per_second": 103.749,
"eval_steps_per_second": 1.66,
"step": 101000
},
{
"epoch": 0.65,
"learning_rate": 1.7356239438783346e-05,
"loss": 2.0405,
"step": 102000
},
{
"epoch": 0.65,
"eval_loss": 2.2448790073394775,
"eval_runtime": 21.2727,
"eval_samples_per_second": 94.017,
"eval_steps_per_second": 1.504,
"step": 102000
},
{
"epoch": 0.66,
"learning_rate": 1.7036202570536128e-05,
"loss": 2.038,
"step": 103000
},
{
"epoch": 0.66,
"eval_loss": 2.2096433639526367,
"eval_runtime": 19.292,
"eval_samples_per_second": 103.67,
"eval_steps_per_second": 1.659,
"step": 103000
},
{
"epoch": 0.67,
"learning_rate": 1.6716165702288906e-05,
"loss": 2.0205,
"step": 104000
},
{
"epoch": 0.67,
"eval_loss": 2.2131240367889404,
"eval_runtime": 19.1995,
"eval_samples_per_second": 104.169,
"eval_steps_per_second": 1.667,
"step": 104000
},
{
"epoch": 0.67,
"learning_rate": 1.639612883404168e-05,
"loss": 2.0196,
"step": 105000
},
{
"epoch": 0.67,
"eval_loss": 2.2505383491516113,
"eval_runtime": 19.4936,
"eval_samples_per_second": 102.598,
"eval_steps_per_second": 1.642,
"step": 105000
},
{
"epoch": 0.68,
"learning_rate": 1.607609196579446e-05,
"loss": 2.0272,
"step": 106000
},
{
"epoch": 0.68,
"eval_loss": 2.243058681488037,
"eval_runtime": 19.4712,
"eval_samples_per_second": 102.716,
"eval_steps_per_second": 1.643,
"step": 106000
},
{
"epoch": 0.68,
"learning_rate": 1.5756055097547238e-05,
"loss": 2.0276,
"step": 107000
},
{
"epoch": 0.68,
"eval_loss": 2.2137022018432617,
"eval_runtime": 18.6801,
"eval_samples_per_second": 107.066,
"eval_steps_per_second": 1.713,
"step": 107000
},
{
"epoch": 0.69,
"learning_rate": 1.5436018229300017e-05,
"loss": 2.0224,
"step": 108000
},
{
"epoch": 0.69,
"eval_loss": 2.2309203147888184,
"eval_runtime": 18.8357,
"eval_samples_per_second": 106.181,
"eval_steps_per_second": 1.699,
"step": 108000
},
{
"epoch": 0.7,
"learning_rate": 1.5115981361052795e-05,
"loss": 2.0253,
"step": 109000
},
{
"epoch": 0.7,
"eval_loss": 2.2213120460510254,
"eval_runtime": 19.2801,
"eval_samples_per_second": 103.734,
"eval_steps_per_second": 1.66,
"step": 109000
},
{
"epoch": 0.7,
"learning_rate": 1.4795944492805572e-05,
"loss": 2.0199,
"step": 110000
},
{
"epoch": 0.7,
"eval_loss": 2.2416763305664062,
"eval_runtime": 18.8526,
"eval_samples_per_second": 106.086,
"eval_steps_per_second": 1.697,
"step": 110000
},
{
"epoch": 0.71,
"learning_rate": 1.4475907624558348e-05,
"loss": 2.0216,
"step": 111000
},
{
"epoch": 0.71,
"eval_loss": 2.24078369140625,
"eval_runtime": 18.5093,
"eval_samples_per_second": 108.054,
"eval_steps_per_second": 1.729,
"step": 111000
},
{
"epoch": 0.72,
"learning_rate": 1.4155870756311127e-05,
"loss": 2.0236,
"step": 112000
},
{
"epoch": 0.72,
"eval_loss": 2.2598512172698975,
"eval_runtime": 19.0496,
"eval_samples_per_second": 104.989,
"eval_steps_per_second": 1.68,
"step": 112000
},
{
"epoch": 0.72,
"learning_rate": 1.3835833888063907e-05,
"loss": 2.0247,
"step": 113000
},
{
"epoch": 0.72,
"eval_loss": 2.2282919883728027,
"eval_runtime": 18.7751,
"eval_samples_per_second": 106.524,
"eval_steps_per_second": 1.704,
"step": 113000
},
{
"epoch": 0.73,
"learning_rate": 1.3515797019816683e-05,
"loss": 2.0263,
"step": 114000
},
{
"epoch": 0.73,
"eval_loss": 2.248234748840332,
"eval_runtime": 18.8567,
"eval_samples_per_second": 106.063,
"eval_steps_per_second": 1.697,
"step": 114000
},
{
"epoch": 0.74,
"learning_rate": 1.3195760151569462e-05,
"loss": 2.014,
"step": 115000
},
{
"epoch": 0.74,
"eval_loss": 2.194716691970825,
"eval_runtime": 18.7872,
"eval_samples_per_second": 106.455,
"eval_steps_per_second": 1.703,
"step": 115000
},
{
"epoch": 0.74,
"learning_rate": 1.2875723283322239e-05,
"loss": 2.0076,
"step": 116000
},
{
"epoch": 0.74,
"eval_loss": 2.233458995819092,
"eval_runtime": 18.8711,
"eval_samples_per_second": 105.982,
"eval_steps_per_second": 1.696,
"step": 116000
},
{
"epoch": 0.75,
"learning_rate": 1.2555686415075015e-05,
"loss": 2.011,
"step": 117000
},
{
"epoch": 0.75,
"eval_loss": 2.213284492492676,
"eval_runtime": 19.5167,
"eval_samples_per_second": 102.477,
"eval_steps_per_second": 1.64,
"step": 117000
},
{
"epoch": 0.76,
"learning_rate": 1.2235649546827795e-05,
"loss": 2.0216,
"step": 118000
},
{
"epoch": 0.76,
"eval_loss": 2.210317373275757,
"eval_runtime": 18.6333,
"eval_samples_per_second": 107.334,
"eval_steps_per_second": 1.717,
"step": 118000
},
{
"epoch": 0.76,
"learning_rate": 1.1915612678580574e-05,
"loss": 2.0097,
"step": 119000
},
{
"epoch": 0.76,
"eval_loss": 2.241175651550293,
"eval_runtime": 18.694,
"eval_samples_per_second": 106.986,
"eval_steps_per_second": 1.712,
"step": 119000
},
{
"epoch": 0.77,
"learning_rate": 1.159557581033335e-05,
"loss": 2.0076,
"step": 120000
},
{
"epoch": 0.77,
"eval_loss": 2.2543113231658936,
"eval_runtime": 18.8082,
"eval_samples_per_second": 106.336,
"eval_steps_per_second": 1.701,
"step": 120000
},
{
"epoch": 0.77,
"learning_rate": 1.1275538942086129e-05,
"loss": 2.01,
"step": 121000
},
{
"epoch": 0.77,
"eval_loss": 2.2642598152160645,
"eval_runtime": 19.4522,
"eval_samples_per_second": 102.816,
"eval_steps_per_second": 1.645,
"step": 121000
},
{
"epoch": 0.78,
"learning_rate": 1.0955502073838907e-05,
"loss": 2.0074,
"step": 122000
},
{
"epoch": 0.78,
"eval_loss": 2.2413113117218018,
"eval_runtime": 18.8078,
"eval_samples_per_second": 106.339,
"eval_steps_per_second": 1.701,
"step": 122000
},
{
"epoch": 0.79,
"learning_rate": 1.0635465205591686e-05,
"loss": 1.9898,
"step": 123000
},
{
"epoch": 0.79,
"eval_loss": 2.2442147731781006,
"eval_runtime": 18.8179,
"eval_samples_per_second": 106.282,
"eval_steps_per_second": 1.701,
"step": 123000
},
{
"epoch": 0.79,
"learning_rate": 1.0315428337344462e-05,
"loss": 2.0119,
"step": 124000
},
{
"epoch": 0.79,
"eval_loss": 2.227520704269409,
"eval_runtime": 18.9241,
"eval_samples_per_second": 105.686,
"eval_steps_per_second": 1.691,
"step": 124000
},
{
"epoch": 0.8,
"learning_rate": 9.99539146909724e-06,
"loss": 1.993,
"step": 125000
},
{
"epoch": 0.8,
"eval_loss": 2.2116286754608154,
"eval_runtime": 19.6066,
"eval_samples_per_second": 102.007,
"eval_steps_per_second": 1.632,
"step": 125000
},
{
"epoch": 0.81,
"learning_rate": 9.675354600850019e-06,
"loss": 2.0092,
"step": 126000
},
{
"epoch": 0.81,
"eval_loss": 2.2108232975006104,
"eval_runtime": 18.7069,
"eval_samples_per_second": 106.912,
"eval_steps_per_second": 1.711,
"step": 126000
},
{
"epoch": 0.81,
"learning_rate": 9.355317732602796e-06,
"loss": 2.0019,
"step": 127000
},
{
"epoch": 0.81,
"eval_loss": 2.2236363887786865,
"eval_runtime": 18.8801,
"eval_samples_per_second": 105.931,
"eval_steps_per_second": 1.695,
"step": 127000
},
{
"epoch": 0.82,
"learning_rate": 9.035280864355574e-06,
"loss": 1.9931,
"step": 128000
},
{
"epoch": 0.82,
"eval_loss": 2.2105228900909424,
"eval_runtime": 21.3819,
"eval_samples_per_second": 93.537,
"eval_steps_per_second": 1.497,
"step": 128000
},
{
"epoch": 0.83,
"learning_rate": 8.715243996108352e-06,
"loss": 1.9851,
"step": 129000
},
{
"epoch": 0.83,
"eval_loss": 2.2179064750671387,
"eval_runtime": 19.3741,
"eval_samples_per_second": 103.231,
"eval_steps_per_second": 1.652,
"step": 129000
},
{
"epoch": 0.83,
"learning_rate": 8.395207127861129e-06,
"loss": 1.9882,
"step": 130000
},
{
"epoch": 0.83,
"eval_loss": 2.2303926944732666,
"eval_runtime": 18.846,
"eval_samples_per_second": 106.123,
"eval_steps_per_second": 1.698,
"step": 130000
},
{
"epoch": 0.84,
"learning_rate": 8.075170259613907e-06,
"loss": 1.999,
"step": 131000
},
{
"epoch": 0.84,
"eval_loss": 2.202813148498535,
"eval_runtime": 19.3498,
"eval_samples_per_second": 103.36,
"eval_steps_per_second": 1.654,
"step": 131000
},
{
"epoch": 0.84,
"learning_rate": 7.755133391366686e-06,
"loss": 1.9848,
"step": 132000
},
{
"epoch": 0.84,
"eval_loss": 2.1549251079559326,
"eval_runtime": 20.1588,
"eval_samples_per_second": 99.212,
"eval_steps_per_second": 1.587,
"step": 132000
},
{
"epoch": 0.85,
"learning_rate": 7.435096523119464e-06,
"loss": 1.9962,
"step": 133000
},
{
"epoch": 0.85,
"eval_loss": 2.2457568645477295,
"eval_runtime": 19.1213,
"eval_samples_per_second": 104.595,
"eval_steps_per_second": 1.674,
"step": 133000
},
{
"epoch": 0.86,
"learning_rate": 7.115059654872242e-06,
"loss": 1.991,
"step": 134000
},
{
"epoch": 0.86,
"eval_loss": 2.1861023902893066,
"eval_runtime": 19.2023,
"eval_samples_per_second": 104.154,
"eval_steps_per_second": 1.666,
"step": 134000
},
{
"epoch": 0.86,
"learning_rate": 6.79502278662502e-06,
"loss": 1.9901,
"step": 135000
},
{
"epoch": 0.86,
"eval_loss": 2.2025179862976074,
"eval_runtime": 19.2167,
"eval_samples_per_second": 104.076,
"eval_steps_per_second": 1.665,
"step": 135000
},
{
"epoch": 0.87,
"learning_rate": 6.474985918377798e-06,
"loss": 1.9698,
"step": 136000
},
{
"epoch": 0.87,
"eval_loss": 2.2299790382385254,
"eval_runtime": 20.1153,
"eval_samples_per_second": 99.427,
"eval_steps_per_second": 1.591,
"step": 136000
},
{
"epoch": 0.88,
"learning_rate": 6.154949050130575e-06,
"loss": 1.9772,
"step": 137000
},
{
"epoch": 0.88,
"eval_loss": 2.1934893131256104,
"eval_runtime": 19.0706,
"eval_samples_per_second": 104.874,
"eval_steps_per_second": 1.678,
"step": 137000
},
{
"epoch": 0.88,
"learning_rate": 5.8349121818833536e-06,
"loss": 1.974,
"step": 138000
},
{
"epoch": 0.88,
"eval_loss": 2.201178789138794,
"eval_runtime": 18.9851,
"eval_samples_per_second": 105.346,
"eval_steps_per_second": 1.686,
"step": 138000
},
{
"epoch": 0.89,
"learning_rate": 5.514875313636131e-06,
"loss": 1.9906,
"step": 139000
},
{
"epoch": 0.89,
"eval_loss": 2.2042794227600098,
"eval_runtime": 19.1406,
"eval_samples_per_second": 104.49,
"eval_steps_per_second": 1.672,
"step": 139000
},
{
"epoch": 0.9,
"learning_rate": 5.194838445388909e-06,
"loss": 1.9899,
"step": 140000
},
{
"epoch": 0.9,
"eval_loss": 2.187676191329956,
"eval_runtime": 19.4746,
"eval_samples_per_second": 102.698,
"eval_steps_per_second": 1.643,
"step": 140000
},
{
"epoch": 0.9,
"learning_rate": 4.874801577141687e-06,
"loss": 1.9785,
"step": 141000
},
{
"epoch": 0.9,
"eval_loss": 2.2104039192199707,
"eval_runtime": 19.2016,
"eval_samples_per_second": 104.158,
"eval_steps_per_second": 1.667,
"step": 141000
},
{
"epoch": 0.91,
"learning_rate": 4.5547647088944646e-06,
"loss": 1.9682,
"step": 142000
},
{
"epoch": 0.91,
"eval_loss": 2.1898605823516846,
"eval_runtime": 19.2296,
"eval_samples_per_second": 104.006,
"eval_steps_per_second": 1.664,
"step": 142000
},
{
"epoch": 0.92,
"learning_rate": 4.234727840647243e-06,
"loss": 1.9785,
"step": 143000
},
{
"epoch": 0.92,
"eval_loss": 2.183152437210083,
"eval_runtime": 19.1118,
"eval_samples_per_second": 104.647,
"eval_steps_per_second": 1.674,
"step": 143000
},
{
"epoch": 0.92,
"learning_rate": 3.914690972400021e-06,
"loss": 1.9795,
"step": 144000
},
{
"epoch": 0.92,
"eval_loss": 2.199709415435791,
"eval_runtime": 19.352,
"eval_samples_per_second": 103.348,
"eval_steps_per_second": 1.654,
"step": 144000
},
{
"epoch": 0.93,
"learning_rate": 3.5946541041527984e-06,
"loss": 1.9656,
"step": 145000
},
{
"epoch": 0.93,
"eval_loss": 2.200268507003784,
"eval_runtime": 19.3103,
"eval_samples_per_second": 103.572,
"eval_steps_per_second": 1.657,
"step": 145000
},
{
"epoch": 0.93,
"learning_rate": 3.2746172359055764e-06,
"loss": 1.9813,
"step": 146000
},
{
"epoch": 0.93,
"eval_loss": 2.1825687885284424,
"eval_runtime": 19.0952,
"eval_samples_per_second": 104.739,
"eval_steps_per_second": 1.676,
"step": 146000
},
{
"epoch": 0.94,
"learning_rate": 2.9545803676583543e-06,
"loss": 1.9719,
"step": 147000
},
{
"epoch": 0.94,
"eval_loss": 2.1915125846862793,
"eval_runtime": 19.3108,
"eval_samples_per_second": 103.569,
"eval_steps_per_second": 1.657,
"step": 147000
},
{
"epoch": 0.95,
"learning_rate": 2.6345434994111323e-06,
"loss": 1.962,
"step": 148000
},
{
"epoch": 0.95,
"eval_loss": 2.196523904800415,
"eval_runtime": 19.1234,
"eval_samples_per_second": 104.584,
"eval_steps_per_second": 1.673,
"step": 148000
},
{
"epoch": 0.95,
"learning_rate": 2.3145066311639102e-06,
"loss": 1.9657,
"step": 149000
},
{
"epoch": 0.95,
"eval_loss": 2.1772007942199707,
"eval_runtime": 19.0921,
"eval_samples_per_second": 104.756,
"eval_steps_per_second": 1.676,
"step": 149000
},
{
"epoch": 0.96,
"learning_rate": 1.994469762916688e-06,
"loss": 1.9662,
"step": 150000
},
{
"epoch": 0.96,
"eval_loss": 2.151597261428833,
"eval_runtime": 19.1055,
"eval_samples_per_second": 104.682,
"eval_steps_per_second": 1.675,
"step": 150000
},
{
"epoch": 0.97,
"learning_rate": 1.674432894669466e-06,
"loss": 1.9631,
"step": 151000
},
{
"epoch": 0.97,
"eval_loss": 2.1692702770233154,
"eval_runtime": 19.4031,
"eval_samples_per_second": 103.077,
"eval_steps_per_second": 1.649,
"step": 151000
},
{
"epoch": 0.97,
"learning_rate": 1.354396026422244e-06,
"loss": 1.9651,
"step": 152000
},
{
"epoch": 0.97,
"eval_loss": 2.174436330795288,
"eval_runtime": 19.3133,
"eval_samples_per_second": 103.555,
"eval_steps_per_second": 1.657,
"step": 152000
},
{
"epoch": 0.98,
"learning_rate": 1.0343591581750219e-06,
"loss": 1.9761,
"step": 153000
},
{
"epoch": 0.98,
"eval_loss": 2.1922249794006348,
"eval_runtime": 19.2415,
"eval_samples_per_second": 103.942,
"eval_steps_per_second": 1.663,
"step": 153000
},
{
"epoch": 0.99,
"learning_rate": 7.143222899277997e-07,
"loss": 1.9602,
"step": 154000
},
{
"epoch": 0.99,
"eval_loss": 2.177457571029663,
"eval_runtime": 19.1279,
"eval_samples_per_second": 104.559,
"eval_steps_per_second": 1.673,
"step": 154000
},
{
"epoch": 0.99,
"learning_rate": 3.9428542168057766e-07,
"loss": 1.9429,
"step": 155000
},
{
"epoch": 0.99,
"eval_loss": 2.167567491531372,
"eval_runtime": 19.5087,
"eval_samples_per_second": 102.518,
"eval_steps_per_second": 1.64,
"step": 155000
},
{
"epoch": 1.0,
"learning_rate": 7.424855343335553e-08,
"loss": 1.9662,
"step": 156000
},
{
"epoch": 1.0,
"eval_loss": 2.179702043533325,
"eval_runtime": 19.3655,
"eval_samples_per_second": 103.276,
"eval_steps_per_second": 1.652,
"step": 156000
},
{
"epoch": 1.0,
"learning_rate": 2.4877105842593068e-05,
"loss": 1.9883,
"step": 157000
},
{
"epoch": 1.0,
"eval_loss": 2.1984949111938477,
"eval_runtime": 19.5918,
"eval_samples_per_second": 102.084,
"eval_steps_per_second": 1.633,
"step": 157000
},
{
"epoch": 1.01,
"learning_rate": 2.4717087408469457e-05,
"loss": 2.0127,
"step": 158000
},
{
"epoch": 1.01,
"eval_loss": 2.264371633529663,
"eval_runtime": 19.1742,
"eval_samples_per_second": 104.307,
"eval_steps_per_second": 1.669,
"step": 158000
},
{
"epoch": 1.02,
"learning_rate": 2.4557068974345846e-05,
"loss": 2.013,
"step": 159000
},
{
"epoch": 1.02,
"eval_loss": 2.263242721557617,
"eval_runtime": 19.0844,
"eval_samples_per_second": 104.798,
"eval_steps_per_second": 1.677,
"step": 159000
},
{
"epoch": 1.02,
"learning_rate": 2.4397050540222236e-05,
"loss": 2.0243,
"step": 160000
},
{
"epoch": 1.02,
"eval_loss": 2.267091751098633,
"eval_runtime": 19.3957,
"eval_samples_per_second": 103.116,
"eval_steps_per_second": 1.65,
"step": 160000
},
{
"epoch": 1.03,
"learning_rate": 2.423703210609862e-05,
"loss": 2.021,
"step": 161000
},
{
"epoch": 1.03,
"eval_loss": 2.2471094131469727,
"eval_runtime": 19.2438,
"eval_samples_per_second": 103.929,
"eval_steps_per_second": 1.663,
"step": 161000
},
{
"epoch": 1.04,
"learning_rate": 2.4077013671975014e-05,
"loss": 2.0278,
"step": 162000
},
{
"epoch": 1.04,
"eval_loss": 2.2140402793884277,
"eval_runtime": 19.0312,
"eval_samples_per_second": 105.091,
"eval_steps_per_second": 1.681,
"step": 162000
},
{
"epoch": 1.04,
"learning_rate": 2.3916995237851403e-05,
"loss": 2.0109,
"step": 163000
},
{
"epoch": 1.04,
"eval_loss": 2.2622554302215576,
"eval_runtime": 19.0334,
"eval_samples_per_second": 105.078,
"eval_steps_per_second": 1.681,
"step": 163000
},
{
"epoch": 1.05,
"learning_rate": 2.375697680372779e-05,
"loss": 2.023,
"step": 164000
},
{
"epoch": 1.05,
"eval_loss": 2.245877981185913,
"eval_runtime": 19.4264,
"eval_samples_per_second": 102.953,
"eval_steps_per_second": 1.647,
"step": 164000
},
{
"epoch": 1.06,
"learning_rate": 2.359695836960418e-05,
"loss": 2.0187,
"step": 165000
},
{
"epoch": 1.06,
"eval_loss": 2.25624942779541,
"eval_runtime": 19.303,
"eval_samples_per_second": 103.611,
"eval_steps_per_second": 1.658,
"step": 165000
},
{
"epoch": 1.06,
"learning_rate": 2.3436939935480567e-05,
"loss": 2.019,
"step": 166000
},
{
"epoch": 1.06,
"eval_loss": 2.2587056159973145,
"eval_runtime": 18.8102,
"eval_samples_per_second": 106.325,
"eval_steps_per_second": 1.701,
"step": 166000
},
{
"epoch": 1.07,
"learning_rate": 2.3276921501356956e-05,
"loss": 2.0208,
"step": 167000
},
{
"epoch": 1.07,
"eval_loss": 2.2842631340026855,
"eval_runtime": 19.22,
"eval_samples_per_second": 104.058,
"eval_steps_per_second": 1.665,
"step": 167000
},
{
"epoch": 1.08,
"learning_rate": 2.311690306723335e-05,
"loss": 2.0043,
"step": 168000
},
{
"epoch": 1.08,
"eval_loss": 2.2638208866119385,
"eval_runtime": 19.6646,
"eval_samples_per_second": 101.706,
"eval_steps_per_second": 1.627,
"step": 168000
},
{
"epoch": 1.08,
"learning_rate": 2.2956884633109735e-05,
"loss": 2.0171,
"step": 169000
},
{
"epoch": 1.08,
"eval_loss": 2.2604892253875732,
"eval_runtime": 19.2438,
"eval_samples_per_second": 103.93,
"eval_steps_per_second": 1.663,
"step": 169000
},
{
"epoch": 1.09,
"learning_rate": 2.2796866198986124e-05,
"loss": 2.0351,
"step": 170000
},
{
"epoch": 1.09,
"eval_loss": 2.2608911991119385,
"eval_runtime": 19.3036,
"eval_samples_per_second": 103.607,
"eval_steps_per_second": 1.658,
"step": 170000
},
{
"epoch": 1.09,
"learning_rate": 2.2636847764862513e-05,
"loss": 2.0166,
"step": 171000
},
{
"epoch": 1.09,
"eval_loss": 2.2317748069763184,
"eval_runtime": 19.0555,
"eval_samples_per_second": 104.957,
"eval_steps_per_second": 1.679,
"step": 171000
},
{
"epoch": 1.1,
"learning_rate": 2.2476829330738902e-05,
"loss": 2.0102,
"step": 172000
},
{
"epoch": 1.1,
"eval_loss": 2.2210681438446045,
"eval_runtime": 19.4253,
"eval_samples_per_second": 102.958,
"eval_steps_per_second": 1.647,
"step": 172000
},
{
"epoch": 1.11,
"learning_rate": 2.231681089661529e-05,
"loss": 2.0226,
"step": 173000
},
{
"epoch": 1.11,
"eval_loss": 2.2446329593658447,
"eval_runtime": 19.1758,
"eval_samples_per_second": 104.298,
"eval_steps_per_second": 1.669,
"step": 173000
},
{
"epoch": 1.11,
"learning_rate": 2.215679246249168e-05,
"loss": 2.0293,
"step": 174000
},
{
"epoch": 1.11,
"eval_loss": 2.2327494621276855,
"eval_runtime": 19.0577,
"eval_samples_per_second": 104.945,
"eval_steps_per_second": 1.679,
"step": 174000
},
{
"epoch": 1.12,
"learning_rate": 2.199677402836807e-05,
"loss": 2.0269,
"step": 175000
},
{
"epoch": 1.12,
"eval_loss": 2.223355293273926,
"eval_runtime": 19.372,
"eval_samples_per_second": 103.242,
"eval_steps_per_second": 1.652,
"step": 175000
},
{
"epoch": 1.13,
"learning_rate": 2.1836755594244456e-05,
"loss": 2.0232,
"step": 176000
},
{
"epoch": 1.13,
"eval_loss": 2.2283060550689697,
"eval_runtime": 19.4986,
"eval_samples_per_second": 102.572,
"eval_steps_per_second": 1.641,
"step": 176000
},
{
"epoch": 1.13,
"learning_rate": 2.167673716012085e-05,
"loss": 2.0155,
"step": 177000
},
{
"epoch": 1.13,
"eval_loss": 2.241269588470459,
"eval_runtime": 19.5594,
"eval_samples_per_second": 102.253,
"eval_steps_per_second": 1.636,
"step": 177000
},
{
"epoch": 1.14,
"learning_rate": 2.1516718725997238e-05,
"loss": 2.0148,
"step": 178000
},
{
"epoch": 1.14,
"eval_loss": 2.2584030628204346,
"eval_runtime": 18.9767,
"eval_samples_per_second": 105.392,
"eval_steps_per_second": 1.686,
"step": 178000
},
{
"epoch": 1.15,
"learning_rate": 2.1356700291873623e-05,
"loss": 2.0167,
"step": 179000
},
{
"epoch": 1.15,
"eval_loss": 2.2308297157287598,
"eval_runtime": 19.318,
"eval_samples_per_second": 103.531,
"eval_steps_per_second": 1.656,
"step": 179000
},
{
"epoch": 1.15,
"learning_rate": 2.1196681857750016e-05,
"loss": 2.0204,
"step": 180000
},
{
"epoch": 1.15,
"eval_loss": 2.2320470809936523,
"eval_runtime": 19.5088,
"eval_samples_per_second": 102.518,
"eval_steps_per_second": 1.64,
"step": 180000
},
{
"epoch": 1.16,
"learning_rate": 2.1036663423626402e-05,
"loss": 2.014,
"step": 181000
},
{
"epoch": 1.16,
"eval_loss": 2.25752854347229,
"eval_runtime": 19.1454,
"eval_samples_per_second": 104.464,
"eval_steps_per_second": 1.671,
"step": 181000
},
{
"epoch": 1.16,
"learning_rate": 2.087664498950279e-05,
"loss": 2.0149,
"step": 182000
},
{
"epoch": 1.16,
"eval_loss": 2.2161190509796143,
"eval_runtime": 19.0879,
"eval_samples_per_second": 104.779,
"eval_steps_per_second": 1.676,
"step": 182000
},
{
"epoch": 1.17,
"learning_rate": 2.071662655537918e-05,
"loss": 2.0082,
"step": 183000
},
{
"epoch": 1.17,
"eval_loss": 2.2062742710113525,
"eval_runtime": 19.2713,
"eval_samples_per_second": 103.781,
"eval_steps_per_second": 1.66,
"step": 183000
},
{
"epoch": 1.18,
"learning_rate": 2.055660812125557e-05,
"loss": 2.0017,
"step": 184000
},
{
"epoch": 1.18,
"eval_loss": 2.2289586067199707,
"eval_runtime": 19.4499,
"eval_samples_per_second": 102.828,
"eval_steps_per_second": 1.645,
"step": 184000
},
{
"epoch": 1.18,
"learning_rate": 2.039658968713196e-05,
"loss": 2.0146,
"step": 185000
},
{
"epoch": 1.18,
"eval_loss": 2.2288384437561035,
"eval_runtime": 19.335,
"eval_samples_per_second": 103.439,
"eval_steps_per_second": 1.655,
"step": 185000
},
{
"epoch": 1.19,
"learning_rate": 2.0236571253008348e-05,
"loss": 2.024,
"step": 186000
},
{
"epoch": 1.19,
"eval_loss": 2.194934606552124,
"eval_runtime": 19.5009,
"eval_samples_per_second": 102.559,
"eval_steps_per_second": 1.641,
"step": 186000
},
{
"epoch": 1.2,
"learning_rate": 2.0076552818884737e-05,
"loss": 2.0016,
"step": 187000
},
{
"epoch": 1.2,
"eval_loss": 2.197631597518921,
"eval_runtime": 19.2128,
"eval_samples_per_second": 104.097,
"eval_steps_per_second": 1.666,
"step": 187000
},
{
"epoch": 1.2,
"learning_rate": 1.9916534384761126e-05,
"loss": 2.0066,
"step": 188000
},
{
"epoch": 1.2,
"eval_loss": 2.238746166229248,
"eval_runtime": 19.4524,
"eval_samples_per_second": 102.815,
"eval_steps_per_second": 1.645,
"step": 188000
},
{
"epoch": 1.21,
"learning_rate": 1.9756515950637515e-05,
"loss": 2.0168,
"step": 189000
},
{
"epoch": 1.21,
"eval_loss": 2.2261757850646973,
"eval_runtime": 19.645,
"eval_samples_per_second": 101.807,
"eval_steps_per_second": 1.629,
"step": 189000
},
{
"epoch": 1.22,
"learning_rate": 1.9596497516513904e-05,
"loss": 2.0023,
"step": 190000
},
{
"epoch": 1.22,
"eval_loss": 2.2070722579956055,
"eval_runtime": 19.0874,
"eval_samples_per_second": 104.781,
"eval_steps_per_second": 1.676,
"step": 190000
},
{
"epoch": 1.22,
"learning_rate": 1.943647908239029e-05,
"loss": 1.9917,
"step": 191000
},
{
"epoch": 1.22,
"eval_loss": 2.2613461017608643,
"eval_runtime": 19.1099,
"eval_samples_per_second": 104.658,
"eval_steps_per_second": 1.675,
"step": 191000
},
{
"epoch": 1.23,
"learning_rate": 1.927646064826668e-05,
"loss": 2.01,
"step": 192000
},
{
"epoch": 1.23,
"eval_loss": 2.2324349880218506,
"eval_runtime": 20.8611,
"eval_samples_per_second": 95.872,
"eval_steps_per_second": 1.534,
"step": 192000
},
{
"epoch": 1.24,
"learning_rate": 1.9116442214143072e-05,
"loss": 2.0023,
"step": 193000
},
{
"epoch": 1.24,
"eval_loss": 2.2707834243774414,
"eval_runtime": 19.7356,
"eval_samples_per_second": 101.34,
"eval_steps_per_second": 1.621,
"step": 193000
},
{
"epoch": 1.24,
"learning_rate": 1.8956423780019458e-05,
"loss": 2.0037,
"step": 194000
},
{
"epoch": 1.24,
"eval_loss": 2.2384769916534424,
"eval_runtime": 19.0414,
"eval_samples_per_second": 105.034,
"eval_steps_per_second": 1.681,
"step": 194000
},
{
"epoch": 1.25,
"learning_rate": 1.8796405345895847e-05,
"loss": 1.9994,
"step": 195000
},
{
"epoch": 1.25,
"eval_loss": 2.192796230316162,
"eval_runtime": 19.0496,
"eval_samples_per_second": 104.989,
"eval_steps_per_second": 1.68,
"step": 195000
},
{
"epoch": 1.25,
"learning_rate": 1.863638691177224e-05,
"loss": 1.994,
"step": 196000
},
{
"epoch": 1.25,
"eval_loss": 2.170961618423462,
"eval_runtime": 19.6903,
"eval_samples_per_second": 101.573,
"eval_steps_per_second": 1.625,
"step": 196000
},
{
"epoch": 1.26,
"learning_rate": 1.8476368477648625e-05,
"loss": 2.0016,
"step": 197000
},
{
"epoch": 1.26,
"eval_loss": 2.2660317420959473,
"eval_runtime": 19.6654,
"eval_samples_per_second": 101.702,
"eval_steps_per_second": 1.627,
"step": 197000
},
{
"epoch": 1.27,
"learning_rate": 1.8316350043525015e-05,
"loss": 2.0044,
"step": 198000
},
{
"epoch": 1.27,
"eval_loss": 2.204163074493408,
"eval_runtime": 18.9759,
"eval_samples_per_second": 105.397,
"eval_steps_per_second": 1.686,
"step": 198000
},
{
"epoch": 1.27,
"learning_rate": 1.8156331609401404e-05,
"loss": 1.9962,
"step": 199000
},
{
"epoch": 1.27,
"eval_loss": 2.214494228363037,
"eval_runtime": 19.1044,
"eval_samples_per_second": 104.688,
"eval_steps_per_second": 1.675,
"step": 199000
},
{
"epoch": 1.28,
"learning_rate": 1.7996313175277793e-05,
"loss": 2.002,
"step": 200000
},
{
"epoch": 1.28,
"eval_loss": 2.231771230697632,
"eval_runtime": 19.3683,
"eval_samples_per_second": 103.262,
"eval_steps_per_second": 1.652,
"step": 200000
},
{
"epoch": 1.29,
"learning_rate": 1.7836294741154182e-05,
"loss": 1.9933,
"step": 201000
},
{
"epoch": 1.29,
"eval_loss": 2.2037816047668457,
"eval_runtime": 19.3894,
"eval_samples_per_second": 103.149,
"eval_steps_per_second": 1.65,
"step": 201000
},
{
"epoch": 1.29,
"learning_rate": 1.767627630703057e-05,
"loss": 2.01,
"step": 202000
},
{
"epoch": 1.29,
"eval_loss": 2.1932146549224854,
"eval_runtime": 19.0804,
"eval_samples_per_second": 104.819,
"eval_steps_per_second": 1.677,
"step": 202000
},
{
"epoch": 1.3,
"learning_rate": 1.751625787290696e-05,
"loss": 1.9876,
"step": 203000
},
{
"epoch": 1.3,
"eval_loss": 2.1909868717193604,
"eval_runtime": 19.2334,
"eval_samples_per_second": 103.986,
"eval_steps_per_second": 1.664,
"step": 203000
},
{
"epoch": 1.31,
"learning_rate": 1.7356239438783346e-05,
"loss": 1.9959,
"step": 204000
},
{
"epoch": 1.31,
"eval_loss": 2.226149559020996,
"eval_runtime": 19.403,
"eval_samples_per_second": 103.077,
"eval_steps_per_second": 1.649,
"step": 204000
},
{
"epoch": 1.31,
"learning_rate": 1.719622100465974e-05,
"loss": 1.9966,
"step": 205000
},
{
"epoch": 1.31,
"eval_loss": 2.250934600830078,
"eval_runtime": 19.4964,
"eval_samples_per_second": 102.583,
"eval_steps_per_second": 1.641,
"step": 205000
},
{
"epoch": 1.32,
"learning_rate": 1.7036202570536128e-05,
"loss": 2.001,
"step": 206000
},
{
"epoch": 1.32,
"eval_loss": 2.1994211673736572,
"eval_runtime": 19.1839,
"eval_samples_per_second": 104.254,
"eval_steps_per_second": 1.668,
"step": 206000
},
{
"epoch": 1.32,
"learning_rate": 1.6876184136412514e-05,
"loss": 1.9883,
"step": 207000
},
{
"epoch": 1.32,
"eval_loss": 2.196751356124878,
"eval_runtime": 19.6979,
"eval_samples_per_second": 101.534,
"eval_steps_per_second": 1.625,
"step": 207000
},
{
"epoch": 1.33,
"learning_rate": 1.6716165702288906e-05,
"loss": 1.9968,
"step": 208000
},
{
"epoch": 1.33,
"eval_loss": 2.248135805130005,
"eval_runtime": 19.2411,
"eval_samples_per_second": 103.944,
"eval_steps_per_second": 1.663,
"step": 208000
},
{
"epoch": 1.34,
"learning_rate": 1.6556147268165292e-05,
"loss": 1.9951,
"step": 209000
},
{
"epoch": 1.34,
"eval_loss": 2.213362216949463,
"eval_runtime": 19.146,
"eval_samples_per_second": 104.46,
"eval_steps_per_second": 1.671,
"step": 209000
},
{
"epoch": 1.34,
"learning_rate": 1.639612883404168e-05,
"loss": 1.9941,
"step": 210000
},
{
"epoch": 1.34,
"eval_loss": 2.219302177429199,
"eval_runtime": 19.0054,
"eval_samples_per_second": 105.233,
"eval_steps_per_second": 1.684,
"step": 210000
},
{
"epoch": 1.35,
"learning_rate": 1.6236110399918074e-05,
"loss": 1.9875,
"step": 211000
},
{
"epoch": 1.35,
"eval_loss": 2.2148916721343994,
"eval_runtime": 19.4732,
"eval_samples_per_second": 102.705,
"eval_steps_per_second": 1.643,
"step": 211000
},
{
"epoch": 1.36,
"learning_rate": 1.607609196579446e-05,
"loss": 2.0026,
"step": 212000
},
{
"epoch": 1.36,
"eval_loss": 2.197999954223633,
"eval_runtime": 19.3649,
"eval_samples_per_second": 103.28,
"eval_steps_per_second": 1.652,
"step": 212000
},
{
"epoch": 1.36,
"learning_rate": 1.591607353167085e-05,
"loss": 1.9908,
"step": 213000
},
{
"epoch": 1.36,
"eval_loss": 2.2245354652404785,
"eval_runtime": 19.4688,
"eval_samples_per_second": 102.728,
"eval_steps_per_second": 1.644,
"step": 213000
},
{
"epoch": 1.37,
"learning_rate": 1.5756055097547238e-05,
"loss": 1.979,
"step": 214000
},
{
"epoch": 1.37,
"eval_loss": 2.186586856842041,
"eval_runtime": 19.6234,
"eval_samples_per_second": 101.919,
"eval_steps_per_second": 1.631,
"step": 214000
},
{
"epoch": 1.38,
"learning_rate": 1.5596036663423627e-05,
"loss": 1.99,
"step": 215000
},
{
"epoch": 1.38,
"eval_loss": 2.182631015777588,
"eval_runtime": 19.4018,
"eval_samples_per_second": 103.083,
"eval_steps_per_second": 1.649,
"step": 215000
},
{
"epoch": 1.38,
"learning_rate": 1.5436018229300017e-05,
"loss": 1.9816,
"step": 216000
},
{
"epoch": 1.38,
"eval_loss": 2.187858819961548,
"eval_runtime": 19.4098,
"eval_samples_per_second": 103.041,
"eval_steps_per_second": 1.649,
"step": 216000
},
{
"epoch": 1.39,
"learning_rate": 1.5275999795176406e-05,
"loss": 1.989,
"step": 217000
},
{
"epoch": 1.39,
"eval_loss": 2.232002019882202,
"eval_runtime": 19.4529,
"eval_samples_per_second": 102.813,
"eval_steps_per_second": 1.645,
"step": 217000
},
{
"epoch": 1.4,
"learning_rate": 1.5115981361052795e-05,
"loss": 1.9931,
"step": 218000
},
{
"epoch": 1.4,
"eval_loss": 2.1929688453674316,
"eval_runtime": 19.3402,
"eval_samples_per_second": 103.411,
"eval_steps_per_second": 1.655,
"step": 218000
},
{
"epoch": 1.4,
"learning_rate": 1.4955962926929182e-05,
"loss": 1.9804,
"step": 219000
},
{
"epoch": 1.4,
"eval_loss": 2.2313404083251953,
"eval_runtime": 19.6691,
"eval_samples_per_second": 101.682,
"eval_steps_per_second": 1.627,
"step": 219000
},
{
"epoch": 1.41,
"learning_rate": 1.4795944492805572e-05,
"loss": 1.9902,
"step": 220000
},
{
"epoch": 1.41,
"eval_loss": 2.1808815002441406,
"eval_runtime": 19.8875,
"eval_samples_per_second": 100.566,
"eval_steps_per_second": 1.609,
"step": 220000
},
{
"epoch": 1.41,
"learning_rate": 1.4635926058681963e-05,
"loss": 1.9791,
"step": 221000
},
{
"epoch": 1.41,
"eval_loss": 2.1454262733459473,
"eval_runtime": 19.9595,
"eval_samples_per_second": 100.203,
"eval_steps_per_second": 1.603,
"step": 221000
},
{
"epoch": 1.42,
"learning_rate": 1.4475907624558348e-05,
"loss": 1.9702,
"step": 222000
},
{
"epoch": 1.42,
"eval_loss": 2.220078468322754,
"eval_runtime": 19.5477,
"eval_samples_per_second": 102.314,
"eval_steps_per_second": 1.637,
"step": 222000
},
{
"epoch": 1.43,
"learning_rate": 1.431588919043474e-05,
"loss": 1.9848,
"step": 223000
},
{
"epoch": 1.43,
"eval_loss": 2.198873281478882,
"eval_runtime": 19.8165,
"eval_samples_per_second": 100.926,
"eval_steps_per_second": 1.615,
"step": 223000
},
{
"epoch": 1.43,
"learning_rate": 1.4155870756311127e-05,
"loss": 1.9813,
"step": 224000
},
{
"epoch": 1.43,
"eval_loss": 2.197327136993408,
"eval_runtime": 21.9598,
"eval_samples_per_second": 91.076,
"eval_steps_per_second": 1.457,
"step": 224000
},
{
"epoch": 1.44,
"learning_rate": 1.3995852322187516e-05,
"loss": 1.9784,
"step": 225000
},
{
"epoch": 1.44,
"eval_loss": 2.189138889312744,
"eval_runtime": 19.3319,
"eval_samples_per_second": 103.456,
"eval_steps_per_second": 1.655,
"step": 225000
},
{
"epoch": 1.45,
"learning_rate": 1.3835833888063907e-05,
"loss": 1.9766,
"step": 226000
},
{
"epoch": 1.45,
"eval_loss": 2.20912504196167,
"eval_runtime": 19.5253,
"eval_samples_per_second": 102.431,
"eval_steps_per_second": 1.639,
"step": 226000
},
{
"epoch": 1.45,
"learning_rate": 1.3675815453940294e-05,
"loss": 1.9732,
"step": 227000
},
{
"epoch": 1.45,
"eval_loss": 2.140838384628296,
"eval_runtime": 19.1497,
"eval_samples_per_second": 104.44,
"eval_steps_per_second": 1.671,
"step": 227000
},
{
"epoch": 1.46,
"learning_rate": 1.3515797019816683e-05,
"loss": 1.9621,
"step": 228000
},
{
"epoch": 1.46,
"eval_loss": 2.226170063018799,
"eval_runtime": 19.0166,
"eval_samples_per_second": 105.171,
"eval_steps_per_second": 1.683,
"step": 228000
},
{
"epoch": 1.47,
"learning_rate": 1.3355778585693071e-05,
"loss": 1.9739,
"step": 229000
},
{
"epoch": 1.47,
"eval_loss": 2.2281548976898193,
"eval_runtime": 19.3581,
"eval_samples_per_second": 103.316,
"eval_steps_per_second": 1.653,
"step": 229000
},
{
"epoch": 1.47,
"learning_rate": 1.3195760151569462e-05,
"loss": 1.968,
"step": 230000
},
{
"epoch": 1.47,
"eval_loss": 2.205911636352539,
"eval_runtime": 19.2592,
"eval_samples_per_second": 103.846,
"eval_steps_per_second": 1.662,
"step": 230000
},
{
"epoch": 1.48,
"learning_rate": 1.3035741717445851e-05,
"loss": 1.9656,
"step": 231000
},
{
"epoch": 1.48,
"eval_loss": 2.2183620929718018,
"eval_runtime": 19.2973,
"eval_samples_per_second": 103.641,
"eval_steps_per_second": 1.658,
"step": 231000
},
{
"epoch": 1.48,
"learning_rate": 1.2875723283322239e-05,
"loss": 1.9728,
"step": 232000
},
{
"epoch": 1.48,
"eval_loss": 2.1920948028564453,
"eval_runtime": 19.4211,
"eval_samples_per_second": 102.981,
"eval_steps_per_second": 1.648,
"step": 232000
},
{
"epoch": 1.49,
"learning_rate": 1.271570484919863e-05,
"loss": 1.9577,
"step": 233000
},
{
"epoch": 1.49,
"eval_loss": 2.191782236099243,
"eval_runtime": 19.3617,
"eval_samples_per_second": 103.296,
"eval_steps_per_second": 1.653,
"step": 233000
},
{
"epoch": 1.5,
"learning_rate": 1.2555686415075015e-05,
"loss": 1.9777,
"step": 234000
},
{
"epoch": 1.5,
"eval_loss": 2.209336042404175,
"eval_runtime": 19.3939,
"eval_samples_per_second": 103.125,
"eval_steps_per_second": 1.65,
"step": 234000
},
{
"epoch": 1.5,
"learning_rate": 1.2395667980951406e-05,
"loss": 1.9662,
"step": 235000
},
{
"epoch": 1.5,
"eval_loss": 2.152353048324585,
"eval_runtime": 19.7245,
"eval_samples_per_second": 101.397,
"eval_steps_per_second": 1.622,
"step": 235000
},
{
"epoch": 1.51,
"learning_rate": 1.2235649546827795e-05,
"loss": 1.9681,
"step": 236000
},
{
"epoch": 1.51,
"eval_loss": 2.1999175548553467,
"eval_runtime": 18.9532,
"eval_samples_per_second": 105.523,
"eval_steps_per_second": 1.688,
"step": 236000
},
{
"epoch": 1.52,
"learning_rate": 1.2075631112704184e-05,
"loss": 1.9543,
"step": 237000
},
{
"epoch": 1.52,
"eval_loss": 2.1981661319732666,
"eval_runtime": 19.2785,
"eval_samples_per_second": 103.742,
"eval_steps_per_second": 1.66,
"step": 237000
},
{
"epoch": 1.52,
"learning_rate": 1.1915612678580574e-05,
"loss": 1.9636,
"step": 238000
},
{
"epoch": 1.52,
"eval_loss": 2.197685956954956,
"eval_runtime": 19.3506,
"eval_samples_per_second": 103.356,
"eval_steps_per_second": 1.654,
"step": 238000
},
{
"epoch": 1.53,
"learning_rate": 1.1755594244456961e-05,
"loss": 1.9623,
"step": 239000
},
{
"epoch": 1.53,
"eval_loss": 2.207620620727539,
"eval_runtime": 19.1912,
"eval_samples_per_second": 104.214,
"eval_steps_per_second": 1.667,
"step": 239000
},
{
"epoch": 1.54,
"learning_rate": 1.159557581033335e-05,
"loss": 1.9645,
"step": 240000
},
{
"epoch": 1.54,
"eval_loss": 2.1756386756896973,
"eval_runtime": 19.1978,
"eval_samples_per_second": 104.178,
"eval_steps_per_second": 1.667,
"step": 240000
},
{
"epoch": 1.54,
"learning_rate": 1.143555737620974e-05,
"loss": 1.9676,
"step": 241000
},
{
"epoch": 1.54,
"eval_loss": 2.1699678897857666,
"eval_runtime": 19.2027,
"eval_samples_per_second": 104.152,
"eval_steps_per_second": 1.666,
"step": 241000
},
{
"epoch": 1.55,
"learning_rate": 1.1275538942086129e-05,
"loss": 1.9552,
"step": 242000
},
{
"epoch": 1.55,
"eval_loss": 2.1813385486602783,
"eval_runtime": 19.1939,
"eval_samples_per_second": 104.2,
"eval_steps_per_second": 1.667,
"step": 242000
},
{
"epoch": 1.56,
"learning_rate": 1.1115520507962518e-05,
"loss": 1.9675,
"step": 243000
},
{
"epoch": 1.56,
"eval_loss": 2.1804428100585938,
"eval_runtime": 19.3246,
"eval_samples_per_second": 103.495,
"eval_steps_per_second": 1.656,
"step": 243000
},
{
"epoch": 1.56,
"learning_rate": 1.0955502073838907e-05,
"loss": 1.9707,
"step": 244000
},
{
"epoch": 1.56,
"eval_loss": 2.1776347160339355,
"eval_runtime": 19.4613,
"eval_samples_per_second": 102.768,
"eval_steps_per_second": 1.644,
"step": 244000
},
{
"epoch": 1.57,
"learning_rate": 1.0795483639715295e-05,
"loss": 1.9609,
"step": 245000
},
{
"epoch": 1.57,
"eval_loss": 2.2101809978485107,
"eval_runtime": 19.232,
"eval_samples_per_second": 103.993,
"eval_steps_per_second": 1.664,
"step": 245000
},
{
"epoch": 1.57,
"learning_rate": 1.0635465205591686e-05,
"loss": 1.9584,
"step": 246000
},
{
"epoch": 1.57,
"eval_loss": 2.18208384513855,
"eval_runtime": 19.1408,
"eval_samples_per_second": 104.489,
"eval_steps_per_second": 1.672,
"step": 246000
},
{
"epoch": 1.58,
"learning_rate": 1.0475446771468075e-05,
"loss": 1.9568,
"step": 247000
},
{
"epoch": 1.58,
"eval_loss": 2.164984941482544,
"eval_runtime": 19.2986,
"eval_samples_per_second": 103.634,
"eval_steps_per_second": 1.658,
"step": 247000
},
{
"epoch": 1.59,
"learning_rate": 1.0315428337344462e-05,
"loss": 1.9514,
"step": 248000
},
{
"epoch": 1.59,
"eval_loss": 2.218735456466675,
"eval_runtime": 19.5707,
"eval_samples_per_second": 102.193,
"eval_steps_per_second": 1.635,
"step": 248000
},
{
"epoch": 1.59,
"learning_rate": 1.0155409903220851e-05,
"loss": 1.9567,
"step": 249000
},
{
"epoch": 1.59,
"eval_loss": 2.1572988033294678,
"eval_runtime": 19.0634,
"eval_samples_per_second": 104.913,
"eval_steps_per_second": 1.679,
"step": 249000
},
{
"epoch": 1.6,
"learning_rate": 9.99539146909724e-06,
"loss": 1.9555,
"step": 250000
},
{
"epoch": 1.6,
"eval_loss": 2.1475002765655518,
"eval_runtime": 19.0267,
"eval_samples_per_second": 105.115,
"eval_steps_per_second": 1.682,
"step": 250000
},
{
"epoch": 1.61,
"learning_rate": 9.83537303497363e-06,
"loss": 1.965,
"step": 251000
},
{
"epoch": 1.61,
"eval_loss": 2.1785731315612793,
"eval_runtime": 19.7697,
"eval_samples_per_second": 101.165,
"eval_steps_per_second": 1.619,
"step": 251000
},
{
"epoch": 1.61,
"learning_rate": 9.675354600850019e-06,
"loss": 1.9508,
"step": 252000
},
{
"epoch": 1.61,
"eval_loss": 2.1723153591156006,
"eval_runtime": 19.1786,
"eval_samples_per_second": 104.283,
"eval_steps_per_second": 1.669,
"step": 252000
},
{
"epoch": 1.62,
"learning_rate": 9.515336166726408e-06,
"loss": 1.9522,
"step": 253000
},
{
"epoch": 1.62,
"eval_loss": 2.180307626724243,
"eval_runtime": 18.9009,
"eval_samples_per_second": 105.815,
"eval_steps_per_second": 1.693,
"step": 253000
},
{
"epoch": 1.63,
"learning_rate": 9.355317732602796e-06,
"loss": 1.9637,
"step": 254000
},
{
"epoch": 1.63,
"eval_loss": 2.179806709289551,
"eval_runtime": 19.3455,
"eval_samples_per_second": 103.383,
"eval_steps_per_second": 1.654,
"step": 254000
},
{
"epoch": 1.63,
"learning_rate": 9.195299298479185e-06,
"loss": 1.9588,
"step": 255000
},
{
"epoch": 1.63,
"eval_loss": 2.200853109359741,
"eval_runtime": 19.4782,
"eval_samples_per_second": 102.679,
"eval_steps_per_second": 1.643,
"step": 255000
},
{
"epoch": 1.64,
"learning_rate": 9.035280864355574e-06,
"loss": 1.9553,
"step": 256000
},
{
"epoch": 1.64,
"eval_loss": 2.1626343727111816,
"eval_runtime": 19.24,
"eval_samples_per_second": 103.95,
"eval_steps_per_second": 1.663,
"step": 256000
},
{
"epoch": 1.64,
"learning_rate": 8.875262430231963e-06,
"loss": 1.946,
"step": 257000
},
{
"epoch": 1.64,
"eval_loss": 2.1843950748443604,
"eval_runtime": 19.1181,
"eval_samples_per_second": 104.613,
"eval_steps_per_second": 1.674,
"step": 257000
},
{
"epoch": 1.65,
"learning_rate": 8.715243996108352e-06,
"loss": 1.9493,
"step": 258000
},
{
"epoch": 1.65,
"eval_loss": 2.150207757949829,
"eval_runtime": 19.2502,
"eval_samples_per_second": 103.895,
"eval_steps_per_second": 1.662,
"step": 258000
},
{
"epoch": 1.66,
"learning_rate": 8.55522556198474e-06,
"loss": 1.9442,
"step": 259000
},
{
"epoch": 1.66,
"eval_loss": 2.1614534854888916,
"eval_runtime": 19.2393,
"eval_samples_per_second": 103.954,
"eval_steps_per_second": 1.663,
"step": 259000
},
{
"epoch": 1.66,
"learning_rate": 8.395207127861129e-06,
"loss": 1.945,
"step": 260000
},
{
"epoch": 1.66,
"eval_loss": 2.178889751434326,
"eval_runtime": 19.4657,
"eval_samples_per_second": 102.745,
"eval_steps_per_second": 1.644,
"step": 260000
},
{
"epoch": 1.67,
"learning_rate": 8.23518869373752e-06,
"loss": 1.9368,
"step": 261000
},
{
"epoch": 1.67,
"eval_loss": 2.172461986541748,
"eval_runtime": 19.2788,
"eval_samples_per_second": 103.741,
"eval_steps_per_second": 1.66,
"step": 261000
},
{
"epoch": 1.68,
"learning_rate": 8.075170259613907e-06,
"loss": 1.9393,
"step": 262000
},
{
"epoch": 1.68,
"eval_loss": 2.169734001159668,
"eval_runtime": 19.3666,
"eval_samples_per_second": 103.27,
"eval_steps_per_second": 1.652,
"step": 262000
},
{
"epoch": 1.68,
"learning_rate": 7.915151825490297e-06,
"loss": 1.9525,
"step": 263000
},
{
"epoch": 1.68,
"eval_loss": 2.1597206592559814,
"eval_runtime": 19.3459,
"eval_samples_per_second": 103.381,
"eval_steps_per_second": 1.654,
"step": 263000
},
{
"epoch": 1.69,
"learning_rate": 7.755133391366686e-06,
"loss": 1.9444,
"step": 264000
},
{
"epoch": 1.69,
"eval_loss": 2.1798765659332275,
"eval_runtime": 19.0083,
"eval_samples_per_second": 105.217,
"eval_steps_per_second": 1.683,
"step": 264000
},
{
"epoch": 1.7,
"learning_rate": 7.595114957243074e-06,
"loss": 1.9352,
"step": 265000
},
{
"epoch": 1.7,
"eval_loss": 2.164872169494629,
"eval_runtime": 19.1384,
"eval_samples_per_second": 104.502,
"eval_steps_per_second": 1.672,
"step": 265000
},
{
"epoch": 1.7,
"learning_rate": 7.435096523119464e-06,
"loss": 1.9537,
"step": 266000
},
{
"epoch": 1.7,
"eval_loss": 2.1663596630096436,
"eval_runtime": 19.6791,
"eval_samples_per_second": 101.63,
"eval_steps_per_second": 1.626,
"step": 266000
},
{
"epoch": 1.71,
"learning_rate": 7.2750780889958526e-06,
"loss": 1.9399,
"step": 267000
},
{
"epoch": 1.71,
"eval_loss": 2.1855850219726562,
"eval_runtime": 19.3954,
"eval_samples_per_second": 103.117,
"eval_steps_per_second": 1.65,
"step": 267000
},
{
"epoch": 1.72,
"learning_rate": 7.115059654872242e-06,
"loss": 1.9325,
"step": 268000
},
{
"epoch": 1.72,
"eval_loss": 2.1838717460632324,
"eval_runtime": 19.1074,
"eval_samples_per_second": 104.671,
"eval_steps_per_second": 1.675,
"step": 268000
},
{
"epoch": 1.72,
"learning_rate": 6.95504122074863e-06,
"loss": 1.9466,
"step": 269000
},
{
"epoch": 1.72,
"eval_loss": 2.1524887084960938,
"eval_runtime": 19.375,
"eval_samples_per_second": 103.226,
"eval_steps_per_second": 1.652,
"step": 269000
},
{
"epoch": 1.73,
"learning_rate": 6.79502278662502e-06,
"loss": 1.9403,
"step": 270000
},
{
"epoch": 1.73,
"eval_loss": 2.1773369312286377,
"eval_runtime": 19.1103,
"eval_samples_per_second": 104.656,
"eval_steps_per_second": 1.674,
"step": 270000
},
{
"epoch": 1.73,
"learning_rate": 6.6350043525014085e-06,
"loss": 1.9391,
"step": 271000
},
{
"epoch": 1.73,
"eval_loss": 2.212693452835083,
"eval_runtime": 19.2143,
"eval_samples_per_second": 104.089,
"eval_steps_per_second": 1.665,
"step": 271000
},
{
"epoch": 1.74,
"learning_rate": 6.474985918377798e-06,
"loss": 1.9419,
"step": 272000
},
{
"epoch": 1.74,
"eval_loss": 2.1781909465789795,
"eval_runtime": 19.4708,
"eval_samples_per_second": 102.718,
"eval_steps_per_second": 1.643,
"step": 272000
},
{
"epoch": 1.75,
"learning_rate": 6.314967484254186e-06,
"loss": 1.9454,
"step": 273000
},
{
"epoch": 1.75,
"eval_loss": 2.1962130069732666,
"eval_runtime": 18.6565,
"eval_samples_per_second": 107.201,
"eval_steps_per_second": 1.715,
"step": 273000
},
{
"epoch": 1.75,
"learning_rate": 6.154949050130575e-06,
"loss": 1.946,
"step": 274000
},
{
"epoch": 1.75,
"eval_loss": 2.157792091369629,
"eval_runtime": 19.0429,
"eval_samples_per_second": 105.026,
"eval_steps_per_second": 1.68,
"step": 274000
},
{
"epoch": 1.76,
"learning_rate": 5.994930616006964e-06,
"loss": 1.9339,
"step": 275000
},
{
"epoch": 1.76,
"eval_loss": 2.190920829772949,
"eval_runtime": 18.7174,
"eval_samples_per_second": 106.853,
"eval_steps_per_second": 1.71,
"step": 275000
},
{
"epoch": 1.77,
"learning_rate": 5.8349121818833536e-06,
"loss": 1.9289,
"step": 276000
},
{
"epoch": 1.77,
"eval_loss": 2.169802665710449,
"eval_runtime": 19.7624,
"eval_samples_per_second": 101.202,
"eval_steps_per_second": 1.619,
"step": 276000
},
{
"epoch": 1.77,
"learning_rate": 5.674893747759742e-06,
"loss": 1.9284,
"step": 277000
},
{
"epoch": 1.77,
"eval_loss": 2.149372100830078,
"eval_runtime": 18.847,
"eval_samples_per_second": 106.118,
"eval_steps_per_second": 1.698,
"step": 277000
},
{
"epoch": 1.78,
"learning_rate": 5.514875313636131e-06,
"loss": 1.9423,
"step": 278000
},
{
"epoch": 1.78,
"eval_loss": 2.163377046585083,
"eval_runtime": 19.097,
"eval_samples_per_second": 104.728,
"eval_steps_per_second": 1.676,
"step": 278000
},
{
"epoch": 1.79,
"learning_rate": 5.35485687951252e-06,
"loss": 1.9317,
"step": 279000
},
{
"epoch": 1.79,
"eval_loss": 2.129027843475342,
"eval_runtime": 18.715,
"eval_samples_per_second": 106.866,
"eval_steps_per_second": 1.71,
"step": 279000
},
{
"epoch": 1.79,
"learning_rate": 5.194838445388909e-06,
"loss": 1.9216,
"step": 280000
},
{
"epoch": 1.79,
"eval_loss": 2.171983480453491,
"eval_runtime": 18.8986,
"eval_samples_per_second": 105.828,
"eval_steps_per_second": 1.693,
"step": 280000
},
{
"epoch": 1.8,
"learning_rate": 5.034820011265298e-06,
"loss": 1.9176,
"step": 281000
},
{
"epoch": 1.8,
"eval_loss": 2.1561877727508545,
"eval_runtime": 18.6229,
"eval_samples_per_second": 107.395,
"eval_steps_per_second": 1.718,
"step": 281000
},
{
"epoch": 1.81,
"learning_rate": 4.874801577141687e-06,
"loss": 1.9345,
"step": 282000
},
{
"epoch": 1.81,
"eval_loss": 2.1655592918395996,
"eval_runtime": 18.6917,
"eval_samples_per_second": 106.999,
"eval_steps_per_second": 1.712,
"step": 282000
},
{
"epoch": 1.81,
"learning_rate": 4.714783143018076e-06,
"loss": 1.9431,
"step": 283000
},
{
"epoch": 1.81,
"eval_loss": 2.1130497455596924,
"eval_runtime": 18.7533,
"eval_samples_per_second": 106.648,
"eval_steps_per_second": 1.706,
"step": 283000
},
{
"epoch": 1.82,
"learning_rate": 4.5547647088944646e-06,
"loss": 1.936,
"step": 284000
},
{
"epoch": 1.82,
"eval_loss": 2.1281943321228027,
"eval_runtime": 18.4643,
"eval_samples_per_second": 108.317,
"eval_steps_per_second": 1.733,
"step": 284000
},
{
"epoch": 1.82,
"learning_rate": 4.394746274770854e-06,
"loss": 1.9344,
"step": 285000
},
{
"epoch": 1.82,
"eval_loss": 2.142157554626465,
"eval_runtime": 18.6731,
"eval_samples_per_second": 107.106,
"eval_steps_per_second": 1.714,
"step": 285000
},
{
"epoch": 1.83,
"learning_rate": 4.234727840647243e-06,
"loss": 1.9237,
"step": 286000
},
{
"epoch": 1.83,
"eval_loss": 2.1462085247039795,
"eval_runtime": 18.787,
"eval_samples_per_second": 106.457,
"eval_steps_per_second": 1.703,
"step": 286000
},
{
"epoch": 1.84,
"learning_rate": 4.074709406523631e-06,
"loss": 1.9309,
"step": 287000
},
{
"epoch": 1.84,
"eval_loss": 2.1435041427612305,
"eval_runtime": 18.7845,
"eval_samples_per_second": 106.471,
"eval_steps_per_second": 1.704,
"step": 287000
},
{
"epoch": 1.84,
"learning_rate": 3.914690972400021e-06,
"loss": 1.9239,
"step": 288000
},
{
"epoch": 1.84,
"eval_loss": 2.152646064758301,
"eval_runtime": 18.6983,
"eval_samples_per_second": 106.961,
"eval_steps_per_second": 1.711,
"step": 288000
},
{
"epoch": 1.85,
"learning_rate": 3.7546725382764097e-06,
"loss": 1.9168,
"step": 289000
},
{
"epoch": 1.85,
"eval_loss": 2.1280956268310547,
"eval_runtime": 18.8639,
"eval_samples_per_second": 106.023,
"eval_steps_per_second": 1.696,
"step": 289000
},
{
"epoch": 1.86,
"learning_rate": 3.5946541041527984e-06,
"loss": 1.9232,
"step": 290000
},
{
"epoch": 1.86,
"eval_loss": 2.143430471420288,
"eval_runtime": 18.873,
"eval_samples_per_second": 105.971,
"eval_steps_per_second": 1.696,
"step": 290000
},
{
"epoch": 1.86,
"learning_rate": 3.4346356700291876e-06,
"loss": 1.9338,
"step": 291000
},
{
"epoch": 1.86,
"eval_loss": 2.1642520427703857,
"eval_runtime": 18.6105,
"eval_samples_per_second": 107.466,
"eval_steps_per_second": 1.719,
"step": 291000
},
{
"epoch": 1.87,
"learning_rate": 3.2746172359055764e-06,
"loss": 1.9241,
"step": 292000
},
{
"epoch": 1.87,
"eval_loss": 2.120400905609131,
"eval_runtime": 18.6654,
"eval_samples_per_second": 107.15,
"eval_steps_per_second": 1.714,
"step": 292000
},
{
"epoch": 1.88,
"learning_rate": 3.114598801781965e-06,
"loss": 1.9209,
"step": 293000
},
{
"epoch": 1.88,
"eval_loss": 2.1418490409851074,
"eval_runtime": 18.986,
"eval_samples_per_second": 105.341,
"eval_steps_per_second": 1.685,
"step": 293000
},
{
"epoch": 1.88,
"learning_rate": 2.9545803676583543e-06,
"loss": 1.928,
"step": 294000
},
{
"epoch": 1.88,
"eval_loss": 2.1255481243133545,
"eval_runtime": 18.6322,
"eval_samples_per_second": 107.341,
"eval_steps_per_second": 1.717,
"step": 294000
},
{
"epoch": 1.89,
"learning_rate": 2.7945619335347435e-06,
"loss": 1.9482,
"step": 295000
},
{
"epoch": 1.89,
"eval_loss": 2.185188055038452,
"eval_runtime": 18.6065,
"eval_samples_per_second": 107.489,
"eval_steps_per_second": 1.72,
"step": 295000
},
{
"epoch": 1.89,
"learning_rate": 2.6345434994111323e-06,
"loss": 1.9276,
"step": 296000
},
{
"epoch": 1.89,
"eval_loss": 2.1754209995269775,
"eval_runtime": 18.6892,
"eval_samples_per_second": 107.014,
"eval_steps_per_second": 1.712,
"step": 296000
},
{
"epoch": 1.9,
"learning_rate": 2.4745250652875215e-06,
"loss": 1.9214,
"step": 297000
},
{
"epoch": 1.9,
"eval_loss": 2.124568462371826,
"eval_runtime": 18.6607,
"eval_samples_per_second": 107.177,
"eval_steps_per_second": 1.715,
"step": 297000
},
{
"epoch": 1.91,
"learning_rate": 2.3145066311639102e-06,
"loss": 1.9296,
"step": 298000
},
{
"epoch": 1.91,
"eval_loss": 2.1418752670288086,
"eval_runtime": 18.8993,
"eval_samples_per_second": 105.824,
"eval_steps_per_second": 1.693,
"step": 298000
},
{
"epoch": 1.91,
"learning_rate": 2.154488197040299e-06,
"loss": 1.9182,
"step": 299000
},
{
"epoch": 1.91,
"eval_loss": 2.1427695751190186,
"eval_runtime": 18.6439,
"eval_samples_per_second": 107.273,
"eval_steps_per_second": 1.716,
"step": 299000
},
{
"epoch": 1.92,
"learning_rate": 1.994469762916688e-06,
"loss": 1.9172,
"step": 300000
},
{
"epoch": 1.92,
"eval_loss": 2.17488956451416,
"eval_runtime": 20.0248,
"eval_samples_per_second": 99.876,
"eval_steps_per_second": 1.598,
"step": 300000
},
{
"epoch": 1.93,
"learning_rate": 1.834451328793077e-06,
"loss": 1.9054,
"step": 301000
},
{
"epoch": 1.93,
"eval_loss": 2.1516401767730713,
"eval_runtime": 19.1509,
"eval_samples_per_second": 104.434,
"eval_steps_per_second": 1.671,
"step": 301000
},
{
"epoch": 1.93,
"learning_rate": 1.674432894669466e-06,
"loss": 1.9209,
"step": 302000
},
{
"epoch": 1.93,
"eval_loss": 2.1247944831848145,
"eval_runtime": 19.0766,
"eval_samples_per_second": 104.84,
"eval_steps_per_second": 1.677,
"step": 302000
},
{
"epoch": 1.94,
"learning_rate": 1.5144144605458551e-06,
"loss": 1.9191,
"step": 303000
},
{
"epoch": 1.94,
"eval_loss": 2.1422977447509766,
"eval_runtime": 19.0887,
"eval_samples_per_second": 104.774,
"eval_steps_per_second": 1.676,
"step": 303000
},
{
"epoch": 1.95,
"learning_rate": 1.354396026422244e-06,
"loss": 1.9143,
"step": 304000
},
{
"epoch": 1.95,
"eval_loss": 2.1302106380462646,
"eval_runtime": 19.5033,
"eval_samples_per_second": 102.547,
"eval_steps_per_second": 1.641,
"step": 304000
},
{
"epoch": 1.95,
"learning_rate": 1.1943775922986329e-06,
"loss": 1.9163,
"step": 305000
},
{
"epoch": 1.95,
"eval_loss": 2.16552472114563,
"eval_runtime": 18.815,
"eval_samples_per_second": 106.298,
"eval_steps_per_second": 1.701,
"step": 305000
},
{
"epoch": 1.96,
"learning_rate": 1.0343591581750219e-06,
"loss": 1.915,
"step": 306000
},
{
"epoch": 1.96,
"eval_loss": 2.1272425651550293,
"eval_runtime": 19.1159,
"eval_samples_per_second": 104.625,
"eval_steps_per_second": 1.674,
"step": 306000
},
{
"epoch": 1.97,
"learning_rate": 8.743407240514107e-07,
"loss": 1.9193,
"step": 307000
},
{
"epoch": 1.97,
"eval_loss": 2.151264190673828,
"eval_runtime": 18.961,
"eval_samples_per_second": 105.48,
"eval_steps_per_second": 1.688,
"step": 307000
},
{
"epoch": 1.97,
"learning_rate": 7.143222899277997e-07,
"loss": 1.9238,
"step": 308000
},
{
"epoch": 1.97,
"eval_loss": 2.145237922668457,
"eval_runtime": 19.4596,
"eval_samples_per_second": 102.777,
"eval_steps_per_second": 1.644,
"step": 308000
},
{
"epoch": 1.98,
"learning_rate": 5.543038558041887e-07,
"loss": 1.9129,
"step": 309000
},
{
"epoch": 1.98,
"eval_loss": 2.132681369781494,
"eval_runtime": 18.9129,
"eval_samples_per_second": 105.748,
"eval_steps_per_second": 1.692,
"step": 309000
},
{
"epoch": 1.98,
"learning_rate": 3.9428542168057766e-07,
"loss": 1.92,
"step": 310000
},
{
"epoch": 1.98,
"eval_loss": 2.1479594707489014,
"eval_runtime": 18.8663,
"eval_samples_per_second": 106.009,
"eval_steps_per_second": 1.696,
"step": 310000
},
{
"epoch": 1.99,
"learning_rate": 2.342669875569666e-07,
"loss": 1.9098,
"step": 311000
},
{
"epoch": 1.99,
"eval_loss": 2.171926736831665,
"eval_runtime": 19.0151,
"eval_samples_per_second": 105.179,
"eval_steps_per_second": 1.683,
"step": 311000
},
{
"epoch": 2.0,
"learning_rate": 7.424855343335553e-08,
"loss": 1.9105,
"step": 312000
},
{
"epoch": 2.0,
"eval_loss": 2.1461212635040283,
"eval_runtime": 19.4871,
"eval_samples_per_second": 102.632,
"eval_steps_per_second": 1.642,
"step": 312000
},
{
"epoch": 2.0,
"learning_rate": 1.6609486746206498e-05,
"loss": 1.9453,
"step": 313000
},
{
"epoch": 2.0,
"eval_loss": 2.190183162689209,
"eval_runtime": 16.3864,
"eval_samples_per_second": 122.052,
"eval_steps_per_second": 1.953,
"step": 313000
},
{
"epoch": 2.01,
"learning_rate": 1.650280779012409e-05,
"loss": 1.9458,
"step": 314000
},
{
"epoch": 2.01,
"eval_loss": 2.1692402362823486,
"eval_runtime": 15.9646,
"eval_samples_per_second": 125.277,
"eval_steps_per_second": 2.004,
"step": 314000
},
{
"epoch": 2.02,
"learning_rate": 1.639612883404168e-05,
"loss": 1.9428,
"step": 315000
},
{
"epoch": 2.02,
"eval_loss": 2.1538236141204834,
"eval_runtime": 16.0153,
"eval_samples_per_second": 124.881,
"eval_steps_per_second": 1.998,
"step": 315000
},
{
"epoch": 2.02,
"learning_rate": 1.6289449877959276e-05,
"loss": 1.9488,
"step": 316000
},
{
"epoch": 2.02,
"eval_loss": 2.153665542602539,
"eval_runtime": 16.1132,
"eval_samples_per_second": 124.122,
"eval_steps_per_second": 1.986,
"step": 316000
},
{
"epoch": 2.03,
"learning_rate": 1.6182770921876865e-05,
"loss": 1.9437,
"step": 317000
},
{
"epoch": 2.03,
"eval_loss": 2.1973447799682617,
"eval_runtime": 17.5461,
"eval_samples_per_second": 113.986,
"eval_steps_per_second": 1.824,
"step": 317000
},
{
"epoch": 2.04,
"learning_rate": 1.607609196579446e-05,
"loss": 1.9487,
"step": 318000
},
{
"epoch": 2.04,
"eval_loss": 2.1677041053771973,
"eval_runtime": 15.9539,
"eval_samples_per_second": 125.361,
"eval_steps_per_second": 2.006,
"step": 318000
},
{
"epoch": 2.04,
"learning_rate": 1.596941300971205e-05,
"loss": 1.9559,
"step": 319000
},
{
"epoch": 2.04,
"eval_loss": 2.155820369720459,
"eval_runtime": 16.1853,
"eval_samples_per_second": 123.569,
"eval_steps_per_second": 1.977,
"step": 319000
},
{
"epoch": 2.05,
"learning_rate": 1.5862734053629647e-05,
"loss": 1.9662,
"step": 320000
},
{
"epoch": 2.05,
"eval_loss": 2.1629369258880615,
"eval_runtime": 16.6642,
"eval_samples_per_second": 120.018,
"eval_steps_per_second": 1.92,
"step": 320000
},
{
"epoch": 2.05,
"learning_rate": 1.5756055097547238e-05,
"loss": 1.9556,
"step": 321000
},
{
"epoch": 2.05,
"eval_loss": 2.1815614700317383,
"eval_runtime": 16.5814,
"eval_samples_per_second": 120.617,
"eval_steps_per_second": 1.93,
"step": 321000
},
{
"epoch": 2.06,
"learning_rate": 1.564937614146483e-05,
"loss": 1.9512,
"step": 322000
},
{
"epoch": 2.06,
"eval_loss": 2.1164066791534424,
"eval_runtime": 15.9298,
"eval_samples_per_second": 125.551,
"eval_steps_per_second": 2.009,
"step": 322000
},
{
"epoch": 2.07,
"learning_rate": 1.5542697185382425e-05,
"loss": 1.9544,
"step": 323000
},
{
"epoch": 2.07,
"eval_loss": 2.165865659713745,
"eval_runtime": 15.883,
"eval_samples_per_second": 125.921,
"eval_steps_per_second": 2.015,
"step": 323000
},
{
"epoch": 2.07,
"learning_rate": 1.5436018229300017e-05,
"loss": 1.9568,
"step": 324000
},
{
"epoch": 2.07,
"eval_loss": 2.1747090816497803,
"eval_runtime": 15.9331,
"eval_samples_per_second": 125.525,
"eval_steps_per_second": 2.008,
"step": 324000
},
{
"epoch": 2.08,
"learning_rate": 1.5329339273217608e-05,
"loss": 1.9449,
"step": 325000
},
{
"epoch": 2.08,
"eval_loss": 2.1862990856170654,
"eval_runtime": 16.6952,
"eval_samples_per_second": 119.795,
"eval_steps_per_second": 1.917,
"step": 325000
},
{
"epoch": 2.09,
"learning_rate": 1.5222660317135202e-05,
"loss": 1.9491,
"step": 326000
},
{
"epoch": 2.09,
"eval_loss": 2.1621673107147217,
"eval_runtime": 15.9325,
"eval_samples_per_second": 125.529,
"eval_steps_per_second": 2.008,
"step": 326000
},
{
"epoch": 2.09,
"learning_rate": 1.5115981361052795e-05,
"loss": 1.9526,
"step": 327000
},
{
"epoch": 2.09,
"eval_loss": 2.1826863288879395,
"eval_runtime": 15.8228,
"eval_samples_per_second": 126.4,
"eval_steps_per_second": 2.022,
"step": 327000
},
{
"epoch": 2.1,
"learning_rate": 1.5009302404970388e-05,
"loss": 1.952,
"step": 328000
},
{
"epoch": 2.1,
"eval_loss": 2.1913392543792725,
"eval_runtime": 16.1458,
"eval_samples_per_second": 123.871,
"eval_steps_per_second": 1.982,
"step": 328000
},
{
"epoch": 2.11,
"learning_rate": 1.4902623448887978e-05,
"loss": 1.9545,
"step": 329000
},
{
"epoch": 2.11,
"eval_loss": 2.18023681640625,
"eval_runtime": 16.3141,
"eval_samples_per_second": 122.593,
"eval_steps_per_second": 1.961,
"step": 329000
},
{
"epoch": 2.11,
"learning_rate": 1.4795944492805572e-05,
"loss": 1.9616,
"step": 330000
},
{
"epoch": 2.11,
"eval_loss": 2.178854465484619,
"eval_runtime": 15.9442,
"eval_samples_per_second": 125.438,
"eval_steps_per_second": 2.007,
"step": 330000
},
{
"epoch": 2.12,
"learning_rate": 1.4689265536723165e-05,
"loss": 1.9515,
"step": 331000
},
{
"epoch": 2.12,
"eval_loss": 2.1725854873657227,
"eval_runtime": 15.939,
"eval_samples_per_second": 125.478,
"eval_steps_per_second": 2.008,
"step": 331000
},
{
"epoch": 2.13,
"learning_rate": 1.4582586580640758e-05,
"loss": 1.9484,
"step": 332000
},
{
"epoch": 2.13,
"eval_loss": 2.1632540225982666,
"eval_runtime": 16.7042,
"eval_samples_per_second": 119.731,
"eval_steps_per_second": 1.916,
"step": 332000
},
{
"epoch": 2.13,
"learning_rate": 1.4475907624558348e-05,
"loss": 1.962,
"step": 333000
},
{
"epoch": 2.13,
"eval_loss": 2.1514594554901123,
"eval_runtime": 15.9872,
"eval_samples_per_second": 125.1,
"eval_steps_per_second": 2.002,
"step": 333000
},
{
"epoch": 2.14,
"learning_rate": 1.4369228668475942e-05,
"loss": 1.9563,
"step": 334000
},
{
"epoch": 2.14,
"eval_loss": 2.18198299407959,
"eval_runtime": 16.1962,
"eval_samples_per_second": 123.486,
"eval_steps_per_second": 1.976,
"step": 334000
},
{
"epoch": 2.14,
"learning_rate": 1.4262549712393535e-05,
"loss": 1.9544,
"step": 335000
},
{
"epoch": 2.14,
"eval_loss": 2.168269634246826,
"eval_runtime": 16.3411,
"eval_samples_per_second": 122.391,
"eval_steps_per_second": 1.958,
"step": 335000
},
{
"epoch": 2.15,
"learning_rate": 1.4155870756311127e-05,
"loss": 1.9509,
"step": 336000
},
{
"epoch": 2.15,
"eval_loss": 2.157496690750122,
"eval_runtime": 16.7663,
"eval_samples_per_second": 119.287,
"eval_steps_per_second": 1.909,
"step": 336000
},
{
"epoch": 2.16,
"learning_rate": 1.404919180022872e-05,
"loss": 1.9527,
"step": 337000
},
{
"epoch": 2.16,
"eval_loss": 2.162778377532959,
"eval_runtime": 16.4574,
"eval_samples_per_second": 121.526,
"eval_steps_per_second": 1.944,
"step": 337000
},
{
"epoch": 2.16,
"learning_rate": 1.3942512844146313e-05,
"loss": 1.9455,
"step": 338000
},
{
"epoch": 2.16,
"eval_loss": 2.2115304470062256,
"eval_runtime": 15.9425,
"eval_samples_per_second": 125.451,
"eval_steps_per_second": 2.007,
"step": 338000
},
{
"epoch": 2.17,
"learning_rate": 1.3835833888063907e-05,
"loss": 1.9443,
"step": 339000
},
{
"epoch": 2.17,
"eval_loss": 2.1575698852539062,
"eval_runtime": 16.1638,
"eval_samples_per_second": 123.734,
"eval_steps_per_second": 1.98,
"step": 339000
},
{
"epoch": 2.18,
"learning_rate": 1.3729154931981497e-05,
"loss": 1.9471,
"step": 340000
},
{
"epoch": 2.18,
"eval_loss": 2.163440465927124,
"eval_runtime": 16.5887,
"eval_samples_per_second": 120.564,
"eval_steps_per_second": 1.929,
"step": 340000
},
{
"epoch": 2.18,
"learning_rate": 1.362247597589909e-05,
"loss": 1.9385,
"step": 341000
},
{
"epoch": 2.18,
"eval_loss": 2.1808547973632812,
"eval_runtime": 16.0292,
"eval_samples_per_second": 124.773,
"eval_steps_per_second": 1.996,
"step": 341000
},
{
"epoch": 2.19,
"learning_rate": 1.3515797019816683e-05,
"loss": 1.9472,
"step": 342000
},
{
"epoch": 2.19,
"eval_loss": 2.1804370880126953,
"eval_runtime": 16.1599,
"eval_samples_per_second": 123.763,
"eval_steps_per_second": 1.98,
"step": 342000
},
{
"epoch": 2.2,
"learning_rate": 1.3409118063734277e-05,
"loss": 1.9578,
"step": 343000
},
{
"epoch": 2.2,
"eval_loss": 2.172938346862793,
"eval_runtime": 16.4066,
"eval_samples_per_second": 121.902,
"eval_steps_per_second": 1.95,
"step": 343000
},
{
"epoch": 2.2,
"learning_rate": 1.3302439107651868e-05,
"loss": 1.9501,
"step": 344000
},
{
"epoch": 2.2,
"eval_loss": 2.1206016540527344,
"eval_runtime": 16.7126,
"eval_samples_per_second": 119.67,
"eval_steps_per_second": 1.915,
"step": 344000
},
{
"epoch": 2.21,
"learning_rate": 1.3195760151569462e-05,
"loss": 1.9363,
"step": 345000
},
{
"epoch": 2.21,
"eval_loss": 2.1700916290283203,
"eval_runtime": 15.9452,
"eval_samples_per_second": 125.43,
"eval_steps_per_second": 2.007,
"step": 345000
},
{
"epoch": 2.21,
"learning_rate": 1.3089081195487055e-05,
"loss": 1.9452,
"step": 346000
},
{
"epoch": 2.21,
"eval_loss": 2.1466197967529297,
"eval_runtime": 16.0688,
"eval_samples_per_second": 124.465,
"eval_steps_per_second": 1.991,
"step": 346000
},
{
"epoch": 2.22,
"learning_rate": 1.2982402239404649e-05,
"loss": 1.9544,
"step": 347000
},
{
"epoch": 2.22,
"eval_loss": 2.118955135345459,
"eval_runtime": 16.4559,
"eval_samples_per_second": 121.537,
"eval_steps_per_second": 1.945,
"step": 347000
},
{
"epoch": 2.23,
"learning_rate": 1.2875723283322239e-05,
"loss": 1.9442,
"step": 348000
},
{
"epoch": 2.23,
"eval_loss": 2.2223548889160156,
"eval_runtime": 15.9342,
"eval_samples_per_second": 125.516,
"eval_steps_per_second": 2.008,
"step": 348000
},
{
"epoch": 2.23,
"learning_rate": 1.2769044327239832e-05,
"loss": 1.949,
"step": 349000
},
{
"epoch": 2.23,
"eval_loss": 2.1240313053131104,
"eval_runtime": 16.1322,
"eval_samples_per_second": 123.975,
"eval_steps_per_second": 1.984,
"step": 349000
},
{
"epoch": 2.24,
"learning_rate": 1.2662365371157425e-05,
"loss": 1.9524,
"step": 350000
},
{
"epoch": 2.24,
"eval_loss": 2.2078564167022705,
"eval_runtime": 15.9714,
"eval_samples_per_second": 125.224,
"eval_steps_per_second": 2.004,
"step": 350000
},
{
"epoch": 2.25,
"learning_rate": 1.2555686415075015e-05,
"loss": 1.9371,
"step": 351000
},
{
"epoch": 2.25,
"eval_loss": 2.1884605884552,
"eval_runtime": 17.7436,
"eval_samples_per_second": 112.717,
"eval_steps_per_second": 1.803,
"step": 351000
},
{
"epoch": 2.25,
"learning_rate": 1.2449007458992609e-05,
"loss": 1.9474,
"step": 352000
},
{
"epoch": 2.25,
"eval_loss": 2.165747880935669,
"eval_runtime": 15.9774,
"eval_samples_per_second": 125.177,
"eval_steps_per_second": 2.003,
"step": 352000
},
{
"epoch": 2.26,
"learning_rate": 1.2342328502910202e-05,
"loss": 1.9444,
"step": 353000
},
{
"epoch": 2.26,
"eval_loss": 2.180070161819458,
"eval_runtime": 15.9059,
"eval_samples_per_second": 125.74,
"eval_steps_per_second": 2.012,
"step": 353000
},
{
"epoch": 2.27,
"learning_rate": 1.2235649546827795e-05,
"loss": 1.9381,
"step": 354000
},
{
"epoch": 2.27,
"eval_loss": 2.195138931274414,
"eval_runtime": 15.4982,
"eval_samples_per_second": 129.047,
"eval_steps_per_second": 2.065,
"step": 354000
},
{
"epoch": 2.27,
"learning_rate": 1.2128970590745389e-05,
"loss": 1.9462,
"step": 355000
},
{
"epoch": 2.27,
"eval_loss": 2.197645902633667,
"eval_runtime": 16.2722,
"eval_samples_per_second": 122.909,
"eval_steps_per_second": 1.967,
"step": 355000
},
{
"epoch": 2.28,
"learning_rate": 1.202229163466298e-05,
"loss": 1.9312,
"step": 356000
},
{
"epoch": 2.28,
"eval_loss": 2.1800289154052734,
"eval_runtime": 15.5962,
"eval_samples_per_second": 128.236,
"eval_steps_per_second": 2.052,
"step": 356000
},
{
"epoch": 2.29,
"learning_rate": 1.1915612678580574e-05,
"loss": 1.9379,
"step": 357000
},
{
"epoch": 2.29,
"eval_loss": 2.175736427307129,
"eval_runtime": 15.9665,
"eval_samples_per_second": 125.262,
"eval_steps_per_second": 2.004,
"step": 357000
},
{
"epoch": 2.29,
"learning_rate": 1.1808933722498165e-05,
"loss": 1.9435,
"step": 358000
},
{
"epoch": 2.29,
"eval_loss": 2.205449104309082,
"eval_runtime": 15.7121,
"eval_samples_per_second": 127.291,
"eval_steps_per_second": 2.037,
"step": 358000
},
{
"epoch": 2.3,
"learning_rate": 1.1702254766415759e-05,
"loss": 1.9448,
"step": 359000
},
{
"epoch": 2.3,
"eval_loss": 2.173300266265869,
"eval_runtime": 16.397,
"eval_samples_per_second": 121.974,
"eval_steps_per_second": 1.952,
"step": 359000
},
{
"epoch": 2.3,
"learning_rate": 1.159557581033335e-05,
"loss": 1.9529,
"step": 360000
},
{
"epoch": 2.3,
"eval_loss": 2.145735263824463,
"eval_runtime": 15.5694,
"eval_samples_per_second": 128.457,
"eval_steps_per_second": 2.055,
"step": 360000
},
{
"epoch": 2.31,
"learning_rate": 1.1488896854250944e-05,
"loss": 1.9444,
"step": 361000
},
{
"epoch": 2.31,
"eval_loss": 2.1839778423309326,
"eval_runtime": 15.6504,
"eval_samples_per_second": 127.792,
"eval_steps_per_second": 2.045,
"step": 361000
},
{
"epoch": 2.32,
"learning_rate": 1.1382217898168535e-05,
"loss": 1.9439,
"step": 362000
},
{
"epoch": 2.32,
"eval_loss": 2.128485918045044,
"eval_runtime": 15.7866,
"eval_samples_per_second": 126.69,
"eval_steps_per_second": 2.027,
"step": 362000
},
{
"epoch": 2.32,
"learning_rate": 1.1275538942086129e-05,
"loss": 1.9345,
"step": 363000
},
{
"epoch": 2.32,
"eval_loss": 2.16981840133667,
"eval_runtime": 16.0509,
"eval_samples_per_second": 124.604,
"eval_steps_per_second": 1.994,
"step": 363000
},
{
"epoch": 2.33,
"learning_rate": 1.1168859986003722e-05,
"loss": 1.9355,
"step": 364000
},
{
"epoch": 2.33,
"eval_loss": 2.1235830783843994,
"eval_runtime": 15.5068,
"eval_samples_per_second": 128.975,
"eval_steps_per_second": 2.064,
"step": 364000
},
{
"epoch": 2.34,
"learning_rate": 1.1062181029921315e-05,
"loss": 1.9385,
"step": 365000
},
{
"epoch": 2.34,
"eval_loss": 2.1465463638305664,
"eval_runtime": 15.3143,
"eval_samples_per_second": 130.597,
"eval_steps_per_second": 2.09,
"step": 365000
},
{
"epoch": 2.34,
"learning_rate": 1.0955502073838907e-05,
"loss": 1.9425,
"step": 366000
},
{
"epoch": 2.34,
"eval_loss": 2.1613283157348633,
"eval_runtime": 15.466,
"eval_samples_per_second": 129.316,
"eval_steps_per_second": 2.069,
"step": 366000
},
{
"epoch": 2.35,
"learning_rate": 1.08488231177565e-05,
"loss": 1.9304,
"step": 367000
},
{
"epoch": 2.35,
"eval_loss": 2.172750949859619,
"eval_runtime": 15.5842,
"eval_samples_per_second": 128.335,
"eval_steps_per_second": 2.053,
"step": 367000
},
{
"epoch": 2.36,
"learning_rate": 1.0742144161674092e-05,
"loss": 1.9339,
"step": 368000
},
{
"epoch": 2.36,
"eval_loss": 2.148078680038452,
"eval_runtime": 15.9481,
"eval_samples_per_second": 125.407,
"eval_steps_per_second": 2.007,
"step": 368000
},
{
"epoch": 2.36,
"learning_rate": 1.0635465205591686e-05,
"loss": 1.9463,
"step": 369000
},
{
"epoch": 2.36,
"eval_loss": 2.1650550365448,
"eval_runtime": 15.3617,
"eval_samples_per_second": 130.194,
"eval_steps_per_second": 2.083,
"step": 369000
},
{
"epoch": 2.37,
"learning_rate": 1.0528786249509277e-05,
"loss": 1.9407,
"step": 370000
},
{
"epoch": 2.37,
"eval_loss": 2.1432077884674072,
"eval_runtime": 15.1001,
"eval_samples_per_second": 132.45,
"eval_steps_per_second": 2.119,
"step": 370000
},
{
"epoch": 2.37,
"learning_rate": 1.0422107293426869e-05,
"loss": 1.9453,
"step": 371000
},
{
"epoch": 2.37,
"eval_loss": 2.147706985473633,
"eval_runtime": 15.9626,
"eval_samples_per_second": 125.293,
"eval_steps_per_second": 2.005,
"step": 371000
},
{
"epoch": 2.38,
"learning_rate": 1.0315428337344462e-05,
"loss": 1.9368,
"step": 372000
},
{
"epoch": 2.38,
"eval_loss": 2.184664249420166,
"eval_runtime": 15.5454,
"eval_samples_per_second": 128.656,
"eval_steps_per_second": 2.058,
"step": 372000
},
{
"epoch": 2.39,
"learning_rate": 1.0208749381262054e-05,
"loss": 1.9407,
"step": 373000
},
{
"epoch": 2.39,
"eval_loss": 2.1857311725616455,
"eval_runtime": 15.3498,
"eval_samples_per_second": 130.295,
"eval_steps_per_second": 2.085,
"step": 373000
},
{
"epoch": 2.39,
"learning_rate": 1.0102070425179647e-05,
"loss": 1.934,
"step": 374000
},
{
"epoch": 2.39,
"eval_loss": 2.119173765182495,
"eval_runtime": 15.4006,
"eval_samples_per_second": 129.865,
"eval_steps_per_second": 2.078,
"step": 374000
},
{
"epoch": 2.4,
"learning_rate": 9.99539146909724e-06,
"loss": 1.9297,
"step": 375000
},
{
"epoch": 2.4,
"eval_loss": 2.1658694744110107,
"eval_runtime": 15.796,
"eval_samples_per_second": 126.615,
"eval_steps_per_second": 2.026,
"step": 375000
},
{
"epoch": 2.41,
"learning_rate": 9.888712513014834e-06,
"loss": 1.9298,
"step": 376000
},
{
"epoch": 2.41,
"eval_loss": 2.171632766723633,
"eval_runtime": 15.3482,
"eval_samples_per_second": 130.308,
"eval_steps_per_second": 2.085,
"step": 376000
},
{
"epoch": 2.41,
"learning_rate": 9.782033556932426e-06,
"loss": 1.9267,
"step": 377000
},
{
"epoch": 2.41,
"eval_loss": 2.1282413005828857,
"eval_runtime": 15.2611,
"eval_samples_per_second": 131.052,
"eval_steps_per_second": 2.097,
"step": 377000
},
{
"epoch": 2.42,
"learning_rate": 9.675354600850019e-06,
"loss": 1.9387,
"step": 378000
},
{
"epoch": 2.42,
"eval_loss": 2.175699472427368,
"eval_runtime": 15.3352,
"eval_samples_per_second": 130.419,
"eval_steps_per_second": 2.087,
"step": 378000
},
{
"epoch": 2.43,
"learning_rate": 9.56867564476761e-06,
"loss": 1.9235,
"step": 379000
},
{
"epoch": 2.43,
"eval_loss": 2.1758999824523926,
"eval_runtime": 16.089,
"eval_samples_per_second": 124.309,
"eval_steps_per_second": 1.989,
"step": 379000
},
{
"epoch": 2.43,
"learning_rate": 9.461996688685204e-06,
"loss": 1.9265,
"step": 380000
},
{
"epoch": 2.43,
"eval_loss": 2.163534164428711,
"eval_runtime": 15.2326,
"eval_samples_per_second": 131.297,
"eval_steps_per_second": 2.101,
"step": 380000
},
{
"epoch": 2.44,
"learning_rate": 9.355317732602796e-06,
"loss": 1.9151,
"step": 381000
},
{
"epoch": 2.44,
"eval_loss": 2.1671011447906494,
"eval_runtime": 15.2621,
"eval_samples_per_second": 131.044,
"eval_steps_per_second": 2.097,
"step": 381000
},
{
"epoch": 2.45,
"learning_rate": 9.248638776520389e-06,
"loss": 1.9262,
"step": 382000
},
{
"epoch": 2.45,
"eval_loss": 2.144550323486328,
"eval_runtime": 15.6946,
"eval_samples_per_second": 127.432,
"eval_steps_per_second": 2.039,
"step": 382000
},
{
"epoch": 2.45,
"learning_rate": 9.14195982043798e-06,
"loss": 1.9311,
"step": 383000
},
{
"epoch": 2.45,
"eval_loss": 2.1890273094177246,
"eval_runtime": 15.377,
"eval_samples_per_second": 130.065,
"eval_steps_per_second": 2.081,
"step": 383000
},
{
"epoch": 2.46,
"learning_rate": 9.035280864355574e-06,
"loss": 1.9305,
"step": 384000
},
{
"epoch": 2.46,
"eval_loss": 2.166837692260742,
"eval_runtime": 15.3262,
"eval_samples_per_second": 130.496,
"eval_steps_per_second": 2.088,
"step": 384000
},
{
"epoch": 2.46,
"learning_rate": 8.928601908273167e-06,
"loss": 1.9237,
"step": 385000
},
{
"epoch": 2.46,
"eval_loss": 2.0922629833221436,
"eval_runtime": 15.1049,
"eval_samples_per_second": 132.408,
"eval_steps_per_second": 2.119,
"step": 385000
},
{
"epoch": 2.47,
"learning_rate": 8.82192295219076e-06,
"loss": 1.9256,
"step": 386000
},
{
"epoch": 2.47,
"eval_loss": 2.1387295722961426,
"eval_runtime": 15.8611,
"eval_samples_per_second": 126.095,
"eval_steps_per_second": 2.018,
"step": 386000
},
{
"epoch": 2.48,
"learning_rate": 8.715243996108352e-06,
"loss": 1.9339,
"step": 387000
},
{
"epoch": 2.48,
"eval_loss": 2.160367250442505,
"eval_runtime": 15.4895,
"eval_samples_per_second": 129.12,
"eval_steps_per_second": 2.066,
"step": 387000
},
{
"epoch": 2.48,
"learning_rate": 8.608565040025944e-06,
"loss": 1.925,
"step": 388000
},
{
"epoch": 2.48,
"eval_loss": 2.1711387634277344,
"eval_runtime": 15.39,
"eval_samples_per_second": 129.955,
"eval_steps_per_second": 2.079,
"step": 388000
},
{
"epoch": 2.49,
"learning_rate": 8.501886083943537e-06,
"loss": 1.9185,
"step": 389000
},
{
"epoch": 2.49,
"eval_loss": 2.1491212844848633,
"eval_runtime": 15.607,
"eval_samples_per_second": 128.147,
"eval_steps_per_second": 2.05,
"step": 389000
},
{
"epoch": 2.5,
"learning_rate": 8.395207127861129e-06,
"loss": 1.9214,
"step": 390000
},
{
"epoch": 2.5,
"eval_loss": 2.1444971561431885,
"eval_runtime": 15.4605,
"eval_samples_per_second": 129.362,
"eval_steps_per_second": 2.07,
"step": 390000
},
{
"epoch": 2.5,
"learning_rate": 8.288528171778722e-06,
"loss": 1.928,
"step": 391000
},
{
"epoch": 2.5,
"eval_loss": 2.1359145641326904,
"eval_runtime": 15.6126,
"eval_samples_per_second": 128.102,
"eval_steps_per_second": 2.05,
"step": 391000
},
{
"epoch": 2.51,
"learning_rate": 8.181849215696314e-06,
"loss": 1.9243,
"step": 392000
},
{
"epoch": 2.51,
"eval_loss": 2.156005620956421,
"eval_runtime": 15.7238,
"eval_samples_per_second": 127.196,
"eval_steps_per_second": 2.035,
"step": 392000
},
{
"epoch": 2.52,
"learning_rate": 8.075170259613907e-06,
"loss": 1.9096,
"step": 393000
},
{
"epoch": 2.52,
"eval_loss": 2.1110196113586426,
"eval_runtime": 15.176,
"eval_samples_per_second": 131.787,
"eval_steps_per_second": 2.109,
"step": 393000
},
{
"epoch": 2.52,
"learning_rate": 7.9684913035315e-06,
"loss": 1.9254,
"step": 394000
},
{
"epoch": 2.52,
"eval_loss": 2.135141611099243,
"eval_runtime": 15.4036,
"eval_samples_per_second": 129.84,
"eval_steps_per_second": 2.077,
"step": 394000
},
{
"epoch": 2.53,
"learning_rate": 7.861812347449094e-06,
"loss": 1.9214,
"step": 395000
},
{
"epoch": 2.53,
"eval_loss": 2.1366610527038574,
"eval_runtime": 15.7136,
"eval_samples_per_second": 127.279,
"eval_steps_per_second": 2.036,
"step": 395000
},
{
"epoch": 2.53,
"learning_rate": 7.755133391366686e-06,
"loss": 1.9229,
"step": 396000
},
{
"epoch": 2.53,
"eval_loss": 2.1293559074401855,
"eval_runtime": 15.3708,
"eval_samples_per_second": 130.117,
"eval_steps_per_second": 2.082,
"step": 396000
},
{
"epoch": 2.54,
"learning_rate": 7.64845443528428e-06,
"loss": 1.9166,
"step": 397000
},
{
"epoch": 2.54,
"eval_loss": 2.1272215843200684,
"eval_runtime": 15.7644,
"eval_samples_per_second": 126.868,
"eval_steps_per_second": 2.03,
"step": 397000
},
{
"epoch": 2.55,
"learning_rate": 7.541775479201871e-06,
"loss": 1.9152,
"step": 398000
},
{
"epoch": 2.55,
"eval_loss": 2.1080117225646973,
"eval_runtime": 15.3816,
"eval_samples_per_second": 130.026,
"eval_steps_per_second": 2.08,
"step": 398000
},
{
"epoch": 2.55,
"learning_rate": 7.435096523119464e-06,
"loss": 1.9138,
"step": 399000
},
{
"epoch": 2.55,
"eval_loss": 2.156583309173584,
"eval_runtime": 15.5093,
"eval_samples_per_second": 128.955,
"eval_steps_per_second": 2.063,
"step": 399000
},
{
"epoch": 2.56,
"learning_rate": 7.328417567037056e-06,
"loss": 1.9193,
"step": 400000
},
{
"epoch": 2.56,
"eval_loss": 2.1462528705596924,
"eval_runtime": 15.6345,
"eval_samples_per_second": 127.923,
"eval_steps_per_second": 2.047,
"step": 400000
},
{
"epoch": 2.57,
"learning_rate": 7.221738610954649e-06,
"loss": 1.9216,
"step": 401000
},
{
"epoch": 2.57,
"eval_loss": 2.1311724185943604,
"eval_runtime": 15.5304,
"eval_samples_per_second": 128.78,
"eval_steps_per_second": 2.06,
"step": 401000
},
{
"epoch": 2.57,
"learning_rate": 7.115059654872242e-06,
"loss": 1.9171,
"step": 402000
},
{
"epoch": 2.57,
"eval_loss": 2.1334073543548584,
"eval_runtime": 15.6034,
"eval_samples_per_second": 128.177,
"eval_steps_per_second": 2.051,
"step": 402000
},
{
"epoch": 2.58,
"learning_rate": 7.008380698789835e-06,
"loss": 1.9148,
"step": 403000
},
{
"epoch": 2.58,
"eval_loss": 2.1480307579040527,
"eval_runtime": 15.4786,
"eval_samples_per_second": 129.211,
"eval_steps_per_second": 2.067,
"step": 403000
},
{
"epoch": 2.59,
"learning_rate": 6.901701742707427e-06,
"loss": 1.9204,
"step": 404000
},
{
"epoch": 2.59,
"eval_loss": 2.1620922088623047,
"eval_runtime": 17.9933,
"eval_samples_per_second": 111.152,
"eval_steps_per_second": 1.778,
"step": 404000
},
{
"epoch": 2.59,
"learning_rate": 6.79502278662502e-06,
"loss": 1.9163,
"step": 405000
},
{
"epoch": 2.59,
"eval_loss": 2.1261579990386963,
"eval_runtime": 15.7916,
"eval_samples_per_second": 126.65,
"eval_steps_per_second": 2.026,
"step": 405000
},
{
"epoch": 2.6,
"learning_rate": 6.688343830542612e-06,
"loss": 1.9147,
"step": 406000
},
{
"epoch": 2.6,
"eval_loss": 2.134714365005493,
"eval_runtime": 15.563,
"eval_samples_per_second": 128.51,
"eval_steps_per_second": 2.056,
"step": 406000
},
{
"epoch": 2.61,
"learning_rate": 6.581664874460204e-06,
"loss": 1.9107,
"step": 407000
},
{
"epoch": 2.61,
"eval_loss": 2.094939947128296,
"eval_runtime": 15.3395,
"eval_samples_per_second": 130.383,
"eval_steps_per_second": 2.086,
"step": 407000
},
{
"epoch": 2.61,
"learning_rate": 6.474985918377798e-06,
"loss": 1.9185,
"step": 408000
},
{
"epoch": 2.61,
"eval_loss": 2.1135287284851074,
"eval_runtime": 15.2587,
"eval_samples_per_second": 131.072,
"eval_steps_per_second": 2.097,
"step": 408000
},
{
"epoch": 2.62,
"learning_rate": 6.368306962295389e-06,
"loss": 1.9134,
"step": 409000
},
{
"epoch": 2.62,
"eval_loss": 2.1412642002105713,
"eval_runtime": 15.702,
"eval_samples_per_second": 127.372,
"eval_steps_per_second": 2.038,
"step": 409000
},
{
"epoch": 2.62,
"learning_rate": 6.261628006212983e-06,
"loss": 1.9144,
"step": 410000
},
{
"epoch": 2.62,
"eval_loss": 2.1682534217834473,
"eval_runtime": 15.4072,
"eval_samples_per_second": 129.81,
"eval_steps_per_second": 2.077,
"step": 410000
},
{
"epoch": 2.63,
"learning_rate": 6.154949050130575e-06,
"loss": 1.9086,
"step": 411000
},
{
"epoch": 2.63,
"eval_loss": 2.141894578933716,
"eval_runtime": 15.208,
"eval_samples_per_second": 131.51,
"eval_steps_per_second": 2.104,
"step": 411000
},
{
"epoch": 2.64,
"learning_rate": 6.0482700940481686e-06,
"loss": 1.9101,
"step": 412000
},
{
"epoch": 2.64,
"eval_loss": 2.1342506408691406,
"eval_runtime": 15.2405,
"eval_samples_per_second": 131.229,
"eval_steps_per_second": 2.1,
"step": 412000
},
{
"epoch": 2.64,
"learning_rate": 5.941591137965761e-06,
"loss": 1.9086,
"step": 413000
},
{
"epoch": 2.64,
"eval_loss": 2.097320318222046,
"eval_runtime": 15.5657,
"eval_samples_per_second": 128.488,
"eval_steps_per_second": 2.056,
"step": 413000
},
{
"epoch": 2.65,
"learning_rate": 5.8349121818833536e-06,
"loss": 1.9089,
"step": 414000
},
{
"epoch": 2.65,
"eval_loss": 2.1229472160339355,
"eval_runtime": 15.1808,
"eval_samples_per_second": 131.746,
"eval_steps_per_second": 2.108,
"step": 414000
},
{
"epoch": 2.66,
"learning_rate": 5.728233225800946e-06,
"loss": 1.915,
"step": 415000
},
{
"epoch": 2.66,
"eval_loss": 2.1642491817474365,
"eval_runtime": 15.6522,
"eval_samples_per_second": 127.777,
"eval_steps_per_second": 2.044,
"step": 415000
},
{
"epoch": 2.66,
"learning_rate": 5.621554269718539e-06,
"loss": 1.914,
"step": 416000
},
{
"epoch": 2.66,
"eval_loss": 2.1208455562591553,
"eval_runtime": 15.453,
"eval_samples_per_second": 129.425,
"eval_steps_per_second": 2.071,
"step": 416000
},
{
"epoch": 2.67,
"learning_rate": 5.514875313636131e-06,
"loss": 1.9031,
"step": 417000
},
{
"epoch": 2.67,
"eval_loss": 2.103487253189087,
"eval_runtime": 15.4394,
"eval_samples_per_second": 129.539,
"eval_steps_per_second": 2.073,
"step": 417000
},
{
"epoch": 2.68,
"learning_rate": 5.408196357553724e-06,
"loss": 1.9015,
"step": 418000
},
{
"epoch": 2.68,
"eval_loss": 2.1312220096588135,
"eval_runtime": 15.3068,
"eval_samples_per_second": 130.661,
"eval_steps_per_second": 2.091,
"step": 418000
},
{
"epoch": 2.68,
"learning_rate": 5.301517401471316e-06,
"loss": 1.9069,
"step": 419000
},
{
"epoch": 2.68,
"eval_loss": 2.1444790363311768,
"eval_runtime": 15.4574,
"eval_samples_per_second": 129.388,
"eval_steps_per_second": 2.07,
"step": 419000
},
{
"epoch": 2.69,
"learning_rate": 5.194838445388909e-06,
"loss": 1.9016,
"step": 420000
},
{
"epoch": 2.69,
"eval_loss": 2.1105127334594727,
"eval_runtime": 15.3042,
"eval_samples_per_second": 130.683,
"eval_steps_per_second": 2.091,
"step": 420000
},
{
"epoch": 2.69,
"learning_rate": 5.088159489306501e-06,
"loss": 1.8882,
"step": 421000
},
{
"epoch": 2.69,
"eval_loss": 2.151632785797119,
"eval_runtime": 15.8977,
"eval_samples_per_second": 125.805,
"eval_steps_per_second": 2.013,
"step": 421000
},
{
"epoch": 2.7,
"learning_rate": 4.9814805332240945e-06,
"loss": 1.9158,
"step": 422000
},
{
"epoch": 2.7,
"eval_loss": 2.1242105960845947,
"eval_runtime": 15.298,
"eval_samples_per_second": 130.736,
"eval_steps_per_second": 2.092,
"step": 422000
},
{
"epoch": 2.71,
"learning_rate": 4.874801577141687e-06,
"loss": 1.9136,
"step": 423000
},
{
"epoch": 2.71,
"eval_loss": 2.1192123889923096,
"eval_runtime": 15.1175,
"eval_samples_per_second": 132.297,
"eval_steps_per_second": 2.117,
"step": 423000
},
{
"epoch": 2.71,
"learning_rate": 4.7681226210592795e-06,
"loss": 1.916,
"step": 424000
},
{
"epoch": 2.71,
"eval_loss": 2.1400868892669678,
"eval_runtime": 15.3165,
"eval_samples_per_second": 130.578,
"eval_steps_per_second": 2.089,
"step": 424000
},
{
"epoch": 2.72,
"learning_rate": 4.661443664976872e-06,
"loss": 1.8986,
"step": 425000
},
{
"epoch": 2.72,
"eval_loss": 2.158984899520874,
"eval_runtime": 15.2786,
"eval_samples_per_second": 130.902,
"eval_steps_per_second": 2.094,
"step": 425000
},
{
"epoch": 2.73,
"learning_rate": 4.5547647088944646e-06,
"loss": 1.9046,
"step": 426000
},
{
"epoch": 2.73,
"eval_loss": 2.1008715629577637,
"eval_runtime": 15.3482,
"eval_samples_per_second": 130.309,
"eval_steps_per_second": 2.085,
"step": 426000
},
{
"epoch": 2.73,
"learning_rate": 4.448085752812058e-06,
"loss": 1.9019,
"step": 427000
},
{
"epoch": 2.73,
"eval_loss": 2.1234779357910156,
"eval_runtime": 15.3947,
"eval_samples_per_second": 129.915,
"eval_steps_per_second": 2.079,
"step": 427000
},
{
"epoch": 2.74,
"learning_rate": 4.34140679672965e-06,
"loss": 1.9075,
"step": 428000
},
{
"epoch": 2.74,
"eval_loss": 2.1445555686950684,
"eval_runtime": 15.263,
"eval_samples_per_second": 131.036,
"eval_steps_per_second": 2.097,
"step": 428000
},
{
"epoch": 2.75,
"learning_rate": 4.234727840647243e-06,
"loss": 1.9023,
"step": 429000
},
{
"epoch": 2.75,
"eval_loss": 2.1059927940368652,
"eval_runtime": 15.6241,
"eval_samples_per_second": 128.007,
"eval_steps_per_second": 2.048,
"step": 429000
},
{
"epoch": 2.75,
"learning_rate": 4.1280488845648354e-06,
"loss": 1.9096,
"step": 430000
},
{
"epoch": 2.75,
"eval_loss": 2.124612331390381,
"eval_runtime": 15.4182,
"eval_samples_per_second": 129.717,
"eval_steps_per_second": 2.075,
"step": 430000
},
{
"epoch": 2.76,
"learning_rate": 4.021369928482428e-06,
"loss": 1.9021,
"step": 431000
},
{
"epoch": 2.76,
"eval_loss": 2.1339197158813477,
"eval_runtime": 15.3184,
"eval_samples_per_second": 130.562,
"eval_steps_per_second": 2.089,
"step": 431000
},
{
"epoch": 2.77,
"learning_rate": 3.914690972400021e-06,
"loss": 1.9051,
"step": 432000
},
{
"epoch": 2.77,
"eval_loss": 2.150739908218384,
"eval_runtime": 15.3685,
"eval_samples_per_second": 130.137,
"eval_steps_per_second": 2.082,
"step": 432000
},
{
"epoch": 2.77,
"learning_rate": 3.808012016317614e-06,
"loss": 1.8959,
"step": 433000
},
{
"epoch": 2.77,
"eval_loss": 2.1340439319610596,
"eval_runtime": 15.9351,
"eval_samples_per_second": 125.509,
"eval_steps_per_second": 2.008,
"step": 433000
},
{
"epoch": 2.78,
"learning_rate": 3.7013330602352055e-06,
"loss": 1.8924,
"step": 434000
},
{
"epoch": 2.78,
"eval_loss": 2.1609554290771484,
"eval_runtime": 15.2114,
"eval_samples_per_second": 131.48,
"eval_steps_per_second": 2.104,
"step": 434000
},
{
"epoch": 2.78,
"learning_rate": 3.5946541041527984e-06,
"loss": 1.9091,
"step": 435000
},
{
"epoch": 2.78,
"eval_loss": 2.147794008255005,
"eval_runtime": 15.5411,
"eval_samples_per_second": 128.691,
"eval_steps_per_second": 2.059,
"step": 435000
},
{
"epoch": 2.79,
"learning_rate": 3.487975148070391e-06,
"loss": 1.8908,
"step": 436000
},
{
"epoch": 2.79,
"eval_loss": 2.100537061691284,
"eval_runtime": 15.6967,
"eval_samples_per_second": 127.415,
"eval_steps_per_second": 2.039,
"step": 436000
},
{
"epoch": 2.8,
"learning_rate": 3.3812961919879834e-06,
"loss": 1.8946,
"step": 437000
},
{
"epoch": 2.8,
"eval_loss": 2.111453056335449,
"eval_runtime": 15.3824,
"eval_samples_per_second": 130.019,
"eval_steps_per_second": 2.08,
"step": 437000
},
{
"epoch": 2.8,
"learning_rate": 3.2746172359055764e-06,
"loss": 1.8977,
"step": 438000
},
{
"epoch": 2.8,
"eval_loss": 2.130976676940918,
"eval_runtime": 15.1954,
"eval_samples_per_second": 131.618,
"eval_steps_per_second": 2.106,
"step": 438000
},
{
"epoch": 2.81,
"learning_rate": 3.167938279823169e-06,
"loss": 1.9021,
"step": 439000
},
{
"epoch": 2.81,
"eval_loss": 2.1252684593200684,
"eval_runtime": 15.3946,
"eval_samples_per_second": 129.916,
"eval_steps_per_second": 2.079,
"step": 439000
},
{
"epoch": 2.82,
"learning_rate": 3.061259323740762e-06,
"loss": 1.9019,
"step": 440000
},
{
"epoch": 2.82,
"eval_loss": 2.1282765865325928,
"eval_runtime": 15.6319,
"eval_samples_per_second": 127.943,
"eval_steps_per_second": 2.047,
"step": 440000
},
{
"epoch": 2.82,
"learning_rate": 2.9545803676583543e-06,
"loss": 1.8947,
"step": 441000
},
{
"epoch": 2.82,
"eval_loss": 2.1524507999420166,
"eval_runtime": 15.3337,
"eval_samples_per_second": 130.432,
"eval_steps_per_second": 2.087,
"step": 441000
},
{
"epoch": 2.83,
"learning_rate": 2.847901411575947e-06,
"loss": 1.8854,
"step": 442000
},
{
"epoch": 2.83,
"eval_loss": 2.1064517498016357,
"eval_runtime": 15.2656,
"eval_samples_per_second": 131.013,
"eval_steps_per_second": 2.096,
"step": 442000
},
{
"epoch": 2.84,
"learning_rate": 2.7412224554935398e-06,
"loss": 1.9007,
"step": 443000
},
{
"epoch": 2.84,
"eval_loss": 2.0694828033447266,
"eval_runtime": 15.8869,
"eval_samples_per_second": 125.89,
"eval_steps_per_second": 2.014,
"step": 443000
},
{
"epoch": 2.84,
"learning_rate": 2.6345434994111323e-06,
"loss": 1.8981,
"step": 444000
},
{
"epoch": 2.84,
"eval_loss": 2.1273715496063232,
"eval_runtime": 15.1985,
"eval_samples_per_second": 131.592,
"eval_steps_per_second": 2.105,
"step": 444000
},
{
"epoch": 2.85,
"learning_rate": 2.527864543328725e-06,
"loss": 1.8872,
"step": 445000
},
{
"epoch": 2.85,
"eval_loss": 2.1042518615722656,
"eval_runtime": 15.3793,
"eval_samples_per_second": 130.045,
"eval_steps_per_second": 2.081,
"step": 445000
},
{
"epoch": 2.85,
"learning_rate": 2.4211855872463177e-06,
"loss": 1.8957,
"step": 446000
},
{
"epoch": 2.85,
"eval_loss": 2.0750997066497803,
"eval_runtime": 15.5989,
"eval_samples_per_second": 128.214,
"eval_steps_per_second": 2.051,
"step": 446000
},
{
"epoch": 2.86,
"learning_rate": 2.3145066311639102e-06,
"loss": 1.9031,
"step": 447000
},
{
"epoch": 2.86,
"eval_loss": 2.127918004989624,
"eval_runtime": 15.6223,
"eval_samples_per_second": 128.022,
"eval_steps_per_second": 2.048,
"step": 447000
},
{
"epoch": 2.87,
"learning_rate": 2.2078276750815028e-06,
"loss": 1.9001,
"step": 448000
},
{
"epoch": 2.87,
"eval_loss": 2.1019787788391113,
"eval_runtime": 15.4843,
"eval_samples_per_second": 129.163,
"eval_steps_per_second": 2.067,
"step": 448000
},
{
"epoch": 2.87,
"learning_rate": 2.1011487189990953e-06,
"loss": 1.8964,
"step": 449000
},
{
"epoch": 2.87,
"eval_loss": 2.0935049057006836,
"eval_runtime": 15.5998,
"eval_samples_per_second": 128.206,
"eval_steps_per_second": 2.051,
"step": 449000
},
{
"epoch": 2.88,
"learning_rate": 1.994469762916688e-06,
"loss": 1.9003,
"step": 450000
},
{
"epoch": 2.88,
"eval_loss": 2.1466352939605713,
"eval_runtime": 15.1432,
"eval_samples_per_second": 132.072,
"eval_steps_per_second": 2.113,
"step": 450000
},
{
"epoch": 2.89,
"learning_rate": 1.8877908068342807e-06,
"loss": 1.9041,
"step": 451000
},
{
"epoch": 2.89,
"eval_loss": 2.1213934421539307,
"eval_runtime": 15.5486,
"eval_samples_per_second": 128.629,
"eval_steps_per_second": 2.058,
"step": 451000
},
{
"epoch": 2.89,
"learning_rate": 1.7811118507518734e-06,
"loss": 1.8972,
"step": 452000
},
{
"epoch": 2.89,
"eval_loss": 2.139911651611328,
"eval_runtime": 17.4254,
"eval_samples_per_second": 114.775,
"eval_steps_per_second": 1.836,
"step": 452000
},
{
"epoch": 2.9,
"learning_rate": 1.674432894669466e-06,
"loss": 1.9001,
"step": 453000
},
{
"epoch": 2.9,
"eval_loss": 2.1135449409484863,
"eval_runtime": 15.3928,
"eval_samples_per_second": 129.931,
"eval_steps_per_second": 2.079,
"step": 453000
},
{
"epoch": 2.91,
"learning_rate": 1.5677539385870587e-06,
"loss": 1.9034,
"step": 454000
},
{
"epoch": 2.91,
"eval_loss": 2.0974974632263184,
"eval_runtime": 15.5392,
"eval_samples_per_second": 128.707,
"eval_steps_per_second": 2.059,
"step": 454000
},
{
"epoch": 2.91,
"learning_rate": 1.4610749825046512e-06,
"loss": 1.88,
"step": 455000
},
{
"epoch": 2.91,
"eval_loss": 2.086946725845337,
"eval_runtime": 15.3909,
"eval_samples_per_second": 129.947,
"eval_steps_per_second": 2.079,
"step": 455000
},
{
"epoch": 2.92,
"learning_rate": 1.354396026422244e-06,
"loss": 1.894,
"step": 456000
},
{
"epoch": 2.92,
"eval_loss": 2.0814855098724365,
"eval_runtime": 16.0281,
"eval_samples_per_second": 124.781,
"eval_steps_per_second": 1.996,
"step": 456000
},
{
"epoch": 2.93,
"learning_rate": 1.2477170703398366e-06,
"loss": 1.8956,
"step": 457000
},
{
"epoch": 2.93,
"eval_loss": 2.1207478046417236,
"eval_runtime": 16.265,
"eval_samples_per_second": 122.964,
"eval_steps_per_second": 1.967,
"step": 457000
},
{
"epoch": 2.93,
"learning_rate": 1.1410381142574291e-06,
"loss": 1.8882,
"step": 458000
},
{
"epoch": 2.93,
"eval_loss": 2.1136324405670166,
"eval_runtime": 15.2771,
"eval_samples_per_second": 130.915,
"eval_steps_per_second": 2.095,
"step": 458000
},
{
"epoch": 2.94,
"learning_rate": 1.0343591581750219e-06,
"loss": 1.8924,
"step": 459000
},
{
"epoch": 2.94,
"eval_loss": 2.137352466583252,
"eval_runtime": 15.981,
"eval_samples_per_second": 125.149,
"eval_steps_per_second": 2.002,
"step": 459000
},
{
"epoch": 2.94,
"learning_rate": 9.276802020926144e-07,
"loss": 1.8953,
"step": 460000
},
{
"epoch": 2.94,
"eval_loss": 2.1012661457061768,
"eval_runtime": 15.3369,
"eval_samples_per_second": 130.404,
"eval_steps_per_second": 2.086,
"step": 460000
},
{
"epoch": 2.95,
"learning_rate": 8.210012460102071e-07,
"loss": 1.893,
"step": 461000
},
{
"epoch": 2.95,
"eval_loss": 2.135178804397583,
"eval_runtime": 15.8046,
"eval_samples_per_second": 126.546,
"eval_steps_per_second": 2.025,
"step": 461000
},
{
"epoch": 2.96,
"learning_rate": 7.143222899277997e-07,
"loss": 1.8903,
"step": 462000
},
{
"epoch": 2.96,
"eval_loss": 2.1333072185516357,
"eval_runtime": 15.5282,
"eval_samples_per_second": 128.798,
"eval_steps_per_second": 2.061,
"step": 462000
},
{
"epoch": 2.96,
"learning_rate": 6.076433338453923e-07,
"loss": 1.8895,
"step": 463000
},
{
"epoch": 2.96,
"eval_loss": 2.1294093132019043,
"eval_runtime": 15.3716,
"eval_samples_per_second": 130.11,
"eval_steps_per_second": 2.082,
"step": 463000
},
{
"epoch": 2.97,
"learning_rate": 5.009643777629849e-07,
"loss": 1.8939,
"step": 464000
},
{
"epoch": 2.97,
"eval_loss": 2.1235413551330566,
"eval_runtime": 15.3293,
"eval_samples_per_second": 130.469,
"eval_steps_per_second": 2.088,
"step": 464000
},
{
"epoch": 2.98,
"learning_rate": 3.9428542168057766e-07,
"loss": 1.8915,
"step": 465000
},
{
"epoch": 2.98,
"eval_loss": 2.0933895111083984,
"eval_runtime": 15.9617,
"eval_samples_per_second": 125.3,
"eval_steps_per_second": 2.005,
"step": 465000
},
{
"epoch": 2.98,
"learning_rate": 2.8760646559817023e-07,
"loss": 1.8884,
"step": 466000
},
{
"epoch": 2.98,
"eval_loss": 2.1353940963745117,
"eval_runtime": 15.6819,
"eval_samples_per_second": 127.536,
"eval_steps_per_second": 2.041,
"step": 466000
},
{
"epoch": 2.99,
"learning_rate": 1.809275095157629e-07,
"loss": 1.8932,
"step": 467000
},
{
"epoch": 2.99,
"eval_loss": 2.1101338863372803,
"eval_runtime": 15.545,
"eval_samples_per_second": 128.659,
"eval_steps_per_second": 2.059,
"step": 467000
},
{
"epoch": 3.0,
"learning_rate": 7.424855343335553e-08,
"loss": 1.9,
"step": 468000
},
{
"epoch": 3.0,
"eval_loss": 2.130716562271118,
"eval_runtime": 15.4114,
"eval_samples_per_second": 129.774,
"eval_steps_per_second": 2.076,
"step": 468000
},
{
"epoch": 3.0,
"step": 468696,
"total_flos": 6.219491681834838e+18,
"train_loss": 0.6429893864398178,
"train_runtime": 172266.1403,
"train_samples_per_second": 174.128,
"train_steps_per_second": 2.721
}
],
"max_steps": 468696,
"num_train_epochs": 3,
"total_flos": 6.219491681834838e+18,
"trial_name": null,
"trial_params": null
}