{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "global_step": 468696, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 4.967996313175278e-05, "loss": 2.3652, "step": 1000 }, { "epoch": 0.01, "eval_loss": 2.5228402614593506, "eval_runtime": 19.0224, "eval_samples_per_second": 105.139, "eval_steps_per_second": 1.682, "step": 1000 }, { "epoch": 0.01, "learning_rate": 4.935992626350556e-05, "loss": 2.3257, "step": 2000 }, { "epoch": 0.01, "eval_loss": 2.4702858924865723, "eval_runtime": 19.2339, "eval_samples_per_second": 103.983, "eval_steps_per_second": 1.664, "step": 2000 }, { "epoch": 0.02, "learning_rate": 4.903988939525834e-05, "loss": 2.3123, "step": 3000 }, { "epoch": 0.02, "eval_loss": 2.5149893760681152, "eval_runtime": 18.8526, "eval_samples_per_second": 106.086, "eval_steps_per_second": 1.697, "step": 3000 }, { "epoch": 0.03, "learning_rate": 4.871985252701111e-05, "loss": 2.3068, "step": 4000 }, { "epoch": 0.03, "eval_loss": 2.4867441654205322, "eval_runtime": 18.7454, "eval_samples_per_second": 106.693, "eval_steps_per_second": 1.707, "step": 4000 }, { "epoch": 0.03, "learning_rate": 4.8399815658763894e-05, "loss": 2.2906, "step": 5000 }, { "epoch": 0.03, "eval_loss": 2.4581546783447266, "eval_runtime": 19.2143, "eval_samples_per_second": 104.089, "eval_steps_per_second": 1.665, "step": 5000 }, { "epoch": 0.04, "learning_rate": 4.807977879051667e-05, "loss": 2.2817, "step": 6000 }, { "epoch": 0.04, "eval_loss": 2.477738380432129, "eval_runtime": 18.9443, "eval_samples_per_second": 105.573, "eval_steps_per_second": 1.689, "step": 6000 }, { "epoch": 0.04, "learning_rate": 4.7759741922269444e-05, "loss": 2.2713, "step": 7000 }, { "epoch": 0.04, "eval_loss": 2.490509271621704, "eval_runtime": 19.0513, "eval_samples_per_second": 104.98, "eval_steps_per_second": 1.68, "step": 7000 }, { "epoch": 0.05, "learning_rate": 4.743970505402223e-05, "loss": 2.2653, "step": 8000 }, { "epoch": 0.05, "eval_loss": 2.472813129425049, "eval_runtime": 19.2806, "eval_samples_per_second": 103.731, "eval_steps_per_second": 1.66, "step": 8000 }, { "epoch": 0.06, "learning_rate": 4.7119668185775e-05, "loss": 2.2581, "step": 9000 }, { "epoch": 0.06, "eval_loss": 2.4772017002105713, "eval_runtime": 19.5407, "eval_samples_per_second": 102.351, "eval_steps_per_second": 1.638, "step": 9000 }, { "epoch": 0.06, "learning_rate": 4.679963131752778e-05, "loss": 2.2687, "step": 10000 }, { "epoch": 0.06, "eval_loss": 2.4584801197052, "eval_runtime": 18.8505, "eval_samples_per_second": 106.098, "eval_steps_per_second": 1.698, "step": 10000 }, { "epoch": 0.07, "learning_rate": 4.6479594449280565e-05, "loss": 2.2473, "step": 11000 }, { "epoch": 0.07, "eval_loss": 2.450211524963379, "eval_runtime": 19.0047, "eval_samples_per_second": 105.237, "eval_steps_per_second": 1.684, "step": 11000 }, { "epoch": 0.08, "learning_rate": 4.6159557581033336e-05, "loss": 2.2536, "step": 12000 }, { "epoch": 0.08, "eval_loss": 2.402937650680542, "eval_runtime": 19.1856, "eval_samples_per_second": 104.245, "eval_steps_per_second": 1.668, "step": 12000 }, { "epoch": 0.08, "learning_rate": 4.5839520712786115e-05, "loss": 2.2355, "step": 13000 }, { "epoch": 0.08, "eval_loss": 2.5034797191619873, "eval_runtime": 18.9351, "eval_samples_per_second": 105.624, "eval_steps_per_second": 1.69, "step": 13000 }, { "epoch": 0.09, "learning_rate": 4.551948384453889e-05, "loss": 2.2356, "step": 14000 }, { "epoch": 0.09, "eval_loss": 2.443594217300415, "eval_runtime": 19.1979, "eval_samples_per_second": 104.178, "eval_steps_per_second": 1.667, "step": 14000 }, { "epoch": 0.1, "learning_rate": 4.519944697629167e-05, "loss": 2.2385, "step": 15000 }, { "epoch": 0.1, "eval_loss": 2.4230918884277344, "eval_runtime": 19.1941, "eval_samples_per_second": 104.199, "eval_steps_per_second": 1.667, "step": 15000 }, { "epoch": 0.1, "learning_rate": 4.487941010804445e-05, "loss": 2.229, "step": 16000 }, { "epoch": 0.1, "eval_loss": 2.435939073562622, "eval_runtime": 18.7132, "eval_samples_per_second": 106.876, "eval_steps_per_second": 1.71, "step": 16000 }, { "epoch": 0.11, "learning_rate": 4.455937323979723e-05, "loss": 2.2308, "step": 17000 }, { "epoch": 0.11, "eval_loss": 2.379002094268799, "eval_runtime": 18.8323, "eval_samples_per_second": 106.2, "eval_steps_per_second": 1.699, "step": 17000 }, { "epoch": 0.12, "learning_rate": 4.4239336371550006e-05, "loss": 2.2247, "step": 18000 }, { "epoch": 0.12, "eval_loss": 2.440680742263794, "eval_runtime": 18.8124, "eval_samples_per_second": 106.313, "eval_steps_per_second": 1.701, "step": 18000 }, { "epoch": 0.12, "learning_rate": 4.391929950330278e-05, "loss": 2.2262, "step": 19000 }, { "epoch": 0.12, "eval_loss": 2.401104211807251, "eval_runtime": 18.8589, "eval_samples_per_second": 106.051, "eval_steps_per_second": 1.697, "step": 19000 }, { "epoch": 0.13, "learning_rate": 4.359926263505556e-05, "loss": 2.2074, "step": 20000 }, { "epoch": 0.13, "eval_loss": 2.382688522338867, "eval_runtime": 18.7139, "eval_samples_per_second": 106.872, "eval_steps_per_second": 1.71, "step": 20000 }, { "epoch": 0.13, "learning_rate": 4.327922576680834e-05, "loss": 2.2204, "step": 21000 }, { "epoch": 0.13, "eval_loss": 2.421189308166504, "eval_runtime": 18.9386, "eval_samples_per_second": 105.604, "eval_steps_per_second": 1.69, "step": 21000 }, { "epoch": 0.14, "learning_rate": 4.295918889856111e-05, "loss": 2.2123, "step": 22000 }, { "epoch": 0.14, "eval_loss": 2.4362740516662598, "eval_runtime": 18.9745, "eval_samples_per_second": 105.405, "eval_steps_per_second": 1.686, "step": 22000 }, { "epoch": 0.15, "learning_rate": 4.263915203031389e-05, "loss": 2.2225, "step": 23000 }, { "epoch": 0.15, "eval_loss": 2.426682710647583, "eval_runtime": 19.1794, "eval_samples_per_second": 104.278, "eval_steps_per_second": 1.668, "step": 23000 }, { "epoch": 0.15, "learning_rate": 4.231911516206668e-05, "loss": 2.2137, "step": 24000 }, { "epoch": 0.15, "eval_loss": 2.4169669151306152, "eval_runtime": 18.8197, "eval_samples_per_second": 106.272, "eval_steps_per_second": 1.7, "step": 24000 }, { "epoch": 0.16, "learning_rate": 4.199907829381945e-05, "loss": 2.2143, "step": 25000 }, { "epoch": 0.16, "eval_loss": 2.4082441329956055, "eval_runtime": 18.9737, "eval_samples_per_second": 105.409, "eval_steps_per_second": 1.687, "step": 25000 }, { "epoch": 0.17, "learning_rate": 4.167904142557223e-05, "loss": 2.2131, "step": 26000 }, { "epoch": 0.17, "eval_loss": 2.4836766719818115, "eval_runtime": 19.0574, "eval_samples_per_second": 104.946, "eval_steps_per_second": 1.679, "step": 26000 }, { "epoch": 0.17, "learning_rate": 4.1359004557325005e-05, "loss": 2.1954, "step": 27000 }, { "epoch": 0.17, "eval_loss": 2.43381404876709, "eval_runtime": 18.8859, "eval_samples_per_second": 105.899, "eval_steps_per_second": 1.694, "step": 27000 }, { "epoch": 0.18, "learning_rate": 4.1038967689077783e-05, "loss": 2.1934, "step": 28000 }, { "epoch": 0.18, "eval_loss": 2.4075064659118652, "eval_runtime": 18.689, "eval_samples_per_second": 107.015, "eval_steps_per_second": 1.712, "step": 28000 }, { "epoch": 0.19, "learning_rate": 4.071893082083056e-05, "loss": 2.1943, "step": 29000 }, { "epoch": 0.19, "eval_loss": 2.383098602294922, "eval_runtime": 18.8218, "eval_samples_per_second": 106.26, "eval_steps_per_second": 1.7, "step": 29000 }, { "epoch": 0.19, "learning_rate": 4.039889395258334e-05, "loss": 2.1944, "step": 30000 }, { "epoch": 0.19, "eval_loss": 2.3953185081481934, "eval_runtime": 18.9451, "eval_samples_per_second": 105.568, "eval_steps_per_second": 1.689, "step": 30000 }, { "epoch": 0.2, "learning_rate": 4.007885708433612e-05, "loss": 2.1914, "step": 31000 }, { "epoch": 0.2, "eval_loss": 2.411050796508789, "eval_runtime": 18.7128, "eval_samples_per_second": 106.878, "eval_steps_per_second": 1.71, "step": 31000 }, { "epoch": 0.2, "learning_rate": 3.975882021608889e-05, "loss": 2.1865, "step": 32000 }, { "epoch": 0.2, "eval_loss": 2.390427827835083, "eval_runtime": 18.9045, "eval_samples_per_second": 105.795, "eval_steps_per_second": 1.693, "step": 32000 }, { "epoch": 0.21, "learning_rate": 3.9438783347841675e-05, "loss": 2.1871, "step": 33000 }, { "epoch": 0.21, "eval_loss": 2.401388168334961, "eval_runtime": 18.7096, "eval_samples_per_second": 106.897, "eval_steps_per_second": 1.71, "step": 33000 }, { "epoch": 0.22, "learning_rate": 3.9118746479594454e-05, "loss": 2.1792, "step": 34000 }, { "epoch": 0.22, "eval_loss": 2.4562745094299316, "eval_runtime": 18.8567, "eval_samples_per_second": 106.063, "eval_steps_per_second": 1.697, "step": 34000 }, { "epoch": 0.22, "learning_rate": 3.8798709611347225e-05, "loss": 2.1921, "step": 35000 }, { "epoch": 0.22, "eval_loss": 2.399921417236328, "eval_runtime": 18.7883, "eval_samples_per_second": 106.449, "eval_steps_per_second": 1.703, "step": 35000 }, { "epoch": 0.23, "learning_rate": 3.847867274310001e-05, "loss": 2.1831, "step": 36000 }, { "epoch": 0.23, "eval_loss": 2.3935768604278564, "eval_runtime": 18.8237, "eval_samples_per_second": 106.249, "eval_steps_per_second": 1.7, "step": 36000 }, { "epoch": 0.24, "learning_rate": 3.815863587485278e-05, "loss": 2.169, "step": 37000 }, { "epoch": 0.24, "eval_loss": 2.385082960128784, "eval_runtime": 18.9677, "eval_samples_per_second": 105.442, "eval_steps_per_second": 1.687, "step": 37000 }, { "epoch": 0.24, "learning_rate": 3.783859900660556e-05, "loss": 2.1619, "step": 38000 }, { "epoch": 0.24, "eval_loss": 2.3289620876312256, "eval_runtime": 19.0182, "eval_samples_per_second": 105.162, "eval_steps_per_second": 1.683, "step": 38000 }, { "epoch": 0.25, "learning_rate": 3.7518562138358346e-05, "loss": 2.1651, "step": 39000 }, { "epoch": 0.25, "eval_loss": 2.3818867206573486, "eval_runtime": 18.9593, "eval_samples_per_second": 105.489, "eval_steps_per_second": 1.688, "step": 39000 }, { "epoch": 0.26, "learning_rate": 3.719852527011112e-05, "loss": 2.1704, "step": 40000 }, { "epoch": 0.26, "eval_loss": 2.3583953380584717, "eval_runtime": 18.8577, "eval_samples_per_second": 106.057, "eval_steps_per_second": 1.697, "step": 40000 }, { "epoch": 0.26, "learning_rate": 3.6878488401863896e-05, "loss": 2.1601, "step": 41000 }, { "epoch": 0.26, "eval_loss": 2.3705227375030518, "eval_runtime": 19.0038, "eval_samples_per_second": 105.242, "eval_steps_per_second": 1.684, "step": 41000 }, { "epoch": 0.27, "learning_rate": 3.6558451533616674e-05, "loss": 2.1819, "step": 42000 }, { "epoch": 0.27, "eval_loss": 2.3806064128875732, "eval_runtime": 19.09, "eval_samples_per_second": 104.767, "eval_steps_per_second": 1.676, "step": 42000 }, { "epoch": 0.28, "learning_rate": 3.623841466536945e-05, "loss": 2.1666, "step": 43000 }, { "epoch": 0.28, "eval_loss": 2.3670589923858643, "eval_runtime": 18.9485, "eval_samples_per_second": 105.549, "eval_steps_per_second": 1.689, "step": 43000 }, { "epoch": 0.28, "learning_rate": 3.591837779712223e-05, "loss": 2.1718, "step": 44000 }, { "epoch": 0.28, "eval_loss": 2.364011764526367, "eval_runtime": 18.7665, "eval_samples_per_second": 106.573, "eval_steps_per_second": 1.705, "step": 44000 }, { "epoch": 0.29, "learning_rate": 3.559834092887501e-05, "loss": 2.1521, "step": 45000 }, { "epoch": 0.29, "eval_loss": 2.373670816421509, "eval_runtime": 18.9014, "eval_samples_per_second": 105.812, "eval_steps_per_second": 1.693, "step": 45000 }, { "epoch": 0.29, "learning_rate": 3.527830406062779e-05, "loss": 2.148, "step": 46000 }, { "epoch": 0.29, "eval_loss": 2.377063035964966, "eval_runtime": 19.012, "eval_samples_per_second": 105.197, "eval_steps_per_second": 1.683, "step": 46000 }, { "epoch": 0.3, "learning_rate": 3.495826719238056e-05, "loss": 2.1438, "step": 47000 }, { "epoch": 0.3, "eval_loss": 2.3637826442718506, "eval_runtime": 18.982, "eval_samples_per_second": 105.363, "eval_steps_per_second": 1.686, "step": 47000 }, { "epoch": 0.31, "learning_rate": 3.4638230324133344e-05, "loss": 2.1536, "step": 48000 }, { "epoch": 0.31, "eval_loss": 2.3571810722351074, "eval_runtime": 18.8471, "eval_samples_per_second": 106.117, "eval_steps_per_second": 1.698, "step": 48000 }, { "epoch": 0.31, "learning_rate": 3.431819345588612e-05, "loss": 2.1505, "step": 49000 }, { "epoch": 0.31, "eval_loss": 2.3516790866851807, "eval_runtime": 18.8575, "eval_samples_per_second": 106.059, "eval_steps_per_second": 1.697, "step": 49000 }, { "epoch": 0.32, "learning_rate": 3.3998156587638894e-05, "loss": 2.1319, "step": 50000 }, { "epoch": 0.32, "eval_loss": 2.3615307807922363, "eval_runtime": 19.1166, "eval_samples_per_second": 104.621, "eval_steps_per_second": 1.674, "step": 50000 }, { "epoch": 0.33, "learning_rate": 3.367811971939168e-05, "loss": 2.123, "step": 51000 }, { "epoch": 0.33, "eval_loss": 2.3522212505340576, "eval_runtime": 19.1501, "eval_samples_per_second": 104.438, "eval_steps_per_second": 1.671, "step": 51000 }, { "epoch": 0.33, "learning_rate": 3.335808285114445e-05, "loss": 2.1513, "step": 52000 }, { "epoch": 0.33, "eval_loss": 2.388401746749878, "eval_runtime": 18.8344, "eval_samples_per_second": 106.189, "eval_steps_per_second": 1.699, "step": 52000 }, { "epoch": 0.34, "learning_rate": 3.303804598289723e-05, "loss": 2.1419, "step": 53000 }, { "epoch": 0.34, "eval_loss": 2.32639479637146, "eval_runtime": 18.8162, "eval_samples_per_second": 106.292, "eval_steps_per_second": 1.701, "step": 53000 }, { "epoch": 0.35, "learning_rate": 3.271800911465001e-05, "loss": 2.1404, "step": 54000 }, { "epoch": 0.35, "eval_loss": 2.3595259189605713, "eval_runtime": 18.8272, "eval_samples_per_second": 106.229, "eval_steps_per_second": 1.7, "step": 54000 }, { "epoch": 0.35, "learning_rate": 3.2397972246402786e-05, "loss": 2.128, "step": 55000 }, { "epoch": 0.35, "eval_loss": 2.3471484184265137, "eval_runtime": 18.9594, "eval_samples_per_second": 105.489, "eval_steps_per_second": 1.688, "step": 55000 }, { "epoch": 0.36, "learning_rate": 3.2077935378155565e-05, "loss": 2.1287, "step": 56000 }, { "epoch": 0.36, "eval_loss": 2.347370147705078, "eval_runtime": 18.9278, "eval_samples_per_second": 105.665, "eval_steps_per_second": 1.691, "step": 56000 }, { "epoch": 0.36, "learning_rate": 3.175789850990834e-05, "loss": 2.1372, "step": 57000 }, { "epoch": 0.36, "eval_loss": 2.3139336109161377, "eval_runtime": 19.0473, "eval_samples_per_second": 105.002, "eval_steps_per_second": 1.68, "step": 57000 }, { "epoch": 0.37, "learning_rate": 3.143786164166112e-05, "loss": 2.1301, "step": 58000 }, { "epoch": 0.37, "eval_loss": 2.3145127296447754, "eval_runtime": 18.831, "eval_samples_per_second": 106.208, "eval_steps_per_second": 1.699, "step": 58000 }, { "epoch": 0.38, "learning_rate": 3.11178247734139e-05, "loss": 2.128, "step": 59000 }, { "epoch": 0.38, "eval_loss": 2.3634743690490723, "eval_runtime": 19.0052, "eval_samples_per_second": 105.234, "eval_steps_per_second": 1.684, "step": 59000 }, { "epoch": 0.38, "learning_rate": 3.079778790516668e-05, "loss": 2.1088, "step": 60000 }, { "epoch": 0.38, "eval_loss": 2.3068251609802246, "eval_runtime": 18.9935, "eval_samples_per_second": 105.299, "eval_steps_per_second": 1.685, "step": 60000 }, { "epoch": 0.39, "learning_rate": 3.0477751036919456e-05, "loss": 2.122, "step": 61000 }, { "epoch": 0.39, "eval_loss": 2.312502145767212, "eval_runtime": 18.8963, "eval_samples_per_second": 105.841, "eval_steps_per_second": 1.693, "step": 61000 }, { "epoch": 0.4, "learning_rate": 3.015771416867223e-05, "loss": 2.1113, "step": 62000 }, { "epoch": 0.4, "eval_loss": 2.3446314334869385, "eval_runtime": 18.8671, "eval_samples_per_second": 106.005, "eval_steps_per_second": 1.696, "step": 62000 }, { "epoch": 0.4, "learning_rate": 2.983767730042501e-05, "loss": 2.1108, "step": 63000 }, { "epoch": 0.4, "eval_loss": 2.3173420429229736, "eval_runtime": 18.7418, "eval_samples_per_second": 106.713, "eval_steps_per_second": 1.707, "step": 63000 }, { "epoch": 0.41, "learning_rate": 2.951764043217779e-05, "loss": 2.125, "step": 64000 }, { "epoch": 0.41, "eval_loss": 2.363111734390259, "eval_runtime": 18.789, "eval_samples_per_second": 106.445, "eval_steps_per_second": 1.703, "step": 64000 }, { "epoch": 0.42, "learning_rate": 2.9197603563930563e-05, "loss": 2.1106, "step": 65000 }, { "epoch": 0.42, "eval_loss": 2.331869602203369, "eval_runtime": 18.9057, "eval_samples_per_second": 105.788, "eval_steps_per_second": 1.693, "step": 65000 }, { "epoch": 0.42, "learning_rate": 2.8877566695683345e-05, "loss": 2.1143, "step": 66000 }, { "epoch": 0.42, "eval_loss": 2.300299882888794, "eval_runtime": 18.7948, "eval_samples_per_second": 106.413, "eval_steps_per_second": 1.703, "step": 66000 }, { "epoch": 0.43, "learning_rate": 2.8557529827436123e-05, "loss": 2.0982, "step": 67000 }, { "epoch": 0.43, "eval_loss": 2.3044443130493164, "eval_runtime": 19.1803, "eval_samples_per_second": 104.273, "eval_steps_per_second": 1.668, "step": 67000 }, { "epoch": 0.44, "learning_rate": 2.82374929591889e-05, "loss": 2.1026, "step": 68000 }, { "epoch": 0.44, "eval_loss": 2.305398464202881, "eval_runtime": 18.9121, "eval_samples_per_second": 105.752, "eval_steps_per_second": 1.692, "step": 68000 }, { "epoch": 0.44, "learning_rate": 2.791745609094168e-05, "loss": 2.0995, "step": 69000 }, { "epoch": 0.44, "eval_loss": 2.3068206310272217, "eval_runtime": 18.8989, "eval_samples_per_second": 105.826, "eval_steps_per_second": 1.693, "step": 69000 }, { "epoch": 0.45, "learning_rate": 2.7597419222694455e-05, "loss": 2.0844, "step": 70000 }, { "epoch": 0.45, "eval_loss": 2.3477184772491455, "eval_runtime": 19.0274, "eval_samples_per_second": 105.111, "eval_steps_per_second": 1.682, "step": 70000 }, { "epoch": 0.45, "learning_rate": 2.7277382354447233e-05, "loss": 2.1008, "step": 71000 }, { "epoch": 0.45, "eval_loss": 2.339860439300537, "eval_runtime": 18.7939, "eval_samples_per_second": 106.418, "eval_steps_per_second": 1.703, "step": 71000 }, { "epoch": 0.46, "learning_rate": 2.6957345486200015e-05, "loss": 2.092, "step": 72000 }, { "epoch": 0.46, "eval_loss": 2.3236074447631836, "eval_runtime": 18.7746, "eval_samples_per_second": 106.527, "eval_steps_per_second": 1.704, "step": 72000 }, { "epoch": 0.47, "learning_rate": 2.663730861795279e-05, "loss": 2.09, "step": 73000 }, { "epoch": 0.47, "eval_loss": 2.3070333003997803, "eval_runtime": 19.3882, "eval_samples_per_second": 103.155, "eval_steps_per_second": 1.65, "step": 73000 }, { "epoch": 0.47, "learning_rate": 2.631727174970557e-05, "loss": 2.0984, "step": 74000 }, { "epoch": 0.47, "eval_loss": 2.31845760345459, "eval_runtime": 19.5362, "eval_samples_per_second": 102.374, "eval_steps_per_second": 1.638, "step": 74000 }, { "epoch": 0.48, "learning_rate": 2.5997234881458344e-05, "loss": 2.0965, "step": 75000 }, { "epoch": 0.48, "eval_loss": 2.306812047958374, "eval_runtime": 19.4702, "eval_samples_per_second": 102.721, "eval_steps_per_second": 1.644, "step": 75000 }, { "epoch": 0.49, "learning_rate": 2.5677198013211122e-05, "loss": 2.081, "step": 76000 }, { "epoch": 0.49, "eval_loss": 2.274367570877075, "eval_runtime": 19.5806, "eval_samples_per_second": 102.142, "eval_steps_per_second": 1.634, "step": 76000 }, { "epoch": 0.49, "learning_rate": 2.5357161144963904e-05, "loss": 2.0871, "step": 77000 }, { "epoch": 0.49, "eval_loss": 2.254237651824951, "eval_runtime": 19.7552, "eval_samples_per_second": 101.239, "eval_steps_per_second": 1.62, "step": 77000 }, { "epoch": 0.5, "learning_rate": 2.503712427671668e-05, "loss": 2.0751, "step": 78000 }, { "epoch": 0.5, "eval_loss": 2.2817015647888184, "eval_runtime": 19.5765, "eval_samples_per_second": 102.163, "eval_steps_per_second": 1.635, "step": 78000 }, { "epoch": 0.51, "learning_rate": 2.4717087408469457e-05, "loss": 2.0875, "step": 79000 }, { "epoch": 0.51, "eval_loss": 2.288637161254883, "eval_runtime": 19.6173, "eval_samples_per_second": 101.951, "eval_steps_per_second": 1.631, "step": 79000 }, { "epoch": 0.51, "learning_rate": 2.4397050540222236e-05, "loss": 2.0847, "step": 80000 }, { "epoch": 0.51, "eval_loss": 2.3093936443328857, "eval_runtime": 19.3962, "eval_samples_per_second": 103.113, "eval_steps_per_second": 1.65, "step": 80000 }, { "epoch": 0.52, "learning_rate": 2.4077013671975014e-05, "loss": 2.0861, "step": 81000 }, { "epoch": 0.52, "eval_loss": 2.294950246810913, "eval_runtime": 19.5483, "eval_samples_per_second": 102.311, "eval_steps_per_second": 1.637, "step": 81000 }, { "epoch": 0.52, "learning_rate": 2.375697680372779e-05, "loss": 2.0689, "step": 82000 }, { "epoch": 0.52, "eval_loss": 2.293389320373535, "eval_runtime": 19.51, "eval_samples_per_second": 102.512, "eval_steps_per_second": 1.64, "step": 82000 }, { "epoch": 0.53, "learning_rate": 2.3436939935480567e-05, "loss": 2.0767, "step": 83000 }, { "epoch": 0.53, "eval_loss": 2.304983615875244, "eval_runtime": 19.26, "eval_samples_per_second": 103.842, "eval_steps_per_second": 1.661, "step": 83000 }, { "epoch": 0.54, "learning_rate": 2.311690306723335e-05, "loss": 2.0711, "step": 84000 }, { "epoch": 0.54, "eval_loss": 2.2823355197906494, "eval_runtime": 20.4429, "eval_samples_per_second": 97.834, "eval_steps_per_second": 1.565, "step": 84000 }, { "epoch": 0.54, "learning_rate": 2.2796866198986124e-05, "loss": 2.0654, "step": 85000 }, { "epoch": 0.54, "eval_loss": 2.280226469039917, "eval_runtime": 19.5501, "eval_samples_per_second": 102.301, "eval_steps_per_second": 1.637, "step": 85000 }, { "epoch": 0.55, "learning_rate": 2.2476829330738902e-05, "loss": 2.0627, "step": 86000 }, { "epoch": 0.55, "eval_loss": 2.2770543098449707, "eval_runtime": 19.4549, "eval_samples_per_second": 102.802, "eval_steps_per_second": 1.645, "step": 86000 }, { "epoch": 0.56, "learning_rate": 2.215679246249168e-05, "loss": 2.0656, "step": 87000 }, { "epoch": 0.56, "eval_loss": 2.2922134399414062, "eval_runtime": 19.3407, "eval_samples_per_second": 103.409, "eval_steps_per_second": 1.655, "step": 87000 }, { "epoch": 0.56, "learning_rate": 2.1836755594244456e-05, "loss": 2.07, "step": 88000 }, { "epoch": 0.56, "eval_loss": 2.268709897994995, "eval_runtime": 19.4551, "eval_samples_per_second": 102.801, "eval_steps_per_second": 1.645, "step": 88000 }, { "epoch": 0.57, "learning_rate": 2.1516718725997238e-05, "loss": 2.0661, "step": 89000 }, { "epoch": 0.57, "eval_loss": 2.247802972793579, "eval_runtime": 19.273, "eval_samples_per_second": 103.772, "eval_steps_per_second": 1.66, "step": 89000 }, { "epoch": 0.58, "learning_rate": 2.1196681857750016e-05, "loss": 2.0511, "step": 90000 }, { "epoch": 0.58, "eval_loss": 2.3074941635131836, "eval_runtime": 19.2075, "eval_samples_per_second": 104.126, "eval_steps_per_second": 1.666, "step": 90000 }, { "epoch": 0.58, "learning_rate": 2.087664498950279e-05, "loss": 2.0582, "step": 91000 }, { "epoch": 0.58, "eval_loss": 2.248690605163574, "eval_runtime": 19.2432, "eval_samples_per_second": 103.933, "eval_steps_per_second": 1.663, "step": 91000 }, { "epoch": 0.59, "learning_rate": 2.055660812125557e-05, "loss": 2.0626, "step": 92000 }, { "epoch": 0.59, "eval_loss": 2.2588484287261963, "eval_runtime": 19.4441, "eval_samples_per_second": 102.859, "eval_steps_per_second": 1.646, "step": 92000 }, { "epoch": 0.6, "learning_rate": 2.0236571253008348e-05, "loss": 2.0562, "step": 93000 }, { "epoch": 0.6, "eval_loss": 2.274319887161255, "eval_runtime": 19.4979, "eval_samples_per_second": 102.575, "eval_steps_per_second": 1.641, "step": 93000 }, { "epoch": 0.6, "learning_rate": 1.9916534384761126e-05, "loss": 2.0511, "step": 94000 }, { "epoch": 0.6, "eval_loss": 2.276171922683716, "eval_runtime": 19.331, "eval_samples_per_second": 103.461, "eval_steps_per_second": 1.655, "step": 94000 }, { "epoch": 0.61, "learning_rate": 1.9596497516513904e-05, "loss": 2.0413, "step": 95000 }, { "epoch": 0.61, "eval_loss": 2.2398881912231445, "eval_runtime": 19.5099, "eval_samples_per_second": 102.512, "eval_steps_per_second": 1.64, "step": 95000 }, { "epoch": 0.61, "learning_rate": 1.927646064826668e-05, "loss": 2.0496, "step": 96000 }, { "epoch": 0.61, "eval_loss": 2.271150588989258, "eval_runtime": 19.317, "eval_samples_per_second": 103.536, "eval_steps_per_second": 1.657, "step": 96000 }, { "epoch": 0.62, "learning_rate": 1.8956423780019458e-05, "loss": 2.0564, "step": 97000 }, { "epoch": 0.62, "eval_loss": 2.2770469188690186, "eval_runtime": 19.1141, "eval_samples_per_second": 104.635, "eval_steps_per_second": 1.674, "step": 97000 }, { "epoch": 0.63, "learning_rate": 1.863638691177224e-05, "loss": 2.0505, "step": 98000 }, { "epoch": 0.63, "eval_loss": 2.2885847091674805, "eval_runtime": 19.6608, "eval_samples_per_second": 101.725, "eval_steps_per_second": 1.628, "step": 98000 }, { "epoch": 0.63, "learning_rate": 1.8316350043525015e-05, "loss": 2.0504, "step": 99000 }, { "epoch": 0.63, "eval_loss": 2.3180038928985596, "eval_runtime": 19.4021, "eval_samples_per_second": 103.082, "eval_steps_per_second": 1.649, "step": 99000 }, { "epoch": 0.64, "learning_rate": 1.7996313175277793e-05, "loss": 2.0439, "step": 100000 }, { "epoch": 0.64, "eval_loss": 2.2651731967926025, "eval_runtime": 19.3214, "eval_samples_per_second": 103.512, "eval_steps_per_second": 1.656, "step": 100000 }, { "epoch": 0.65, "learning_rate": 1.767627630703057e-05, "loss": 2.0461, "step": 101000 }, { "epoch": 0.65, "eval_loss": 2.222968101501465, "eval_runtime": 19.2774, "eval_samples_per_second": 103.749, "eval_steps_per_second": 1.66, "step": 101000 }, { "epoch": 0.65, "learning_rate": 1.7356239438783346e-05, "loss": 2.0405, "step": 102000 }, { "epoch": 0.65, "eval_loss": 2.2448790073394775, "eval_runtime": 21.2727, "eval_samples_per_second": 94.017, "eval_steps_per_second": 1.504, "step": 102000 }, { "epoch": 0.66, "learning_rate": 1.7036202570536128e-05, "loss": 2.038, "step": 103000 }, { "epoch": 0.66, "eval_loss": 2.2096433639526367, "eval_runtime": 19.292, "eval_samples_per_second": 103.67, "eval_steps_per_second": 1.659, "step": 103000 }, { "epoch": 0.67, "learning_rate": 1.6716165702288906e-05, "loss": 2.0205, "step": 104000 }, { "epoch": 0.67, "eval_loss": 2.2131240367889404, "eval_runtime": 19.1995, "eval_samples_per_second": 104.169, "eval_steps_per_second": 1.667, "step": 104000 }, { "epoch": 0.67, "learning_rate": 1.639612883404168e-05, "loss": 2.0196, "step": 105000 }, { "epoch": 0.67, "eval_loss": 2.2505383491516113, "eval_runtime": 19.4936, "eval_samples_per_second": 102.598, "eval_steps_per_second": 1.642, "step": 105000 }, { "epoch": 0.68, "learning_rate": 1.607609196579446e-05, "loss": 2.0272, "step": 106000 }, { "epoch": 0.68, "eval_loss": 2.243058681488037, "eval_runtime": 19.4712, "eval_samples_per_second": 102.716, "eval_steps_per_second": 1.643, "step": 106000 }, { "epoch": 0.68, "learning_rate": 1.5756055097547238e-05, "loss": 2.0276, "step": 107000 }, { "epoch": 0.68, "eval_loss": 2.2137022018432617, "eval_runtime": 18.6801, "eval_samples_per_second": 107.066, "eval_steps_per_second": 1.713, "step": 107000 }, { "epoch": 0.69, "learning_rate": 1.5436018229300017e-05, "loss": 2.0224, "step": 108000 }, { "epoch": 0.69, "eval_loss": 2.2309203147888184, "eval_runtime": 18.8357, "eval_samples_per_second": 106.181, "eval_steps_per_second": 1.699, "step": 108000 }, { "epoch": 0.7, "learning_rate": 1.5115981361052795e-05, "loss": 2.0253, "step": 109000 }, { "epoch": 0.7, "eval_loss": 2.2213120460510254, "eval_runtime": 19.2801, "eval_samples_per_second": 103.734, "eval_steps_per_second": 1.66, "step": 109000 }, { "epoch": 0.7, "learning_rate": 1.4795944492805572e-05, "loss": 2.0199, "step": 110000 }, { "epoch": 0.7, "eval_loss": 2.2416763305664062, "eval_runtime": 18.8526, "eval_samples_per_second": 106.086, "eval_steps_per_second": 1.697, "step": 110000 }, { "epoch": 0.71, "learning_rate": 1.4475907624558348e-05, "loss": 2.0216, "step": 111000 }, { "epoch": 0.71, "eval_loss": 2.24078369140625, "eval_runtime": 18.5093, "eval_samples_per_second": 108.054, "eval_steps_per_second": 1.729, "step": 111000 }, { "epoch": 0.72, "learning_rate": 1.4155870756311127e-05, "loss": 2.0236, "step": 112000 }, { "epoch": 0.72, "eval_loss": 2.2598512172698975, "eval_runtime": 19.0496, "eval_samples_per_second": 104.989, "eval_steps_per_second": 1.68, "step": 112000 }, { "epoch": 0.72, "learning_rate": 1.3835833888063907e-05, "loss": 2.0247, "step": 113000 }, { "epoch": 0.72, "eval_loss": 2.2282919883728027, "eval_runtime": 18.7751, "eval_samples_per_second": 106.524, "eval_steps_per_second": 1.704, "step": 113000 }, { "epoch": 0.73, "learning_rate": 1.3515797019816683e-05, "loss": 2.0263, "step": 114000 }, { "epoch": 0.73, "eval_loss": 2.248234748840332, "eval_runtime": 18.8567, "eval_samples_per_second": 106.063, "eval_steps_per_second": 1.697, "step": 114000 }, { "epoch": 0.74, "learning_rate": 1.3195760151569462e-05, "loss": 2.014, "step": 115000 }, { "epoch": 0.74, "eval_loss": 2.194716691970825, "eval_runtime": 18.7872, "eval_samples_per_second": 106.455, "eval_steps_per_second": 1.703, "step": 115000 }, { "epoch": 0.74, "learning_rate": 1.2875723283322239e-05, "loss": 2.0076, "step": 116000 }, { "epoch": 0.74, "eval_loss": 2.233458995819092, "eval_runtime": 18.8711, "eval_samples_per_second": 105.982, "eval_steps_per_second": 1.696, "step": 116000 }, { "epoch": 0.75, "learning_rate": 1.2555686415075015e-05, "loss": 2.011, "step": 117000 }, { "epoch": 0.75, "eval_loss": 2.213284492492676, "eval_runtime": 19.5167, "eval_samples_per_second": 102.477, "eval_steps_per_second": 1.64, "step": 117000 }, { "epoch": 0.76, "learning_rate": 1.2235649546827795e-05, "loss": 2.0216, "step": 118000 }, { "epoch": 0.76, "eval_loss": 2.210317373275757, "eval_runtime": 18.6333, "eval_samples_per_second": 107.334, "eval_steps_per_second": 1.717, "step": 118000 }, { "epoch": 0.76, "learning_rate": 1.1915612678580574e-05, "loss": 2.0097, "step": 119000 }, { "epoch": 0.76, "eval_loss": 2.241175651550293, "eval_runtime": 18.694, "eval_samples_per_second": 106.986, "eval_steps_per_second": 1.712, "step": 119000 }, { "epoch": 0.77, "learning_rate": 1.159557581033335e-05, "loss": 2.0076, "step": 120000 }, { "epoch": 0.77, "eval_loss": 2.2543113231658936, "eval_runtime": 18.8082, "eval_samples_per_second": 106.336, "eval_steps_per_second": 1.701, "step": 120000 }, { "epoch": 0.77, "learning_rate": 1.1275538942086129e-05, "loss": 2.01, "step": 121000 }, { "epoch": 0.77, "eval_loss": 2.2642598152160645, "eval_runtime": 19.4522, "eval_samples_per_second": 102.816, "eval_steps_per_second": 1.645, "step": 121000 }, { "epoch": 0.78, "learning_rate": 1.0955502073838907e-05, "loss": 2.0074, "step": 122000 }, { "epoch": 0.78, "eval_loss": 2.2413113117218018, "eval_runtime": 18.8078, "eval_samples_per_second": 106.339, "eval_steps_per_second": 1.701, "step": 122000 }, { "epoch": 0.79, "learning_rate": 1.0635465205591686e-05, "loss": 1.9898, "step": 123000 }, { "epoch": 0.79, "eval_loss": 2.2442147731781006, "eval_runtime": 18.8179, "eval_samples_per_second": 106.282, "eval_steps_per_second": 1.701, "step": 123000 }, { "epoch": 0.79, "learning_rate": 1.0315428337344462e-05, "loss": 2.0119, "step": 124000 }, { "epoch": 0.79, "eval_loss": 2.227520704269409, "eval_runtime": 18.9241, "eval_samples_per_second": 105.686, "eval_steps_per_second": 1.691, "step": 124000 }, { "epoch": 0.8, "learning_rate": 9.99539146909724e-06, "loss": 1.993, "step": 125000 }, { "epoch": 0.8, "eval_loss": 2.2116286754608154, "eval_runtime": 19.6066, "eval_samples_per_second": 102.007, "eval_steps_per_second": 1.632, "step": 125000 }, { "epoch": 0.81, "learning_rate": 9.675354600850019e-06, "loss": 2.0092, "step": 126000 }, { "epoch": 0.81, "eval_loss": 2.2108232975006104, "eval_runtime": 18.7069, "eval_samples_per_second": 106.912, "eval_steps_per_second": 1.711, "step": 126000 }, { "epoch": 0.81, "learning_rate": 9.355317732602796e-06, "loss": 2.0019, "step": 127000 }, { "epoch": 0.81, "eval_loss": 2.2236363887786865, "eval_runtime": 18.8801, "eval_samples_per_second": 105.931, "eval_steps_per_second": 1.695, "step": 127000 }, { "epoch": 0.82, "learning_rate": 9.035280864355574e-06, "loss": 1.9931, "step": 128000 }, { "epoch": 0.82, "eval_loss": 2.2105228900909424, "eval_runtime": 21.3819, "eval_samples_per_second": 93.537, "eval_steps_per_second": 1.497, "step": 128000 }, { "epoch": 0.83, "learning_rate": 8.715243996108352e-06, "loss": 1.9851, "step": 129000 }, { "epoch": 0.83, "eval_loss": 2.2179064750671387, "eval_runtime": 19.3741, "eval_samples_per_second": 103.231, "eval_steps_per_second": 1.652, "step": 129000 }, { "epoch": 0.83, "learning_rate": 8.395207127861129e-06, "loss": 1.9882, "step": 130000 }, { "epoch": 0.83, "eval_loss": 2.2303926944732666, "eval_runtime": 18.846, "eval_samples_per_second": 106.123, "eval_steps_per_second": 1.698, "step": 130000 }, { "epoch": 0.84, "learning_rate": 8.075170259613907e-06, "loss": 1.999, "step": 131000 }, { "epoch": 0.84, "eval_loss": 2.202813148498535, "eval_runtime": 19.3498, "eval_samples_per_second": 103.36, "eval_steps_per_second": 1.654, "step": 131000 }, { "epoch": 0.84, "learning_rate": 7.755133391366686e-06, "loss": 1.9848, "step": 132000 }, { "epoch": 0.84, "eval_loss": 2.1549251079559326, "eval_runtime": 20.1588, "eval_samples_per_second": 99.212, "eval_steps_per_second": 1.587, "step": 132000 }, { "epoch": 0.85, "learning_rate": 7.435096523119464e-06, "loss": 1.9962, "step": 133000 }, { "epoch": 0.85, "eval_loss": 2.2457568645477295, "eval_runtime": 19.1213, "eval_samples_per_second": 104.595, "eval_steps_per_second": 1.674, "step": 133000 }, { "epoch": 0.86, "learning_rate": 7.115059654872242e-06, "loss": 1.991, "step": 134000 }, { "epoch": 0.86, "eval_loss": 2.1861023902893066, "eval_runtime": 19.2023, "eval_samples_per_second": 104.154, "eval_steps_per_second": 1.666, "step": 134000 }, { "epoch": 0.86, "learning_rate": 6.79502278662502e-06, "loss": 1.9901, "step": 135000 }, { "epoch": 0.86, "eval_loss": 2.2025179862976074, "eval_runtime": 19.2167, "eval_samples_per_second": 104.076, "eval_steps_per_second": 1.665, "step": 135000 }, { "epoch": 0.87, "learning_rate": 6.474985918377798e-06, "loss": 1.9698, "step": 136000 }, { "epoch": 0.87, "eval_loss": 2.2299790382385254, "eval_runtime": 20.1153, "eval_samples_per_second": 99.427, "eval_steps_per_second": 1.591, "step": 136000 }, { "epoch": 0.88, "learning_rate": 6.154949050130575e-06, "loss": 1.9772, "step": 137000 }, { "epoch": 0.88, "eval_loss": 2.1934893131256104, "eval_runtime": 19.0706, "eval_samples_per_second": 104.874, "eval_steps_per_second": 1.678, "step": 137000 }, { "epoch": 0.88, "learning_rate": 5.8349121818833536e-06, "loss": 1.974, "step": 138000 }, { "epoch": 0.88, "eval_loss": 2.201178789138794, "eval_runtime": 18.9851, "eval_samples_per_second": 105.346, "eval_steps_per_second": 1.686, "step": 138000 }, { "epoch": 0.89, "learning_rate": 5.514875313636131e-06, "loss": 1.9906, "step": 139000 }, { "epoch": 0.89, "eval_loss": 2.2042794227600098, "eval_runtime": 19.1406, "eval_samples_per_second": 104.49, "eval_steps_per_second": 1.672, "step": 139000 }, { "epoch": 0.9, "learning_rate": 5.194838445388909e-06, "loss": 1.9899, "step": 140000 }, { "epoch": 0.9, "eval_loss": 2.187676191329956, "eval_runtime": 19.4746, "eval_samples_per_second": 102.698, "eval_steps_per_second": 1.643, "step": 140000 }, { "epoch": 0.9, "learning_rate": 4.874801577141687e-06, "loss": 1.9785, "step": 141000 }, { "epoch": 0.9, "eval_loss": 2.2104039192199707, "eval_runtime": 19.2016, "eval_samples_per_second": 104.158, "eval_steps_per_second": 1.667, "step": 141000 }, { "epoch": 0.91, "learning_rate": 4.5547647088944646e-06, "loss": 1.9682, "step": 142000 }, { "epoch": 0.91, "eval_loss": 2.1898605823516846, "eval_runtime": 19.2296, "eval_samples_per_second": 104.006, "eval_steps_per_second": 1.664, "step": 142000 }, { "epoch": 0.92, "learning_rate": 4.234727840647243e-06, "loss": 1.9785, "step": 143000 }, { "epoch": 0.92, "eval_loss": 2.183152437210083, "eval_runtime": 19.1118, "eval_samples_per_second": 104.647, "eval_steps_per_second": 1.674, "step": 143000 }, { "epoch": 0.92, "learning_rate": 3.914690972400021e-06, "loss": 1.9795, "step": 144000 }, { "epoch": 0.92, "eval_loss": 2.199709415435791, "eval_runtime": 19.352, "eval_samples_per_second": 103.348, "eval_steps_per_second": 1.654, "step": 144000 }, { "epoch": 0.93, "learning_rate": 3.5946541041527984e-06, "loss": 1.9656, "step": 145000 }, { "epoch": 0.93, "eval_loss": 2.200268507003784, "eval_runtime": 19.3103, "eval_samples_per_second": 103.572, "eval_steps_per_second": 1.657, "step": 145000 }, { "epoch": 0.93, "learning_rate": 3.2746172359055764e-06, "loss": 1.9813, "step": 146000 }, { "epoch": 0.93, "eval_loss": 2.1825687885284424, "eval_runtime": 19.0952, "eval_samples_per_second": 104.739, "eval_steps_per_second": 1.676, "step": 146000 }, { "epoch": 0.94, "learning_rate": 2.9545803676583543e-06, "loss": 1.9719, "step": 147000 }, { "epoch": 0.94, "eval_loss": 2.1915125846862793, "eval_runtime": 19.3108, "eval_samples_per_second": 103.569, "eval_steps_per_second": 1.657, "step": 147000 }, { "epoch": 0.95, "learning_rate": 2.6345434994111323e-06, "loss": 1.962, "step": 148000 }, { "epoch": 0.95, "eval_loss": 2.196523904800415, "eval_runtime": 19.1234, "eval_samples_per_second": 104.584, "eval_steps_per_second": 1.673, "step": 148000 }, { "epoch": 0.95, "learning_rate": 2.3145066311639102e-06, "loss": 1.9657, "step": 149000 }, { "epoch": 0.95, "eval_loss": 2.1772007942199707, "eval_runtime": 19.0921, "eval_samples_per_second": 104.756, "eval_steps_per_second": 1.676, "step": 149000 }, { "epoch": 0.96, "learning_rate": 1.994469762916688e-06, "loss": 1.9662, "step": 150000 }, { "epoch": 0.96, "eval_loss": 2.151597261428833, "eval_runtime": 19.1055, "eval_samples_per_second": 104.682, "eval_steps_per_second": 1.675, "step": 150000 }, { "epoch": 0.97, "learning_rate": 1.674432894669466e-06, "loss": 1.9631, "step": 151000 }, { "epoch": 0.97, "eval_loss": 2.1692702770233154, "eval_runtime": 19.4031, "eval_samples_per_second": 103.077, "eval_steps_per_second": 1.649, "step": 151000 }, { "epoch": 0.97, "learning_rate": 1.354396026422244e-06, "loss": 1.9651, "step": 152000 }, { "epoch": 0.97, "eval_loss": 2.174436330795288, "eval_runtime": 19.3133, "eval_samples_per_second": 103.555, "eval_steps_per_second": 1.657, "step": 152000 }, { "epoch": 0.98, "learning_rate": 1.0343591581750219e-06, "loss": 1.9761, "step": 153000 }, { "epoch": 0.98, "eval_loss": 2.1922249794006348, "eval_runtime": 19.2415, "eval_samples_per_second": 103.942, "eval_steps_per_second": 1.663, "step": 153000 }, { "epoch": 0.99, "learning_rate": 7.143222899277997e-07, "loss": 1.9602, "step": 154000 }, { "epoch": 0.99, "eval_loss": 2.177457571029663, "eval_runtime": 19.1279, "eval_samples_per_second": 104.559, "eval_steps_per_second": 1.673, "step": 154000 }, { "epoch": 0.99, "learning_rate": 3.9428542168057766e-07, "loss": 1.9429, "step": 155000 }, { "epoch": 0.99, "eval_loss": 2.167567491531372, "eval_runtime": 19.5087, "eval_samples_per_second": 102.518, "eval_steps_per_second": 1.64, "step": 155000 }, { "epoch": 1.0, "learning_rate": 7.424855343335553e-08, "loss": 1.9662, "step": 156000 }, { "epoch": 1.0, "eval_loss": 2.179702043533325, "eval_runtime": 19.3655, "eval_samples_per_second": 103.276, "eval_steps_per_second": 1.652, "step": 156000 }, { "epoch": 1.0, "learning_rate": 2.4877105842593068e-05, "loss": 1.9883, "step": 157000 }, { "epoch": 1.0, "eval_loss": 2.1984949111938477, "eval_runtime": 19.5918, "eval_samples_per_second": 102.084, "eval_steps_per_second": 1.633, "step": 157000 }, { "epoch": 1.01, "learning_rate": 2.4717087408469457e-05, "loss": 2.0127, "step": 158000 }, { "epoch": 1.01, "eval_loss": 2.264371633529663, "eval_runtime": 19.1742, "eval_samples_per_second": 104.307, "eval_steps_per_second": 1.669, "step": 158000 }, { "epoch": 1.02, "learning_rate": 2.4557068974345846e-05, "loss": 2.013, "step": 159000 }, { "epoch": 1.02, "eval_loss": 2.263242721557617, "eval_runtime": 19.0844, "eval_samples_per_second": 104.798, "eval_steps_per_second": 1.677, "step": 159000 }, { "epoch": 1.02, "learning_rate": 2.4397050540222236e-05, "loss": 2.0243, "step": 160000 }, { "epoch": 1.02, "eval_loss": 2.267091751098633, "eval_runtime": 19.3957, "eval_samples_per_second": 103.116, "eval_steps_per_second": 1.65, "step": 160000 }, { "epoch": 1.03, "learning_rate": 2.423703210609862e-05, "loss": 2.021, "step": 161000 }, { "epoch": 1.03, "eval_loss": 2.2471094131469727, "eval_runtime": 19.2438, "eval_samples_per_second": 103.929, "eval_steps_per_second": 1.663, "step": 161000 }, { "epoch": 1.04, "learning_rate": 2.4077013671975014e-05, "loss": 2.0278, "step": 162000 }, { "epoch": 1.04, "eval_loss": 2.2140402793884277, "eval_runtime": 19.0312, "eval_samples_per_second": 105.091, "eval_steps_per_second": 1.681, "step": 162000 }, { "epoch": 1.04, "learning_rate": 2.3916995237851403e-05, "loss": 2.0109, "step": 163000 }, { "epoch": 1.04, "eval_loss": 2.2622554302215576, "eval_runtime": 19.0334, "eval_samples_per_second": 105.078, "eval_steps_per_second": 1.681, "step": 163000 }, { "epoch": 1.05, "learning_rate": 2.375697680372779e-05, "loss": 2.023, "step": 164000 }, { "epoch": 1.05, "eval_loss": 2.245877981185913, "eval_runtime": 19.4264, "eval_samples_per_second": 102.953, "eval_steps_per_second": 1.647, "step": 164000 }, { "epoch": 1.06, "learning_rate": 2.359695836960418e-05, "loss": 2.0187, "step": 165000 }, { "epoch": 1.06, "eval_loss": 2.25624942779541, "eval_runtime": 19.303, "eval_samples_per_second": 103.611, "eval_steps_per_second": 1.658, "step": 165000 }, { "epoch": 1.06, "learning_rate": 2.3436939935480567e-05, "loss": 2.019, "step": 166000 }, { "epoch": 1.06, "eval_loss": 2.2587056159973145, "eval_runtime": 18.8102, "eval_samples_per_second": 106.325, "eval_steps_per_second": 1.701, "step": 166000 }, { "epoch": 1.07, "learning_rate": 2.3276921501356956e-05, "loss": 2.0208, "step": 167000 }, { "epoch": 1.07, "eval_loss": 2.2842631340026855, "eval_runtime": 19.22, "eval_samples_per_second": 104.058, "eval_steps_per_second": 1.665, "step": 167000 }, { "epoch": 1.08, "learning_rate": 2.311690306723335e-05, "loss": 2.0043, "step": 168000 }, { "epoch": 1.08, "eval_loss": 2.2638208866119385, "eval_runtime": 19.6646, "eval_samples_per_second": 101.706, "eval_steps_per_second": 1.627, "step": 168000 }, { "epoch": 1.08, "learning_rate": 2.2956884633109735e-05, "loss": 2.0171, "step": 169000 }, { "epoch": 1.08, "eval_loss": 2.2604892253875732, "eval_runtime": 19.2438, "eval_samples_per_second": 103.93, "eval_steps_per_second": 1.663, "step": 169000 }, { "epoch": 1.09, "learning_rate": 2.2796866198986124e-05, "loss": 2.0351, "step": 170000 }, { "epoch": 1.09, "eval_loss": 2.2608911991119385, "eval_runtime": 19.3036, "eval_samples_per_second": 103.607, "eval_steps_per_second": 1.658, "step": 170000 }, { "epoch": 1.09, "learning_rate": 2.2636847764862513e-05, "loss": 2.0166, "step": 171000 }, { "epoch": 1.09, "eval_loss": 2.2317748069763184, "eval_runtime": 19.0555, "eval_samples_per_second": 104.957, "eval_steps_per_second": 1.679, "step": 171000 }, { "epoch": 1.1, "learning_rate": 2.2476829330738902e-05, "loss": 2.0102, "step": 172000 }, { "epoch": 1.1, "eval_loss": 2.2210681438446045, "eval_runtime": 19.4253, "eval_samples_per_second": 102.958, "eval_steps_per_second": 1.647, "step": 172000 }, { "epoch": 1.11, "learning_rate": 2.231681089661529e-05, "loss": 2.0226, "step": 173000 }, { "epoch": 1.11, "eval_loss": 2.2446329593658447, "eval_runtime": 19.1758, "eval_samples_per_second": 104.298, "eval_steps_per_second": 1.669, "step": 173000 }, { "epoch": 1.11, "learning_rate": 2.215679246249168e-05, "loss": 2.0293, "step": 174000 }, { "epoch": 1.11, "eval_loss": 2.2327494621276855, "eval_runtime": 19.0577, "eval_samples_per_second": 104.945, "eval_steps_per_second": 1.679, "step": 174000 }, { "epoch": 1.12, "learning_rate": 2.199677402836807e-05, "loss": 2.0269, "step": 175000 }, { "epoch": 1.12, "eval_loss": 2.223355293273926, "eval_runtime": 19.372, "eval_samples_per_second": 103.242, "eval_steps_per_second": 1.652, "step": 175000 }, { "epoch": 1.13, "learning_rate": 2.1836755594244456e-05, "loss": 2.0232, "step": 176000 }, { "epoch": 1.13, "eval_loss": 2.2283060550689697, "eval_runtime": 19.4986, "eval_samples_per_second": 102.572, "eval_steps_per_second": 1.641, "step": 176000 }, { "epoch": 1.13, "learning_rate": 2.167673716012085e-05, "loss": 2.0155, "step": 177000 }, { "epoch": 1.13, "eval_loss": 2.241269588470459, "eval_runtime": 19.5594, "eval_samples_per_second": 102.253, "eval_steps_per_second": 1.636, "step": 177000 }, { "epoch": 1.14, "learning_rate": 2.1516718725997238e-05, "loss": 2.0148, "step": 178000 }, { "epoch": 1.14, "eval_loss": 2.2584030628204346, "eval_runtime": 18.9767, "eval_samples_per_second": 105.392, "eval_steps_per_second": 1.686, "step": 178000 }, { "epoch": 1.15, "learning_rate": 2.1356700291873623e-05, "loss": 2.0167, "step": 179000 }, { "epoch": 1.15, "eval_loss": 2.2308297157287598, "eval_runtime": 19.318, "eval_samples_per_second": 103.531, "eval_steps_per_second": 1.656, "step": 179000 }, { "epoch": 1.15, "learning_rate": 2.1196681857750016e-05, "loss": 2.0204, "step": 180000 }, { "epoch": 1.15, "eval_loss": 2.2320470809936523, "eval_runtime": 19.5088, "eval_samples_per_second": 102.518, "eval_steps_per_second": 1.64, "step": 180000 }, { "epoch": 1.16, "learning_rate": 2.1036663423626402e-05, "loss": 2.014, "step": 181000 }, { "epoch": 1.16, "eval_loss": 2.25752854347229, "eval_runtime": 19.1454, "eval_samples_per_second": 104.464, "eval_steps_per_second": 1.671, "step": 181000 }, { "epoch": 1.16, "learning_rate": 2.087664498950279e-05, "loss": 2.0149, "step": 182000 }, { "epoch": 1.16, "eval_loss": 2.2161190509796143, "eval_runtime": 19.0879, "eval_samples_per_second": 104.779, "eval_steps_per_second": 1.676, "step": 182000 }, { "epoch": 1.17, "learning_rate": 2.071662655537918e-05, "loss": 2.0082, "step": 183000 }, { "epoch": 1.17, "eval_loss": 2.2062742710113525, "eval_runtime": 19.2713, "eval_samples_per_second": 103.781, "eval_steps_per_second": 1.66, "step": 183000 }, { "epoch": 1.18, "learning_rate": 2.055660812125557e-05, "loss": 2.0017, "step": 184000 }, { "epoch": 1.18, "eval_loss": 2.2289586067199707, "eval_runtime": 19.4499, "eval_samples_per_second": 102.828, "eval_steps_per_second": 1.645, "step": 184000 }, { "epoch": 1.18, "learning_rate": 2.039658968713196e-05, "loss": 2.0146, "step": 185000 }, { "epoch": 1.18, "eval_loss": 2.2288384437561035, "eval_runtime": 19.335, "eval_samples_per_second": 103.439, "eval_steps_per_second": 1.655, "step": 185000 }, { "epoch": 1.19, "learning_rate": 2.0236571253008348e-05, "loss": 2.024, "step": 186000 }, { "epoch": 1.19, "eval_loss": 2.194934606552124, "eval_runtime": 19.5009, "eval_samples_per_second": 102.559, "eval_steps_per_second": 1.641, "step": 186000 }, { "epoch": 1.2, "learning_rate": 2.0076552818884737e-05, "loss": 2.0016, "step": 187000 }, { "epoch": 1.2, "eval_loss": 2.197631597518921, "eval_runtime": 19.2128, "eval_samples_per_second": 104.097, "eval_steps_per_second": 1.666, "step": 187000 }, { "epoch": 1.2, "learning_rate": 1.9916534384761126e-05, "loss": 2.0066, "step": 188000 }, { "epoch": 1.2, "eval_loss": 2.238746166229248, "eval_runtime": 19.4524, "eval_samples_per_second": 102.815, "eval_steps_per_second": 1.645, "step": 188000 }, { "epoch": 1.21, "learning_rate": 1.9756515950637515e-05, "loss": 2.0168, "step": 189000 }, { "epoch": 1.21, "eval_loss": 2.2261757850646973, "eval_runtime": 19.645, "eval_samples_per_second": 101.807, "eval_steps_per_second": 1.629, "step": 189000 }, { "epoch": 1.22, "learning_rate": 1.9596497516513904e-05, "loss": 2.0023, "step": 190000 }, { "epoch": 1.22, "eval_loss": 2.2070722579956055, "eval_runtime": 19.0874, "eval_samples_per_second": 104.781, "eval_steps_per_second": 1.676, "step": 190000 }, { "epoch": 1.22, "learning_rate": 1.943647908239029e-05, "loss": 1.9917, "step": 191000 }, { "epoch": 1.22, "eval_loss": 2.2613461017608643, "eval_runtime": 19.1099, "eval_samples_per_second": 104.658, "eval_steps_per_second": 1.675, "step": 191000 }, { "epoch": 1.23, "learning_rate": 1.927646064826668e-05, "loss": 2.01, "step": 192000 }, { "epoch": 1.23, "eval_loss": 2.2324349880218506, "eval_runtime": 20.8611, "eval_samples_per_second": 95.872, "eval_steps_per_second": 1.534, "step": 192000 }, { "epoch": 1.24, "learning_rate": 1.9116442214143072e-05, "loss": 2.0023, "step": 193000 }, { "epoch": 1.24, "eval_loss": 2.2707834243774414, "eval_runtime": 19.7356, "eval_samples_per_second": 101.34, "eval_steps_per_second": 1.621, "step": 193000 }, { "epoch": 1.24, "learning_rate": 1.8956423780019458e-05, "loss": 2.0037, "step": 194000 }, { "epoch": 1.24, "eval_loss": 2.2384769916534424, "eval_runtime": 19.0414, "eval_samples_per_second": 105.034, "eval_steps_per_second": 1.681, "step": 194000 }, { "epoch": 1.25, "learning_rate": 1.8796405345895847e-05, "loss": 1.9994, "step": 195000 }, { "epoch": 1.25, "eval_loss": 2.192796230316162, "eval_runtime": 19.0496, "eval_samples_per_second": 104.989, "eval_steps_per_second": 1.68, "step": 195000 }, { "epoch": 1.25, "learning_rate": 1.863638691177224e-05, "loss": 1.994, "step": 196000 }, { "epoch": 1.25, "eval_loss": 2.170961618423462, "eval_runtime": 19.6903, "eval_samples_per_second": 101.573, "eval_steps_per_second": 1.625, "step": 196000 }, { "epoch": 1.26, "learning_rate": 1.8476368477648625e-05, "loss": 2.0016, "step": 197000 }, { "epoch": 1.26, "eval_loss": 2.2660317420959473, "eval_runtime": 19.6654, "eval_samples_per_second": 101.702, "eval_steps_per_second": 1.627, "step": 197000 }, { "epoch": 1.27, "learning_rate": 1.8316350043525015e-05, "loss": 2.0044, "step": 198000 }, { "epoch": 1.27, "eval_loss": 2.204163074493408, "eval_runtime": 18.9759, "eval_samples_per_second": 105.397, "eval_steps_per_second": 1.686, "step": 198000 }, { "epoch": 1.27, "learning_rate": 1.8156331609401404e-05, "loss": 1.9962, "step": 199000 }, { "epoch": 1.27, "eval_loss": 2.214494228363037, "eval_runtime": 19.1044, "eval_samples_per_second": 104.688, "eval_steps_per_second": 1.675, "step": 199000 }, { "epoch": 1.28, "learning_rate": 1.7996313175277793e-05, "loss": 2.002, "step": 200000 }, { "epoch": 1.28, "eval_loss": 2.231771230697632, "eval_runtime": 19.3683, "eval_samples_per_second": 103.262, "eval_steps_per_second": 1.652, "step": 200000 }, { "epoch": 1.29, "learning_rate": 1.7836294741154182e-05, "loss": 1.9933, "step": 201000 }, { "epoch": 1.29, "eval_loss": 2.2037816047668457, "eval_runtime": 19.3894, "eval_samples_per_second": 103.149, "eval_steps_per_second": 1.65, "step": 201000 }, { "epoch": 1.29, "learning_rate": 1.767627630703057e-05, "loss": 2.01, "step": 202000 }, { "epoch": 1.29, "eval_loss": 2.1932146549224854, "eval_runtime": 19.0804, "eval_samples_per_second": 104.819, "eval_steps_per_second": 1.677, "step": 202000 }, { "epoch": 1.3, "learning_rate": 1.751625787290696e-05, "loss": 1.9876, "step": 203000 }, { "epoch": 1.3, "eval_loss": 2.1909868717193604, "eval_runtime": 19.2334, "eval_samples_per_second": 103.986, "eval_steps_per_second": 1.664, "step": 203000 }, { "epoch": 1.31, "learning_rate": 1.7356239438783346e-05, "loss": 1.9959, "step": 204000 }, { "epoch": 1.31, "eval_loss": 2.226149559020996, "eval_runtime": 19.403, "eval_samples_per_second": 103.077, "eval_steps_per_second": 1.649, "step": 204000 }, { "epoch": 1.31, "learning_rate": 1.719622100465974e-05, "loss": 1.9966, "step": 205000 }, { "epoch": 1.31, "eval_loss": 2.250934600830078, "eval_runtime": 19.4964, "eval_samples_per_second": 102.583, "eval_steps_per_second": 1.641, "step": 205000 }, { "epoch": 1.32, "learning_rate": 1.7036202570536128e-05, "loss": 2.001, "step": 206000 }, { "epoch": 1.32, "eval_loss": 2.1994211673736572, "eval_runtime": 19.1839, "eval_samples_per_second": 104.254, "eval_steps_per_second": 1.668, "step": 206000 }, { "epoch": 1.32, "learning_rate": 1.6876184136412514e-05, "loss": 1.9883, "step": 207000 }, { "epoch": 1.32, "eval_loss": 2.196751356124878, "eval_runtime": 19.6979, "eval_samples_per_second": 101.534, "eval_steps_per_second": 1.625, "step": 207000 }, { "epoch": 1.33, "learning_rate": 1.6716165702288906e-05, "loss": 1.9968, "step": 208000 }, { "epoch": 1.33, "eval_loss": 2.248135805130005, "eval_runtime": 19.2411, "eval_samples_per_second": 103.944, "eval_steps_per_second": 1.663, "step": 208000 }, { "epoch": 1.34, "learning_rate": 1.6556147268165292e-05, "loss": 1.9951, "step": 209000 }, { "epoch": 1.34, "eval_loss": 2.213362216949463, "eval_runtime": 19.146, "eval_samples_per_second": 104.46, "eval_steps_per_second": 1.671, "step": 209000 }, { "epoch": 1.34, "learning_rate": 1.639612883404168e-05, "loss": 1.9941, "step": 210000 }, { "epoch": 1.34, "eval_loss": 2.219302177429199, "eval_runtime": 19.0054, "eval_samples_per_second": 105.233, "eval_steps_per_second": 1.684, "step": 210000 }, { "epoch": 1.35, "learning_rate": 1.6236110399918074e-05, "loss": 1.9875, "step": 211000 }, { "epoch": 1.35, "eval_loss": 2.2148916721343994, "eval_runtime": 19.4732, "eval_samples_per_second": 102.705, "eval_steps_per_second": 1.643, "step": 211000 }, { "epoch": 1.36, "learning_rate": 1.607609196579446e-05, "loss": 2.0026, "step": 212000 }, { "epoch": 1.36, "eval_loss": 2.197999954223633, "eval_runtime": 19.3649, "eval_samples_per_second": 103.28, "eval_steps_per_second": 1.652, "step": 212000 }, { "epoch": 1.36, "learning_rate": 1.591607353167085e-05, "loss": 1.9908, "step": 213000 }, { "epoch": 1.36, "eval_loss": 2.2245354652404785, "eval_runtime": 19.4688, "eval_samples_per_second": 102.728, "eval_steps_per_second": 1.644, "step": 213000 }, { "epoch": 1.37, "learning_rate": 1.5756055097547238e-05, "loss": 1.979, "step": 214000 }, { "epoch": 1.37, "eval_loss": 2.186586856842041, "eval_runtime": 19.6234, "eval_samples_per_second": 101.919, "eval_steps_per_second": 1.631, "step": 214000 }, { "epoch": 1.38, "learning_rate": 1.5596036663423627e-05, "loss": 1.99, "step": 215000 }, { "epoch": 1.38, "eval_loss": 2.182631015777588, "eval_runtime": 19.4018, "eval_samples_per_second": 103.083, "eval_steps_per_second": 1.649, "step": 215000 }, { "epoch": 1.38, "learning_rate": 1.5436018229300017e-05, "loss": 1.9816, "step": 216000 }, { "epoch": 1.38, "eval_loss": 2.187858819961548, "eval_runtime": 19.4098, "eval_samples_per_second": 103.041, "eval_steps_per_second": 1.649, "step": 216000 }, { "epoch": 1.39, "learning_rate": 1.5275999795176406e-05, "loss": 1.989, "step": 217000 }, { "epoch": 1.39, "eval_loss": 2.232002019882202, "eval_runtime": 19.4529, "eval_samples_per_second": 102.813, "eval_steps_per_second": 1.645, "step": 217000 }, { "epoch": 1.4, "learning_rate": 1.5115981361052795e-05, "loss": 1.9931, "step": 218000 }, { "epoch": 1.4, "eval_loss": 2.1929688453674316, "eval_runtime": 19.3402, "eval_samples_per_second": 103.411, "eval_steps_per_second": 1.655, "step": 218000 }, { "epoch": 1.4, "learning_rate": 1.4955962926929182e-05, "loss": 1.9804, "step": 219000 }, { "epoch": 1.4, "eval_loss": 2.2313404083251953, "eval_runtime": 19.6691, "eval_samples_per_second": 101.682, "eval_steps_per_second": 1.627, "step": 219000 }, { "epoch": 1.41, "learning_rate": 1.4795944492805572e-05, "loss": 1.9902, "step": 220000 }, { "epoch": 1.41, "eval_loss": 2.1808815002441406, "eval_runtime": 19.8875, "eval_samples_per_second": 100.566, "eval_steps_per_second": 1.609, "step": 220000 }, { "epoch": 1.41, "learning_rate": 1.4635926058681963e-05, "loss": 1.9791, "step": 221000 }, { "epoch": 1.41, "eval_loss": 2.1454262733459473, "eval_runtime": 19.9595, "eval_samples_per_second": 100.203, "eval_steps_per_second": 1.603, "step": 221000 }, { "epoch": 1.42, "learning_rate": 1.4475907624558348e-05, "loss": 1.9702, "step": 222000 }, { "epoch": 1.42, "eval_loss": 2.220078468322754, "eval_runtime": 19.5477, "eval_samples_per_second": 102.314, "eval_steps_per_second": 1.637, "step": 222000 }, { "epoch": 1.43, "learning_rate": 1.431588919043474e-05, "loss": 1.9848, "step": 223000 }, { "epoch": 1.43, "eval_loss": 2.198873281478882, "eval_runtime": 19.8165, "eval_samples_per_second": 100.926, "eval_steps_per_second": 1.615, "step": 223000 }, { "epoch": 1.43, "learning_rate": 1.4155870756311127e-05, "loss": 1.9813, "step": 224000 }, { "epoch": 1.43, "eval_loss": 2.197327136993408, "eval_runtime": 21.9598, "eval_samples_per_second": 91.076, "eval_steps_per_second": 1.457, "step": 224000 }, { "epoch": 1.44, "learning_rate": 1.3995852322187516e-05, "loss": 1.9784, "step": 225000 }, { "epoch": 1.44, "eval_loss": 2.189138889312744, "eval_runtime": 19.3319, "eval_samples_per_second": 103.456, "eval_steps_per_second": 1.655, "step": 225000 }, { "epoch": 1.45, "learning_rate": 1.3835833888063907e-05, "loss": 1.9766, "step": 226000 }, { "epoch": 1.45, "eval_loss": 2.20912504196167, "eval_runtime": 19.5253, "eval_samples_per_second": 102.431, "eval_steps_per_second": 1.639, "step": 226000 }, { "epoch": 1.45, "learning_rate": 1.3675815453940294e-05, "loss": 1.9732, "step": 227000 }, { "epoch": 1.45, "eval_loss": 2.140838384628296, "eval_runtime": 19.1497, "eval_samples_per_second": 104.44, "eval_steps_per_second": 1.671, "step": 227000 }, { "epoch": 1.46, "learning_rate": 1.3515797019816683e-05, "loss": 1.9621, "step": 228000 }, { "epoch": 1.46, "eval_loss": 2.226170063018799, "eval_runtime": 19.0166, "eval_samples_per_second": 105.171, "eval_steps_per_second": 1.683, "step": 228000 }, { "epoch": 1.47, "learning_rate": 1.3355778585693071e-05, "loss": 1.9739, "step": 229000 }, { "epoch": 1.47, "eval_loss": 2.2281548976898193, "eval_runtime": 19.3581, "eval_samples_per_second": 103.316, "eval_steps_per_second": 1.653, "step": 229000 }, { "epoch": 1.47, "learning_rate": 1.3195760151569462e-05, "loss": 1.968, "step": 230000 }, { "epoch": 1.47, "eval_loss": 2.205911636352539, "eval_runtime": 19.2592, "eval_samples_per_second": 103.846, "eval_steps_per_second": 1.662, "step": 230000 }, { "epoch": 1.48, "learning_rate": 1.3035741717445851e-05, "loss": 1.9656, "step": 231000 }, { "epoch": 1.48, "eval_loss": 2.2183620929718018, "eval_runtime": 19.2973, "eval_samples_per_second": 103.641, "eval_steps_per_second": 1.658, "step": 231000 }, { "epoch": 1.48, "learning_rate": 1.2875723283322239e-05, "loss": 1.9728, "step": 232000 }, { "epoch": 1.48, "eval_loss": 2.1920948028564453, "eval_runtime": 19.4211, "eval_samples_per_second": 102.981, "eval_steps_per_second": 1.648, "step": 232000 }, { "epoch": 1.49, "learning_rate": 1.271570484919863e-05, "loss": 1.9577, "step": 233000 }, { "epoch": 1.49, "eval_loss": 2.191782236099243, "eval_runtime": 19.3617, "eval_samples_per_second": 103.296, "eval_steps_per_second": 1.653, "step": 233000 }, { "epoch": 1.5, "learning_rate": 1.2555686415075015e-05, "loss": 1.9777, "step": 234000 }, { "epoch": 1.5, "eval_loss": 2.209336042404175, "eval_runtime": 19.3939, "eval_samples_per_second": 103.125, "eval_steps_per_second": 1.65, "step": 234000 }, { "epoch": 1.5, "learning_rate": 1.2395667980951406e-05, "loss": 1.9662, "step": 235000 }, { "epoch": 1.5, "eval_loss": 2.152353048324585, "eval_runtime": 19.7245, "eval_samples_per_second": 101.397, "eval_steps_per_second": 1.622, "step": 235000 }, { "epoch": 1.51, "learning_rate": 1.2235649546827795e-05, "loss": 1.9681, "step": 236000 }, { "epoch": 1.51, "eval_loss": 2.1999175548553467, "eval_runtime": 18.9532, "eval_samples_per_second": 105.523, "eval_steps_per_second": 1.688, "step": 236000 }, { "epoch": 1.52, "learning_rate": 1.2075631112704184e-05, "loss": 1.9543, "step": 237000 }, { "epoch": 1.52, "eval_loss": 2.1981661319732666, "eval_runtime": 19.2785, "eval_samples_per_second": 103.742, "eval_steps_per_second": 1.66, "step": 237000 }, { "epoch": 1.52, "learning_rate": 1.1915612678580574e-05, "loss": 1.9636, "step": 238000 }, { "epoch": 1.52, "eval_loss": 2.197685956954956, "eval_runtime": 19.3506, "eval_samples_per_second": 103.356, "eval_steps_per_second": 1.654, "step": 238000 }, { "epoch": 1.53, "learning_rate": 1.1755594244456961e-05, "loss": 1.9623, "step": 239000 }, { "epoch": 1.53, "eval_loss": 2.207620620727539, "eval_runtime": 19.1912, "eval_samples_per_second": 104.214, "eval_steps_per_second": 1.667, "step": 239000 }, { "epoch": 1.54, "learning_rate": 1.159557581033335e-05, "loss": 1.9645, "step": 240000 }, { "epoch": 1.54, "eval_loss": 2.1756386756896973, "eval_runtime": 19.1978, "eval_samples_per_second": 104.178, "eval_steps_per_second": 1.667, "step": 240000 }, { "epoch": 1.54, "learning_rate": 1.143555737620974e-05, "loss": 1.9676, "step": 241000 }, { "epoch": 1.54, "eval_loss": 2.1699678897857666, "eval_runtime": 19.2027, "eval_samples_per_second": 104.152, "eval_steps_per_second": 1.666, "step": 241000 }, { "epoch": 1.55, "learning_rate": 1.1275538942086129e-05, "loss": 1.9552, "step": 242000 }, { "epoch": 1.55, "eval_loss": 2.1813385486602783, "eval_runtime": 19.1939, "eval_samples_per_second": 104.2, "eval_steps_per_second": 1.667, "step": 242000 }, { "epoch": 1.56, "learning_rate": 1.1115520507962518e-05, "loss": 1.9675, "step": 243000 }, { "epoch": 1.56, "eval_loss": 2.1804428100585938, "eval_runtime": 19.3246, "eval_samples_per_second": 103.495, "eval_steps_per_second": 1.656, "step": 243000 }, { "epoch": 1.56, "learning_rate": 1.0955502073838907e-05, "loss": 1.9707, "step": 244000 }, { "epoch": 1.56, "eval_loss": 2.1776347160339355, "eval_runtime": 19.4613, "eval_samples_per_second": 102.768, "eval_steps_per_second": 1.644, "step": 244000 }, { "epoch": 1.57, "learning_rate": 1.0795483639715295e-05, "loss": 1.9609, "step": 245000 }, { "epoch": 1.57, "eval_loss": 2.2101809978485107, "eval_runtime": 19.232, "eval_samples_per_second": 103.993, "eval_steps_per_second": 1.664, "step": 245000 }, { "epoch": 1.57, "learning_rate": 1.0635465205591686e-05, "loss": 1.9584, "step": 246000 }, { "epoch": 1.57, "eval_loss": 2.18208384513855, "eval_runtime": 19.1408, "eval_samples_per_second": 104.489, "eval_steps_per_second": 1.672, "step": 246000 }, { "epoch": 1.58, "learning_rate": 1.0475446771468075e-05, "loss": 1.9568, "step": 247000 }, { "epoch": 1.58, "eval_loss": 2.164984941482544, "eval_runtime": 19.2986, "eval_samples_per_second": 103.634, "eval_steps_per_second": 1.658, "step": 247000 }, { "epoch": 1.59, "learning_rate": 1.0315428337344462e-05, "loss": 1.9514, "step": 248000 }, { "epoch": 1.59, "eval_loss": 2.218735456466675, "eval_runtime": 19.5707, "eval_samples_per_second": 102.193, "eval_steps_per_second": 1.635, "step": 248000 }, { "epoch": 1.59, "learning_rate": 1.0155409903220851e-05, "loss": 1.9567, "step": 249000 }, { "epoch": 1.59, "eval_loss": 2.1572988033294678, "eval_runtime": 19.0634, "eval_samples_per_second": 104.913, "eval_steps_per_second": 1.679, "step": 249000 }, { "epoch": 1.6, "learning_rate": 9.99539146909724e-06, "loss": 1.9555, "step": 250000 }, { "epoch": 1.6, "eval_loss": 2.1475002765655518, "eval_runtime": 19.0267, "eval_samples_per_second": 105.115, "eval_steps_per_second": 1.682, "step": 250000 }, { "epoch": 1.61, "learning_rate": 9.83537303497363e-06, "loss": 1.965, "step": 251000 }, { "epoch": 1.61, "eval_loss": 2.1785731315612793, "eval_runtime": 19.7697, "eval_samples_per_second": 101.165, "eval_steps_per_second": 1.619, "step": 251000 }, { "epoch": 1.61, "learning_rate": 9.675354600850019e-06, "loss": 1.9508, "step": 252000 }, { "epoch": 1.61, "eval_loss": 2.1723153591156006, "eval_runtime": 19.1786, "eval_samples_per_second": 104.283, "eval_steps_per_second": 1.669, "step": 252000 }, { "epoch": 1.62, "learning_rate": 9.515336166726408e-06, "loss": 1.9522, "step": 253000 }, { "epoch": 1.62, "eval_loss": 2.180307626724243, "eval_runtime": 18.9009, "eval_samples_per_second": 105.815, "eval_steps_per_second": 1.693, "step": 253000 }, { "epoch": 1.63, "learning_rate": 9.355317732602796e-06, "loss": 1.9637, "step": 254000 }, { "epoch": 1.63, "eval_loss": 2.179806709289551, "eval_runtime": 19.3455, "eval_samples_per_second": 103.383, "eval_steps_per_second": 1.654, "step": 254000 }, { "epoch": 1.63, "learning_rate": 9.195299298479185e-06, "loss": 1.9588, "step": 255000 }, { "epoch": 1.63, "eval_loss": 2.200853109359741, "eval_runtime": 19.4782, "eval_samples_per_second": 102.679, "eval_steps_per_second": 1.643, "step": 255000 }, { "epoch": 1.64, "learning_rate": 9.035280864355574e-06, "loss": 1.9553, "step": 256000 }, { "epoch": 1.64, "eval_loss": 2.1626343727111816, "eval_runtime": 19.24, "eval_samples_per_second": 103.95, "eval_steps_per_second": 1.663, "step": 256000 }, { "epoch": 1.64, "learning_rate": 8.875262430231963e-06, "loss": 1.946, "step": 257000 }, { "epoch": 1.64, "eval_loss": 2.1843950748443604, "eval_runtime": 19.1181, "eval_samples_per_second": 104.613, "eval_steps_per_second": 1.674, "step": 257000 }, { "epoch": 1.65, "learning_rate": 8.715243996108352e-06, "loss": 1.9493, "step": 258000 }, { "epoch": 1.65, "eval_loss": 2.150207757949829, "eval_runtime": 19.2502, "eval_samples_per_second": 103.895, "eval_steps_per_second": 1.662, "step": 258000 }, { "epoch": 1.66, "learning_rate": 8.55522556198474e-06, "loss": 1.9442, "step": 259000 }, { "epoch": 1.66, "eval_loss": 2.1614534854888916, "eval_runtime": 19.2393, "eval_samples_per_second": 103.954, "eval_steps_per_second": 1.663, "step": 259000 }, { "epoch": 1.66, "learning_rate": 8.395207127861129e-06, "loss": 1.945, "step": 260000 }, { "epoch": 1.66, "eval_loss": 2.178889751434326, "eval_runtime": 19.4657, "eval_samples_per_second": 102.745, "eval_steps_per_second": 1.644, "step": 260000 }, { "epoch": 1.67, "learning_rate": 8.23518869373752e-06, "loss": 1.9368, "step": 261000 }, { "epoch": 1.67, "eval_loss": 2.172461986541748, "eval_runtime": 19.2788, "eval_samples_per_second": 103.741, "eval_steps_per_second": 1.66, "step": 261000 }, { "epoch": 1.68, "learning_rate": 8.075170259613907e-06, "loss": 1.9393, "step": 262000 }, { "epoch": 1.68, "eval_loss": 2.169734001159668, "eval_runtime": 19.3666, "eval_samples_per_second": 103.27, "eval_steps_per_second": 1.652, "step": 262000 }, { "epoch": 1.68, "learning_rate": 7.915151825490297e-06, "loss": 1.9525, "step": 263000 }, { "epoch": 1.68, "eval_loss": 2.1597206592559814, "eval_runtime": 19.3459, "eval_samples_per_second": 103.381, "eval_steps_per_second": 1.654, "step": 263000 }, { "epoch": 1.69, "learning_rate": 7.755133391366686e-06, "loss": 1.9444, "step": 264000 }, { "epoch": 1.69, "eval_loss": 2.1798765659332275, "eval_runtime": 19.0083, "eval_samples_per_second": 105.217, "eval_steps_per_second": 1.683, "step": 264000 }, { "epoch": 1.7, "learning_rate": 7.595114957243074e-06, "loss": 1.9352, "step": 265000 }, { "epoch": 1.7, "eval_loss": 2.164872169494629, "eval_runtime": 19.1384, "eval_samples_per_second": 104.502, "eval_steps_per_second": 1.672, "step": 265000 }, { "epoch": 1.7, "learning_rate": 7.435096523119464e-06, "loss": 1.9537, "step": 266000 }, { "epoch": 1.7, "eval_loss": 2.1663596630096436, "eval_runtime": 19.6791, "eval_samples_per_second": 101.63, "eval_steps_per_second": 1.626, "step": 266000 }, { "epoch": 1.71, "learning_rate": 7.2750780889958526e-06, "loss": 1.9399, "step": 267000 }, { "epoch": 1.71, "eval_loss": 2.1855850219726562, "eval_runtime": 19.3954, "eval_samples_per_second": 103.117, "eval_steps_per_second": 1.65, "step": 267000 }, { "epoch": 1.72, "learning_rate": 7.115059654872242e-06, "loss": 1.9325, "step": 268000 }, { "epoch": 1.72, "eval_loss": 2.1838717460632324, "eval_runtime": 19.1074, "eval_samples_per_second": 104.671, "eval_steps_per_second": 1.675, "step": 268000 }, { "epoch": 1.72, "learning_rate": 6.95504122074863e-06, "loss": 1.9466, "step": 269000 }, { "epoch": 1.72, "eval_loss": 2.1524887084960938, "eval_runtime": 19.375, "eval_samples_per_second": 103.226, "eval_steps_per_second": 1.652, "step": 269000 }, { "epoch": 1.73, "learning_rate": 6.79502278662502e-06, "loss": 1.9403, "step": 270000 }, { "epoch": 1.73, "eval_loss": 2.1773369312286377, "eval_runtime": 19.1103, "eval_samples_per_second": 104.656, "eval_steps_per_second": 1.674, "step": 270000 }, { "epoch": 1.73, "learning_rate": 6.6350043525014085e-06, "loss": 1.9391, "step": 271000 }, { "epoch": 1.73, "eval_loss": 2.212693452835083, "eval_runtime": 19.2143, "eval_samples_per_second": 104.089, "eval_steps_per_second": 1.665, "step": 271000 }, { "epoch": 1.74, "learning_rate": 6.474985918377798e-06, "loss": 1.9419, "step": 272000 }, { "epoch": 1.74, "eval_loss": 2.1781909465789795, "eval_runtime": 19.4708, "eval_samples_per_second": 102.718, "eval_steps_per_second": 1.643, "step": 272000 }, { "epoch": 1.75, "learning_rate": 6.314967484254186e-06, "loss": 1.9454, "step": 273000 }, { "epoch": 1.75, "eval_loss": 2.1962130069732666, "eval_runtime": 18.6565, "eval_samples_per_second": 107.201, "eval_steps_per_second": 1.715, "step": 273000 }, { "epoch": 1.75, "learning_rate": 6.154949050130575e-06, "loss": 1.946, "step": 274000 }, { "epoch": 1.75, "eval_loss": 2.157792091369629, "eval_runtime": 19.0429, "eval_samples_per_second": 105.026, "eval_steps_per_second": 1.68, "step": 274000 }, { "epoch": 1.76, "learning_rate": 5.994930616006964e-06, "loss": 1.9339, "step": 275000 }, { "epoch": 1.76, "eval_loss": 2.190920829772949, "eval_runtime": 18.7174, "eval_samples_per_second": 106.853, "eval_steps_per_second": 1.71, "step": 275000 }, { "epoch": 1.77, "learning_rate": 5.8349121818833536e-06, "loss": 1.9289, "step": 276000 }, { "epoch": 1.77, "eval_loss": 2.169802665710449, "eval_runtime": 19.7624, "eval_samples_per_second": 101.202, "eval_steps_per_second": 1.619, "step": 276000 }, { "epoch": 1.77, "learning_rate": 5.674893747759742e-06, "loss": 1.9284, "step": 277000 }, { "epoch": 1.77, "eval_loss": 2.149372100830078, "eval_runtime": 18.847, "eval_samples_per_second": 106.118, "eval_steps_per_second": 1.698, "step": 277000 }, { "epoch": 1.78, "learning_rate": 5.514875313636131e-06, "loss": 1.9423, "step": 278000 }, { "epoch": 1.78, "eval_loss": 2.163377046585083, "eval_runtime": 19.097, "eval_samples_per_second": 104.728, "eval_steps_per_second": 1.676, "step": 278000 }, { "epoch": 1.79, "learning_rate": 5.35485687951252e-06, "loss": 1.9317, "step": 279000 }, { "epoch": 1.79, "eval_loss": 2.129027843475342, "eval_runtime": 18.715, "eval_samples_per_second": 106.866, "eval_steps_per_second": 1.71, "step": 279000 }, { "epoch": 1.79, "learning_rate": 5.194838445388909e-06, "loss": 1.9216, "step": 280000 }, { "epoch": 1.79, "eval_loss": 2.171983480453491, "eval_runtime": 18.8986, "eval_samples_per_second": 105.828, "eval_steps_per_second": 1.693, "step": 280000 }, { "epoch": 1.8, "learning_rate": 5.034820011265298e-06, "loss": 1.9176, "step": 281000 }, { "epoch": 1.8, "eval_loss": 2.1561877727508545, "eval_runtime": 18.6229, "eval_samples_per_second": 107.395, "eval_steps_per_second": 1.718, "step": 281000 }, { "epoch": 1.81, "learning_rate": 4.874801577141687e-06, "loss": 1.9345, "step": 282000 }, { "epoch": 1.81, "eval_loss": 2.1655592918395996, "eval_runtime": 18.6917, "eval_samples_per_second": 106.999, "eval_steps_per_second": 1.712, "step": 282000 }, { "epoch": 1.81, "learning_rate": 4.714783143018076e-06, "loss": 1.9431, "step": 283000 }, { "epoch": 1.81, "eval_loss": 2.1130497455596924, "eval_runtime": 18.7533, "eval_samples_per_second": 106.648, "eval_steps_per_second": 1.706, "step": 283000 }, { "epoch": 1.82, "learning_rate": 4.5547647088944646e-06, "loss": 1.936, "step": 284000 }, { "epoch": 1.82, "eval_loss": 2.1281943321228027, "eval_runtime": 18.4643, "eval_samples_per_second": 108.317, "eval_steps_per_second": 1.733, "step": 284000 }, { "epoch": 1.82, "learning_rate": 4.394746274770854e-06, "loss": 1.9344, "step": 285000 }, { "epoch": 1.82, "eval_loss": 2.142157554626465, "eval_runtime": 18.6731, "eval_samples_per_second": 107.106, "eval_steps_per_second": 1.714, "step": 285000 }, { "epoch": 1.83, "learning_rate": 4.234727840647243e-06, "loss": 1.9237, "step": 286000 }, { "epoch": 1.83, "eval_loss": 2.1462085247039795, "eval_runtime": 18.787, "eval_samples_per_second": 106.457, "eval_steps_per_second": 1.703, "step": 286000 }, { "epoch": 1.84, "learning_rate": 4.074709406523631e-06, "loss": 1.9309, "step": 287000 }, { "epoch": 1.84, "eval_loss": 2.1435041427612305, "eval_runtime": 18.7845, "eval_samples_per_second": 106.471, "eval_steps_per_second": 1.704, "step": 287000 }, { "epoch": 1.84, "learning_rate": 3.914690972400021e-06, "loss": 1.9239, "step": 288000 }, { "epoch": 1.84, "eval_loss": 2.152646064758301, "eval_runtime": 18.6983, "eval_samples_per_second": 106.961, "eval_steps_per_second": 1.711, "step": 288000 }, { "epoch": 1.85, "learning_rate": 3.7546725382764097e-06, "loss": 1.9168, "step": 289000 }, { "epoch": 1.85, "eval_loss": 2.1280956268310547, "eval_runtime": 18.8639, "eval_samples_per_second": 106.023, "eval_steps_per_second": 1.696, "step": 289000 }, { "epoch": 1.86, "learning_rate": 3.5946541041527984e-06, "loss": 1.9232, "step": 290000 }, { "epoch": 1.86, "eval_loss": 2.143430471420288, "eval_runtime": 18.873, "eval_samples_per_second": 105.971, "eval_steps_per_second": 1.696, "step": 290000 }, { "epoch": 1.86, "learning_rate": 3.4346356700291876e-06, "loss": 1.9338, "step": 291000 }, { "epoch": 1.86, "eval_loss": 2.1642520427703857, "eval_runtime": 18.6105, "eval_samples_per_second": 107.466, "eval_steps_per_second": 1.719, "step": 291000 }, { "epoch": 1.87, "learning_rate": 3.2746172359055764e-06, "loss": 1.9241, "step": 292000 }, { "epoch": 1.87, "eval_loss": 2.120400905609131, "eval_runtime": 18.6654, "eval_samples_per_second": 107.15, "eval_steps_per_second": 1.714, "step": 292000 }, { "epoch": 1.88, "learning_rate": 3.114598801781965e-06, "loss": 1.9209, "step": 293000 }, { "epoch": 1.88, "eval_loss": 2.1418490409851074, "eval_runtime": 18.986, "eval_samples_per_second": 105.341, "eval_steps_per_second": 1.685, "step": 293000 }, { "epoch": 1.88, "learning_rate": 2.9545803676583543e-06, "loss": 1.928, "step": 294000 }, { "epoch": 1.88, "eval_loss": 2.1255481243133545, "eval_runtime": 18.6322, "eval_samples_per_second": 107.341, "eval_steps_per_second": 1.717, "step": 294000 }, { "epoch": 1.89, "learning_rate": 2.7945619335347435e-06, "loss": 1.9482, "step": 295000 }, { "epoch": 1.89, "eval_loss": 2.185188055038452, "eval_runtime": 18.6065, "eval_samples_per_second": 107.489, "eval_steps_per_second": 1.72, "step": 295000 }, { "epoch": 1.89, "learning_rate": 2.6345434994111323e-06, "loss": 1.9276, "step": 296000 }, { "epoch": 1.89, "eval_loss": 2.1754209995269775, "eval_runtime": 18.6892, "eval_samples_per_second": 107.014, "eval_steps_per_second": 1.712, "step": 296000 }, { "epoch": 1.9, "learning_rate": 2.4745250652875215e-06, "loss": 1.9214, "step": 297000 }, { "epoch": 1.9, "eval_loss": 2.124568462371826, "eval_runtime": 18.6607, "eval_samples_per_second": 107.177, "eval_steps_per_second": 1.715, "step": 297000 }, { "epoch": 1.91, "learning_rate": 2.3145066311639102e-06, "loss": 1.9296, "step": 298000 }, { "epoch": 1.91, "eval_loss": 2.1418752670288086, "eval_runtime": 18.8993, "eval_samples_per_second": 105.824, "eval_steps_per_second": 1.693, "step": 298000 }, { "epoch": 1.91, "learning_rate": 2.154488197040299e-06, "loss": 1.9182, "step": 299000 }, { "epoch": 1.91, "eval_loss": 2.1427695751190186, "eval_runtime": 18.6439, "eval_samples_per_second": 107.273, "eval_steps_per_second": 1.716, "step": 299000 }, { "epoch": 1.92, "learning_rate": 1.994469762916688e-06, "loss": 1.9172, "step": 300000 }, { "epoch": 1.92, "eval_loss": 2.17488956451416, "eval_runtime": 20.0248, "eval_samples_per_second": 99.876, "eval_steps_per_second": 1.598, "step": 300000 }, { "epoch": 1.93, "learning_rate": 1.834451328793077e-06, "loss": 1.9054, "step": 301000 }, { "epoch": 1.93, "eval_loss": 2.1516401767730713, "eval_runtime": 19.1509, "eval_samples_per_second": 104.434, "eval_steps_per_second": 1.671, "step": 301000 }, { "epoch": 1.93, "learning_rate": 1.674432894669466e-06, "loss": 1.9209, "step": 302000 }, { "epoch": 1.93, "eval_loss": 2.1247944831848145, "eval_runtime": 19.0766, "eval_samples_per_second": 104.84, "eval_steps_per_second": 1.677, "step": 302000 }, { "epoch": 1.94, "learning_rate": 1.5144144605458551e-06, "loss": 1.9191, "step": 303000 }, { "epoch": 1.94, "eval_loss": 2.1422977447509766, "eval_runtime": 19.0887, "eval_samples_per_second": 104.774, "eval_steps_per_second": 1.676, "step": 303000 }, { "epoch": 1.95, "learning_rate": 1.354396026422244e-06, "loss": 1.9143, "step": 304000 }, { "epoch": 1.95, "eval_loss": 2.1302106380462646, "eval_runtime": 19.5033, "eval_samples_per_second": 102.547, "eval_steps_per_second": 1.641, "step": 304000 }, { "epoch": 1.95, "learning_rate": 1.1943775922986329e-06, "loss": 1.9163, "step": 305000 }, { "epoch": 1.95, "eval_loss": 2.16552472114563, "eval_runtime": 18.815, "eval_samples_per_second": 106.298, "eval_steps_per_second": 1.701, "step": 305000 }, { "epoch": 1.96, "learning_rate": 1.0343591581750219e-06, "loss": 1.915, "step": 306000 }, { "epoch": 1.96, "eval_loss": 2.1272425651550293, "eval_runtime": 19.1159, "eval_samples_per_second": 104.625, "eval_steps_per_second": 1.674, "step": 306000 }, { "epoch": 1.97, "learning_rate": 8.743407240514107e-07, "loss": 1.9193, "step": 307000 }, { "epoch": 1.97, "eval_loss": 2.151264190673828, "eval_runtime": 18.961, "eval_samples_per_second": 105.48, "eval_steps_per_second": 1.688, "step": 307000 }, { "epoch": 1.97, "learning_rate": 7.143222899277997e-07, "loss": 1.9238, "step": 308000 }, { "epoch": 1.97, "eval_loss": 2.145237922668457, "eval_runtime": 19.4596, "eval_samples_per_second": 102.777, "eval_steps_per_second": 1.644, "step": 308000 }, { "epoch": 1.98, "learning_rate": 5.543038558041887e-07, "loss": 1.9129, "step": 309000 }, { "epoch": 1.98, "eval_loss": 2.132681369781494, "eval_runtime": 18.9129, "eval_samples_per_second": 105.748, "eval_steps_per_second": 1.692, "step": 309000 }, { "epoch": 1.98, "learning_rate": 3.9428542168057766e-07, "loss": 1.92, "step": 310000 }, { "epoch": 1.98, "eval_loss": 2.1479594707489014, "eval_runtime": 18.8663, "eval_samples_per_second": 106.009, "eval_steps_per_second": 1.696, "step": 310000 }, { "epoch": 1.99, "learning_rate": 2.342669875569666e-07, "loss": 1.9098, "step": 311000 }, { "epoch": 1.99, "eval_loss": 2.171926736831665, "eval_runtime": 19.0151, "eval_samples_per_second": 105.179, "eval_steps_per_second": 1.683, "step": 311000 }, { "epoch": 2.0, "learning_rate": 7.424855343335553e-08, "loss": 1.9105, "step": 312000 }, { "epoch": 2.0, "eval_loss": 2.1461212635040283, "eval_runtime": 19.4871, "eval_samples_per_second": 102.632, "eval_steps_per_second": 1.642, "step": 312000 }, { "epoch": 2.0, "learning_rate": 1.6609486746206498e-05, "loss": 1.9453, "step": 313000 }, { "epoch": 2.0, "eval_loss": 2.190183162689209, "eval_runtime": 16.3864, "eval_samples_per_second": 122.052, "eval_steps_per_second": 1.953, "step": 313000 }, { "epoch": 2.01, "learning_rate": 1.650280779012409e-05, "loss": 1.9458, "step": 314000 }, { "epoch": 2.01, "eval_loss": 2.1692402362823486, "eval_runtime": 15.9646, "eval_samples_per_second": 125.277, "eval_steps_per_second": 2.004, "step": 314000 }, { "epoch": 2.02, "learning_rate": 1.639612883404168e-05, "loss": 1.9428, "step": 315000 }, { "epoch": 2.02, "eval_loss": 2.1538236141204834, "eval_runtime": 16.0153, "eval_samples_per_second": 124.881, "eval_steps_per_second": 1.998, "step": 315000 }, { "epoch": 2.02, "learning_rate": 1.6289449877959276e-05, "loss": 1.9488, "step": 316000 }, { "epoch": 2.02, "eval_loss": 2.153665542602539, "eval_runtime": 16.1132, "eval_samples_per_second": 124.122, "eval_steps_per_second": 1.986, "step": 316000 }, { "epoch": 2.03, "learning_rate": 1.6182770921876865e-05, "loss": 1.9437, "step": 317000 }, { "epoch": 2.03, "eval_loss": 2.1973447799682617, "eval_runtime": 17.5461, "eval_samples_per_second": 113.986, "eval_steps_per_second": 1.824, "step": 317000 }, { "epoch": 2.04, "learning_rate": 1.607609196579446e-05, "loss": 1.9487, "step": 318000 }, { "epoch": 2.04, "eval_loss": 2.1677041053771973, "eval_runtime": 15.9539, "eval_samples_per_second": 125.361, "eval_steps_per_second": 2.006, "step": 318000 }, { "epoch": 2.04, "learning_rate": 1.596941300971205e-05, "loss": 1.9559, "step": 319000 }, { "epoch": 2.04, "eval_loss": 2.155820369720459, "eval_runtime": 16.1853, "eval_samples_per_second": 123.569, "eval_steps_per_second": 1.977, "step": 319000 }, { "epoch": 2.05, "learning_rate": 1.5862734053629647e-05, "loss": 1.9662, "step": 320000 }, { "epoch": 2.05, "eval_loss": 2.1629369258880615, "eval_runtime": 16.6642, "eval_samples_per_second": 120.018, "eval_steps_per_second": 1.92, "step": 320000 }, { "epoch": 2.05, "learning_rate": 1.5756055097547238e-05, "loss": 1.9556, "step": 321000 }, { "epoch": 2.05, "eval_loss": 2.1815614700317383, "eval_runtime": 16.5814, "eval_samples_per_second": 120.617, "eval_steps_per_second": 1.93, "step": 321000 }, { "epoch": 2.06, "learning_rate": 1.564937614146483e-05, "loss": 1.9512, "step": 322000 }, { "epoch": 2.06, "eval_loss": 2.1164066791534424, "eval_runtime": 15.9298, "eval_samples_per_second": 125.551, "eval_steps_per_second": 2.009, "step": 322000 }, { "epoch": 2.07, "learning_rate": 1.5542697185382425e-05, "loss": 1.9544, "step": 323000 }, { "epoch": 2.07, "eval_loss": 2.165865659713745, "eval_runtime": 15.883, "eval_samples_per_second": 125.921, "eval_steps_per_second": 2.015, "step": 323000 }, { "epoch": 2.07, "learning_rate": 1.5436018229300017e-05, "loss": 1.9568, "step": 324000 }, { "epoch": 2.07, "eval_loss": 2.1747090816497803, "eval_runtime": 15.9331, "eval_samples_per_second": 125.525, "eval_steps_per_second": 2.008, "step": 324000 }, { "epoch": 2.08, "learning_rate": 1.5329339273217608e-05, "loss": 1.9449, "step": 325000 }, { "epoch": 2.08, "eval_loss": 2.1862990856170654, "eval_runtime": 16.6952, "eval_samples_per_second": 119.795, "eval_steps_per_second": 1.917, "step": 325000 }, { "epoch": 2.09, "learning_rate": 1.5222660317135202e-05, "loss": 1.9491, "step": 326000 }, { "epoch": 2.09, "eval_loss": 2.1621673107147217, "eval_runtime": 15.9325, "eval_samples_per_second": 125.529, "eval_steps_per_second": 2.008, "step": 326000 }, { "epoch": 2.09, "learning_rate": 1.5115981361052795e-05, "loss": 1.9526, "step": 327000 }, { "epoch": 2.09, "eval_loss": 2.1826863288879395, "eval_runtime": 15.8228, "eval_samples_per_second": 126.4, "eval_steps_per_second": 2.022, "step": 327000 }, { "epoch": 2.1, "learning_rate": 1.5009302404970388e-05, "loss": 1.952, "step": 328000 }, { "epoch": 2.1, "eval_loss": 2.1913392543792725, "eval_runtime": 16.1458, "eval_samples_per_second": 123.871, "eval_steps_per_second": 1.982, "step": 328000 }, { "epoch": 2.11, "learning_rate": 1.4902623448887978e-05, "loss": 1.9545, "step": 329000 }, { "epoch": 2.11, "eval_loss": 2.18023681640625, "eval_runtime": 16.3141, "eval_samples_per_second": 122.593, "eval_steps_per_second": 1.961, "step": 329000 }, { "epoch": 2.11, "learning_rate": 1.4795944492805572e-05, "loss": 1.9616, "step": 330000 }, { "epoch": 2.11, "eval_loss": 2.178854465484619, "eval_runtime": 15.9442, "eval_samples_per_second": 125.438, "eval_steps_per_second": 2.007, "step": 330000 }, { "epoch": 2.12, "learning_rate": 1.4689265536723165e-05, "loss": 1.9515, "step": 331000 }, { "epoch": 2.12, "eval_loss": 2.1725854873657227, "eval_runtime": 15.939, "eval_samples_per_second": 125.478, "eval_steps_per_second": 2.008, "step": 331000 }, { "epoch": 2.13, "learning_rate": 1.4582586580640758e-05, "loss": 1.9484, "step": 332000 }, { "epoch": 2.13, "eval_loss": 2.1632540225982666, "eval_runtime": 16.7042, "eval_samples_per_second": 119.731, "eval_steps_per_second": 1.916, "step": 332000 }, { "epoch": 2.13, "learning_rate": 1.4475907624558348e-05, "loss": 1.962, "step": 333000 }, { "epoch": 2.13, "eval_loss": 2.1514594554901123, "eval_runtime": 15.9872, "eval_samples_per_second": 125.1, "eval_steps_per_second": 2.002, "step": 333000 }, { "epoch": 2.14, "learning_rate": 1.4369228668475942e-05, "loss": 1.9563, "step": 334000 }, { "epoch": 2.14, "eval_loss": 2.18198299407959, "eval_runtime": 16.1962, "eval_samples_per_second": 123.486, "eval_steps_per_second": 1.976, "step": 334000 }, { "epoch": 2.14, "learning_rate": 1.4262549712393535e-05, "loss": 1.9544, "step": 335000 }, { "epoch": 2.14, "eval_loss": 2.168269634246826, "eval_runtime": 16.3411, "eval_samples_per_second": 122.391, "eval_steps_per_second": 1.958, "step": 335000 }, { "epoch": 2.15, "learning_rate": 1.4155870756311127e-05, "loss": 1.9509, "step": 336000 }, { "epoch": 2.15, "eval_loss": 2.157496690750122, "eval_runtime": 16.7663, "eval_samples_per_second": 119.287, "eval_steps_per_second": 1.909, "step": 336000 }, { "epoch": 2.16, "learning_rate": 1.404919180022872e-05, "loss": 1.9527, "step": 337000 }, { "epoch": 2.16, "eval_loss": 2.162778377532959, "eval_runtime": 16.4574, "eval_samples_per_second": 121.526, "eval_steps_per_second": 1.944, "step": 337000 }, { "epoch": 2.16, "learning_rate": 1.3942512844146313e-05, "loss": 1.9455, "step": 338000 }, { "epoch": 2.16, "eval_loss": 2.2115304470062256, "eval_runtime": 15.9425, "eval_samples_per_second": 125.451, "eval_steps_per_second": 2.007, "step": 338000 }, { "epoch": 2.17, "learning_rate": 1.3835833888063907e-05, "loss": 1.9443, "step": 339000 }, { "epoch": 2.17, "eval_loss": 2.1575698852539062, "eval_runtime": 16.1638, "eval_samples_per_second": 123.734, "eval_steps_per_second": 1.98, "step": 339000 }, { "epoch": 2.18, "learning_rate": 1.3729154931981497e-05, "loss": 1.9471, "step": 340000 }, { "epoch": 2.18, "eval_loss": 2.163440465927124, "eval_runtime": 16.5887, "eval_samples_per_second": 120.564, "eval_steps_per_second": 1.929, "step": 340000 }, { "epoch": 2.18, "learning_rate": 1.362247597589909e-05, "loss": 1.9385, "step": 341000 }, { "epoch": 2.18, "eval_loss": 2.1808547973632812, "eval_runtime": 16.0292, "eval_samples_per_second": 124.773, "eval_steps_per_second": 1.996, "step": 341000 }, { "epoch": 2.19, "learning_rate": 1.3515797019816683e-05, "loss": 1.9472, "step": 342000 }, { "epoch": 2.19, "eval_loss": 2.1804370880126953, "eval_runtime": 16.1599, "eval_samples_per_second": 123.763, "eval_steps_per_second": 1.98, "step": 342000 }, { "epoch": 2.2, "learning_rate": 1.3409118063734277e-05, "loss": 1.9578, "step": 343000 }, { "epoch": 2.2, "eval_loss": 2.172938346862793, "eval_runtime": 16.4066, "eval_samples_per_second": 121.902, "eval_steps_per_second": 1.95, "step": 343000 }, { "epoch": 2.2, "learning_rate": 1.3302439107651868e-05, "loss": 1.9501, "step": 344000 }, { "epoch": 2.2, "eval_loss": 2.1206016540527344, "eval_runtime": 16.7126, "eval_samples_per_second": 119.67, "eval_steps_per_second": 1.915, "step": 344000 }, { "epoch": 2.21, "learning_rate": 1.3195760151569462e-05, "loss": 1.9363, "step": 345000 }, { "epoch": 2.21, "eval_loss": 2.1700916290283203, "eval_runtime": 15.9452, "eval_samples_per_second": 125.43, "eval_steps_per_second": 2.007, "step": 345000 }, { "epoch": 2.21, "learning_rate": 1.3089081195487055e-05, "loss": 1.9452, "step": 346000 }, { "epoch": 2.21, "eval_loss": 2.1466197967529297, "eval_runtime": 16.0688, "eval_samples_per_second": 124.465, "eval_steps_per_second": 1.991, "step": 346000 }, { "epoch": 2.22, "learning_rate": 1.2982402239404649e-05, "loss": 1.9544, "step": 347000 }, { "epoch": 2.22, "eval_loss": 2.118955135345459, "eval_runtime": 16.4559, "eval_samples_per_second": 121.537, "eval_steps_per_second": 1.945, "step": 347000 }, { "epoch": 2.23, "learning_rate": 1.2875723283322239e-05, "loss": 1.9442, "step": 348000 }, { "epoch": 2.23, "eval_loss": 2.2223548889160156, "eval_runtime": 15.9342, "eval_samples_per_second": 125.516, "eval_steps_per_second": 2.008, "step": 348000 }, { "epoch": 2.23, "learning_rate": 1.2769044327239832e-05, "loss": 1.949, "step": 349000 }, { "epoch": 2.23, "eval_loss": 2.1240313053131104, "eval_runtime": 16.1322, "eval_samples_per_second": 123.975, "eval_steps_per_second": 1.984, "step": 349000 }, { "epoch": 2.24, "learning_rate": 1.2662365371157425e-05, "loss": 1.9524, "step": 350000 }, { "epoch": 2.24, "eval_loss": 2.2078564167022705, "eval_runtime": 15.9714, "eval_samples_per_second": 125.224, "eval_steps_per_second": 2.004, "step": 350000 }, { "epoch": 2.25, "learning_rate": 1.2555686415075015e-05, "loss": 1.9371, "step": 351000 }, { "epoch": 2.25, "eval_loss": 2.1884605884552, "eval_runtime": 17.7436, "eval_samples_per_second": 112.717, "eval_steps_per_second": 1.803, "step": 351000 }, { "epoch": 2.25, "learning_rate": 1.2449007458992609e-05, "loss": 1.9474, "step": 352000 }, { "epoch": 2.25, "eval_loss": 2.165747880935669, "eval_runtime": 15.9774, "eval_samples_per_second": 125.177, "eval_steps_per_second": 2.003, "step": 352000 }, { "epoch": 2.26, "learning_rate": 1.2342328502910202e-05, "loss": 1.9444, "step": 353000 }, { "epoch": 2.26, "eval_loss": 2.180070161819458, "eval_runtime": 15.9059, "eval_samples_per_second": 125.74, "eval_steps_per_second": 2.012, "step": 353000 }, { "epoch": 2.27, "learning_rate": 1.2235649546827795e-05, "loss": 1.9381, "step": 354000 }, { "epoch": 2.27, "eval_loss": 2.195138931274414, "eval_runtime": 15.4982, "eval_samples_per_second": 129.047, "eval_steps_per_second": 2.065, "step": 354000 }, { "epoch": 2.27, "learning_rate": 1.2128970590745389e-05, "loss": 1.9462, "step": 355000 }, { "epoch": 2.27, "eval_loss": 2.197645902633667, "eval_runtime": 16.2722, "eval_samples_per_second": 122.909, "eval_steps_per_second": 1.967, "step": 355000 }, { "epoch": 2.28, "learning_rate": 1.202229163466298e-05, "loss": 1.9312, "step": 356000 }, { "epoch": 2.28, "eval_loss": 2.1800289154052734, "eval_runtime": 15.5962, "eval_samples_per_second": 128.236, "eval_steps_per_second": 2.052, "step": 356000 }, { "epoch": 2.29, "learning_rate": 1.1915612678580574e-05, "loss": 1.9379, "step": 357000 }, { "epoch": 2.29, "eval_loss": 2.175736427307129, "eval_runtime": 15.9665, "eval_samples_per_second": 125.262, "eval_steps_per_second": 2.004, "step": 357000 }, { "epoch": 2.29, "learning_rate": 1.1808933722498165e-05, "loss": 1.9435, "step": 358000 }, { "epoch": 2.29, "eval_loss": 2.205449104309082, "eval_runtime": 15.7121, "eval_samples_per_second": 127.291, "eval_steps_per_second": 2.037, "step": 358000 }, { "epoch": 2.3, "learning_rate": 1.1702254766415759e-05, "loss": 1.9448, "step": 359000 }, { "epoch": 2.3, "eval_loss": 2.173300266265869, "eval_runtime": 16.397, "eval_samples_per_second": 121.974, "eval_steps_per_second": 1.952, "step": 359000 }, { "epoch": 2.3, "learning_rate": 1.159557581033335e-05, "loss": 1.9529, "step": 360000 }, { "epoch": 2.3, "eval_loss": 2.145735263824463, "eval_runtime": 15.5694, "eval_samples_per_second": 128.457, "eval_steps_per_second": 2.055, "step": 360000 }, { "epoch": 2.31, "learning_rate": 1.1488896854250944e-05, "loss": 1.9444, "step": 361000 }, { "epoch": 2.31, "eval_loss": 2.1839778423309326, "eval_runtime": 15.6504, "eval_samples_per_second": 127.792, "eval_steps_per_second": 2.045, "step": 361000 }, { "epoch": 2.32, "learning_rate": 1.1382217898168535e-05, "loss": 1.9439, "step": 362000 }, { "epoch": 2.32, "eval_loss": 2.128485918045044, "eval_runtime": 15.7866, "eval_samples_per_second": 126.69, "eval_steps_per_second": 2.027, "step": 362000 }, { "epoch": 2.32, "learning_rate": 1.1275538942086129e-05, "loss": 1.9345, "step": 363000 }, { "epoch": 2.32, "eval_loss": 2.16981840133667, "eval_runtime": 16.0509, "eval_samples_per_second": 124.604, "eval_steps_per_second": 1.994, "step": 363000 }, { "epoch": 2.33, "learning_rate": 1.1168859986003722e-05, "loss": 1.9355, "step": 364000 }, { "epoch": 2.33, "eval_loss": 2.1235830783843994, "eval_runtime": 15.5068, "eval_samples_per_second": 128.975, "eval_steps_per_second": 2.064, "step": 364000 }, { "epoch": 2.34, "learning_rate": 1.1062181029921315e-05, "loss": 1.9385, "step": 365000 }, { "epoch": 2.34, "eval_loss": 2.1465463638305664, "eval_runtime": 15.3143, "eval_samples_per_second": 130.597, "eval_steps_per_second": 2.09, "step": 365000 }, { "epoch": 2.34, "learning_rate": 1.0955502073838907e-05, "loss": 1.9425, "step": 366000 }, { "epoch": 2.34, "eval_loss": 2.1613283157348633, "eval_runtime": 15.466, "eval_samples_per_second": 129.316, "eval_steps_per_second": 2.069, "step": 366000 }, { "epoch": 2.35, "learning_rate": 1.08488231177565e-05, "loss": 1.9304, "step": 367000 }, { "epoch": 2.35, "eval_loss": 2.172750949859619, "eval_runtime": 15.5842, "eval_samples_per_second": 128.335, "eval_steps_per_second": 2.053, "step": 367000 }, { "epoch": 2.36, "learning_rate": 1.0742144161674092e-05, "loss": 1.9339, "step": 368000 }, { "epoch": 2.36, "eval_loss": 2.148078680038452, "eval_runtime": 15.9481, "eval_samples_per_second": 125.407, "eval_steps_per_second": 2.007, "step": 368000 }, { "epoch": 2.36, "learning_rate": 1.0635465205591686e-05, "loss": 1.9463, "step": 369000 }, { "epoch": 2.36, "eval_loss": 2.1650550365448, "eval_runtime": 15.3617, "eval_samples_per_second": 130.194, "eval_steps_per_second": 2.083, "step": 369000 }, { "epoch": 2.37, "learning_rate": 1.0528786249509277e-05, "loss": 1.9407, "step": 370000 }, { "epoch": 2.37, "eval_loss": 2.1432077884674072, "eval_runtime": 15.1001, "eval_samples_per_second": 132.45, "eval_steps_per_second": 2.119, "step": 370000 }, { "epoch": 2.37, "learning_rate": 1.0422107293426869e-05, "loss": 1.9453, "step": 371000 }, { "epoch": 2.37, "eval_loss": 2.147706985473633, "eval_runtime": 15.9626, "eval_samples_per_second": 125.293, "eval_steps_per_second": 2.005, "step": 371000 }, { "epoch": 2.38, "learning_rate": 1.0315428337344462e-05, "loss": 1.9368, "step": 372000 }, { "epoch": 2.38, "eval_loss": 2.184664249420166, "eval_runtime": 15.5454, "eval_samples_per_second": 128.656, "eval_steps_per_second": 2.058, "step": 372000 }, { "epoch": 2.39, "learning_rate": 1.0208749381262054e-05, "loss": 1.9407, "step": 373000 }, { "epoch": 2.39, "eval_loss": 2.1857311725616455, "eval_runtime": 15.3498, "eval_samples_per_second": 130.295, "eval_steps_per_second": 2.085, "step": 373000 }, { "epoch": 2.39, "learning_rate": 1.0102070425179647e-05, "loss": 1.934, "step": 374000 }, { "epoch": 2.39, "eval_loss": 2.119173765182495, "eval_runtime": 15.4006, "eval_samples_per_second": 129.865, "eval_steps_per_second": 2.078, "step": 374000 }, { "epoch": 2.4, "learning_rate": 9.99539146909724e-06, "loss": 1.9297, "step": 375000 }, { "epoch": 2.4, "eval_loss": 2.1658694744110107, "eval_runtime": 15.796, "eval_samples_per_second": 126.615, "eval_steps_per_second": 2.026, "step": 375000 }, { "epoch": 2.41, "learning_rate": 9.888712513014834e-06, "loss": 1.9298, "step": 376000 }, { "epoch": 2.41, "eval_loss": 2.171632766723633, "eval_runtime": 15.3482, "eval_samples_per_second": 130.308, "eval_steps_per_second": 2.085, "step": 376000 }, { "epoch": 2.41, "learning_rate": 9.782033556932426e-06, "loss": 1.9267, "step": 377000 }, { "epoch": 2.41, "eval_loss": 2.1282413005828857, "eval_runtime": 15.2611, "eval_samples_per_second": 131.052, "eval_steps_per_second": 2.097, "step": 377000 }, { "epoch": 2.42, "learning_rate": 9.675354600850019e-06, "loss": 1.9387, "step": 378000 }, { "epoch": 2.42, "eval_loss": 2.175699472427368, "eval_runtime": 15.3352, "eval_samples_per_second": 130.419, "eval_steps_per_second": 2.087, "step": 378000 }, { "epoch": 2.43, "learning_rate": 9.56867564476761e-06, "loss": 1.9235, "step": 379000 }, { "epoch": 2.43, "eval_loss": 2.1758999824523926, "eval_runtime": 16.089, "eval_samples_per_second": 124.309, "eval_steps_per_second": 1.989, "step": 379000 }, { "epoch": 2.43, "learning_rate": 9.461996688685204e-06, "loss": 1.9265, "step": 380000 }, { "epoch": 2.43, "eval_loss": 2.163534164428711, "eval_runtime": 15.2326, "eval_samples_per_second": 131.297, "eval_steps_per_second": 2.101, "step": 380000 }, { "epoch": 2.44, "learning_rate": 9.355317732602796e-06, "loss": 1.9151, "step": 381000 }, { "epoch": 2.44, "eval_loss": 2.1671011447906494, "eval_runtime": 15.2621, "eval_samples_per_second": 131.044, "eval_steps_per_second": 2.097, "step": 381000 }, { "epoch": 2.45, "learning_rate": 9.248638776520389e-06, "loss": 1.9262, "step": 382000 }, { "epoch": 2.45, "eval_loss": 2.144550323486328, "eval_runtime": 15.6946, "eval_samples_per_second": 127.432, "eval_steps_per_second": 2.039, "step": 382000 }, { "epoch": 2.45, "learning_rate": 9.14195982043798e-06, "loss": 1.9311, "step": 383000 }, { "epoch": 2.45, "eval_loss": 2.1890273094177246, "eval_runtime": 15.377, "eval_samples_per_second": 130.065, "eval_steps_per_second": 2.081, "step": 383000 }, { "epoch": 2.46, "learning_rate": 9.035280864355574e-06, "loss": 1.9305, "step": 384000 }, { "epoch": 2.46, "eval_loss": 2.166837692260742, "eval_runtime": 15.3262, "eval_samples_per_second": 130.496, "eval_steps_per_second": 2.088, "step": 384000 }, { "epoch": 2.46, "learning_rate": 8.928601908273167e-06, "loss": 1.9237, "step": 385000 }, { "epoch": 2.46, "eval_loss": 2.0922629833221436, "eval_runtime": 15.1049, "eval_samples_per_second": 132.408, "eval_steps_per_second": 2.119, "step": 385000 }, { "epoch": 2.47, "learning_rate": 8.82192295219076e-06, "loss": 1.9256, "step": 386000 }, { "epoch": 2.47, "eval_loss": 2.1387295722961426, "eval_runtime": 15.8611, "eval_samples_per_second": 126.095, "eval_steps_per_second": 2.018, "step": 386000 }, { "epoch": 2.48, "learning_rate": 8.715243996108352e-06, "loss": 1.9339, "step": 387000 }, { "epoch": 2.48, "eval_loss": 2.160367250442505, "eval_runtime": 15.4895, "eval_samples_per_second": 129.12, "eval_steps_per_second": 2.066, "step": 387000 }, { "epoch": 2.48, "learning_rate": 8.608565040025944e-06, "loss": 1.925, "step": 388000 }, { "epoch": 2.48, "eval_loss": 2.1711387634277344, "eval_runtime": 15.39, "eval_samples_per_second": 129.955, "eval_steps_per_second": 2.079, "step": 388000 }, { "epoch": 2.49, "learning_rate": 8.501886083943537e-06, "loss": 1.9185, "step": 389000 }, { "epoch": 2.49, "eval_loss": 2.1491212844848633, "eval_runtime": 15.607, "eval_samples_per_second": 128.147, "eval_steps_per_second": 2.05, "step": 389000 }, { "epoch": 2.5, "learning_rate": 8.395207127861129e-06, "loss": 1.9214, "step": 390000 }, { "epoch": 2.5, "eval_loss": 2.1444971561431885, "eval_runtime": 15.4605, "eval_samples_per_second": 129.362, "eval_steps_per_second": 2.07, "step": 390000 }, { "epoch": 2.5, "learning_rate": 8.288528171778722e-06, "loss": 1.928, "step": 391000 }, { "epoch": 2.5, "eval_loss": 2.1359145641326904, "eval_runtime": 15.6126, "eval_samples_per_second": 128.102, "eval_steps_per_second": 2.05, "step": 391000 }, { "epoch": 2.51, "learning_rate": 8.181849215696314e-06, "loss": 1.9243, "step": 392000 }, { "epoch": 2.51, "eval_loss": 2.156005620956421, "eval_runtime": 15.7238, "eval_samples_per_second": 127.196, "eval_steps_per_second": 2.035, "step": 392000 }, { "epoch": 2.52, "learning_rate": 8.075170259613907e-06, "loss": 1.9096, "step": 393000 }, { "epoch": 2.52, "eval_loss": 2.1110196113586426, "eval_runtime": 15.176, "eval_samples_per_second": 131.787, "eval_steps_per_second": 2.109, "step": 393000 }, { "epoch": 2.52, "learning_rate": 7.9684913035315e-06, "loss": 1.9254, "step": 394000 }, { "epoch": 2.52, "eval_loss": 2.135141611099243, "eval_runtime": 15.4036, "eval_samples_per_second": 129.84, "eval_steps_per_second": 2.077, "step": 394000 }, { "epoch": 2.53, "learning_rate": 7.861812347449094e-06, "loss": 1.9214, "step": 395000 }, { "epoch": 2.53, "eval_loss": 2.1366610527038574, "eval_runtime": 15.7136, "eval_samples_per_second": 127.279, "eval_steps_per_second": 2.036, "step": 395000 }, { "epoch": 2.53, "learning_rate": 7.755133391366686e-06, "loss": 1.9229, "step": 396000 }, { "epoch": 2.53, "eval_loss": 2.1293559074401855, "eval_runtime": 15.3708, "eval_samples_per_second": 130.117, "eval_steps_per_second": 2.082, "step": 396000 }, { "epoch": 2.54, "learning_rate": 7.64845443528428e-06, "loss": 1.9166, "step": 397000 }, { "epoch": 2.54, "eval_loss": 2.1272215843200684, "eval_runtime": 15.7644, "eval_samples_per_second": 126.868, "eval_steps_per_second": 2.03, "step": 397000 }, { "epoch": 2.55, "learning_rate": 7.541775479201871e-06, "loss": 1.9152, "step": 398000 }, { "epoch": 2.55, "eval_loss": 2.1080117225646973, "eval_runtime": 15.3816, "eval_samples_per_second": 130.026, "eval_steps_per_second": 2.08, "step": 398000 }, { "epoch": 2.55, "learning_rate": 7.435096523119464e-06, "loss": 1.9138, "step": 399000 }, { "epoch": 2.55, "eval_loss": 2.156583309173584, "eval_runtime": 15.5093, "eval_samples_per_second": 128.955, "eval_steps_per_second": 2.063, "step": 399000 }, { "epoch": 2.56, "learning_rate": 7.328417567037056e-06, "loss": 1.9193, "step": 400000 }, { "epoch": 2.56, "eval_loss": 2.1462528705596924, "eval_runtime": 15.6345, "eval_samples_per_second": 127.923, "eval_steps_per_second": 2.047, "step": 400000 }, { "epoch": 2.57, "learning_rate": 7.221738610954649e-06, "loss": 1.9216, "step": 401000 }, { "epoch": 2.57, "eval_loss": 2.1311724185943604, "eval_runtime": 15.5304, "eval_samples_per_second": 128.78, "eval_steps_per_second": 2.06, "step": 401000 }, { "epoch": 2.57, "learning_rate": 7.115059654872242e-06, "loss": 1.9171, "step": 402000 }, { "epoch": 2.57, "eval_loss": 2.1334073543548584, "eval_runtime": 15.6034, "eval_samples_per_second": 128.177, "eval_steps_per_second": 2.051, "step": 402000 }, { "epoch": 2.58, "learning_rate": 7.008380698789835e-06, "loss": 1.9148, "step": 403000 }, { "epoch": 2.58, "eval_loss": 2.1480307579040527, "eval_runtime": 15.4786, "eval_samples_per_second": 129.211, "eval_steps_per_second": 2.067, "step": 403000 }, { "epoch": 2.59, "learning_rate": 6.901701742707427e-06, "loss": 1.9204, "step": 404000 }, { "epoch": 2.59, "eval_loss": 2.1620922088623047, "eval_runtime": 17.9933, "eval_samples_per_second": 111.152, "eval_steps_per_second": 1.778, "step": 404000 }, { "epoch": 2.59, "learning_rate": 6.79502278662502e-06, "loss": 1.9163, "step": 405000 }, { "epoch": 2.59, "eval_loss": 2.1261579990386963, "eval_runtime": 15.7916, "eval_samples_per_second": 126.65, "eval_steps_per_second": 2.026, "step": 405000 }, { "epoch": 2.6, "learning_rate": 6.688343830542612e-06, "loss": 1.9147, "step": 406000 }, { "epoch": 2.6, "eval_loss": 2.134714365005493, "eval_runtime": 15.563, "eval_samples_per_second": 128.51, "eval_steps_per_second": 2.056, "step": 406000 }, { "epoch": 2.61, "learning_rate": 6.581664874460204e-06, "loss": 1.9107, "step": 407000 }, { "epoch": 2.61, "eval_loss": 2.094939947128296, "eval_runtime": 15.3395, "eval_samples_per_second": 130.383, "eval_steps_per_second": 2.086, "step": 407000 }, { "epoch": 2.61, "learning_rate": 6.474985918377798e-06, "loss": 1.9185, "step": 408000 }, { "epoch": 2.61, "eval_loss": 2.1135287284851074, "eval_runtime": 15.2587, "eval_samples_per_second": 131.072, "eval_steps_per_second": 2.097, "step": 408000 }, { "epoch": 2.62, "learning_rate": 6.368306962295389e-06, "loss": 1.9134, "step": 409000 }, { "epoch": 2.62, "eval_loss": 2.1412642002105713, "eval_runtime": 15.702, "eval_samples_per_second": 127.372, "eval_steps_per_second": 2.038, "step": 409000 }, { "epoch": 2.62, "learning_rate": 6.261628006212983e-06, "loss": 1.9144, "step": 410000 }, { "epoch": 2.62, "eval_loss": 2.1682534217834473, "eval_runtime": 15.4072, "eval_samples_per_second": 129.81, "eval_steps_per_second": 2.077, "step": 410000 }, { "epoch": 2.63, "learning_rate": 6.154949050130575e-06, "loss": 1.9086, "step": 411000 }, { "epoch": 2.63, "eval_loss": 2.141894578933716, "eval_runtime": 15.208, "eval_samples_per_second": 131.51, "eval_steps_per_second": 2.104, "step": 411000 }, { "epoch": 2.64, "learning_rate": 6.0482700940481686e-06, "loss": 1.9101, "step": 412000 }, { "epoch": 2.64, "eval_loss": 2.1342506408691406, "eval_runtime": 15.2405, "eval_samples_per_second": 131.229, "eval_steps_per_second": 2.1, "step": 412000 }, { "epoch": 2.64, "learning_rate": 5.941591137965761e-06, "loss": 1.9086, "step": 413000 }, { "epoch": 2.64, "eval_loss": 2.097320318222046, "eval_runtime": 15.5657, "eval_samples_per_second": 128.488, "eval_steps_per_second": 2.056, "step": 413000 }, { "epoch": 2.65, "learning_rate": 5.8349121818833536e-06, "loss": 1.9089, "step": 414000 }, { "epoch": 2.65, "eval_loss": 2.1229472160339355, "eval_runtime": 15.1808, "eval_samples_per_second": 131.746, "eval_steps_per_second": 2.108, "step": 414000 }, { "epoch": 2.66, "learning_rate": 5.728233225800946e-06, "loss": 1.915, "step": 415000 }, { "epoch": 2.66, "eval_loss": 2.1642491817474365, "eval_runtime": 15.6522, "eval_samples_per_second": 127.777, "eval_steps_per_second": 2.044, "step": 415000 }, { "epoch": 2.66, "learning_rate": 5.621554269718539e-06, "loss": 1.914, "step": 416000 }, { "epoch": 2.66, "eval_loss": 2.1208455562591553, "eval_runtime": 15.453, "eval_samples_per_second": 129.425, "eval_steps_per_second": 2.071, "step": 416000 }, { "epoch": 2.67, "learning_rate": 5.514875313636131e-06, "loss": 1.9031, "step": 417000 }, { "epoch": 2.67, "eval_loss": 2.103487253189087, "eval_runtime": 15.4394, "eval_samples_per_second": 129.539, "eval_steps_per_second": 2.073, "step": 417000 }, { "epoch": 2.68, "learning_rate": 5.408196357553724e-06, "loss": 1.9015, "step": 418000 }, { "epoch": 2.68, "eval_loss": 2.1312220096588135, "eval_runtime": 15.3068, "eval_samples_per_second": 130.661, "eval_steps_per_second": 2.091, "step": 418000 }, { "epoch": 2.68, "learning_rate": 5.301517401471316e-06, "loss": 1.9069, "step": 419000 }, { "epoch": 2.68, "eval_loss": 2.1444790363311768, "eval_runtime": 15.4574, "eval_samples_per_second": 129.388, "eval_steps_per_second": 2.07, "step": 419000 }, { "epoch": 2.69, "learning_rate": 5.194838445388909e-06, "loss": 1.9016, "step": 420000 }, { "epoch": 2.69, "eval_loss": 2.1105127334594727, "eval_runtime": 15.3042, "eval_samples_per_second": 130.683, "eval_steps_per_second": 2.091, "step": 420000 }, { "epoch": 2.69, "learning_rate": 5.088159489306501e-06, "loss": 1.8882, "step": 421000 }, { "epoch": 2.69, "eval_loss": 2.151632785797119, "eval_runtime": 15.8977, "eval_samples_per_second": 125.805, "eval_steps_per_second": 2.013, "step": 421000 }, { "epoch": 2.7, "learning_rate": 4.9814805332240945e-06, "loss": 1.9158, "step": 422000 }, { "epoch": 2.7, "eval_loss": 2.1242105960845947, "eval_runtime": 15.298, "eval_samples_per_second": 130.736, "eval_steps_per_second": 2.092, "step": 422000 }, { "epoch": 2.71, "learning_rate": 4.874801577141687e-06, "loss": 1.9136, "step": 423000 }, { "epoch": 2.71, "eval_loss": 2.1192123889923096, "eval_runtime": 15.1175, "eval_samples_per_second": 132.297, "eval_steps_per_second": 2.117, "step": 423000 }, { "epoch": 2.71, "learning_rate": 4.7681226210592795e-06, "loss": 1.916, "step": 424000 }, { "epoch": 2.71, "eval_loss": 2.1400868892669678, "eval_runtime": 15.3165, "eval_samples_per_second": 130.578, "eval_steps_per_second": 2.089, "step": 424000 }, { "epoch": 2.72, "learning_rate": 4.661443664976872e-06, "loss": 1.8986, "step": 425000 }, { "epoch": 2.72, "eval_loss": 2.158984899520874, "eval_runtime": 15.2786, "eval_samples_per_second": 130.902, "eval_steps_per_second": 2.094, "step": 425000 }, { "epoch": 2.73, "learning_rate": 4.5547647088944646e-06, "loss": 1.9046, "step": 426000 }, { "epoch": 2.73, "eval_loss": 2.1008715629577637, "eval_runtime": 15.3482, "eval_samples_per_second": 130.309, "eval_steps_per_second": 2.085, "step": 426000 }, { "epoch": 2.73, "learning_rate": 4.448085752812058e-06, "loss": 1.9019, "step": 427000 }, { "epoch": 2.73, "eval_loss": 2.1234779357910156, "eval_runtime": 15.3947, "eval_samples_per_second": 129.915, "eval_steps_per_second": 2.079, "step": 427000 }, { "epoch": 2.74, "learning_rate": 4.34140679672965e-06, "loss": 1.9075, "step": 428000 }, { "epoch": 2.74, "eval_loss": 2.1445555686950684, "eval_runtime": 15.263, "eval_samples_per_second": 131.036, "eval_steps_per_second": 2.097, "step": 428000 }, { "epoch": 2.75, "learning_rate": 4.234727840647243e-06, "loss": 1.9023, "step": 429000 }, { "epoch": 2.75, "eval_loss": 2.1059927940368652, "eval_runtime": 15.6241, "eval_samples_per_second": 128.007, "eval_steps_per_second": 2.048, "step": 429000 }, { "epoch": 2.75, "learning_rate": 4.1280488845648354e-06, "loss": 1.9096, "step": 430000 }, { "epoch": 2.75, "eval_loss": 2.124612331390381, "eval_runtime": 15.4182, "eval_samples_per_second": 129.717, "eval_steps_per_second": 2.075, "step": 430000 }, { "epoch": 2.76, "learning_rate": 4.021369928482428e-06, "loss": 1.9021, "step": 431000 }, { "epoch": 2.76, "eval_loss": 2.1339197158813477, "eval_runtime": 15.3184, "eval_samples_per_second": 130.562, "eval_steps_per_second": 2.089, "step": 431000 }, { "epoch": 2.77, "learning_rate": 3.914690972400021e-06, "loss": 1.9051, "step": 432000 }, { "epoch": 2.77, "eval_loss": 2.150739908218384, "eval_runtime": 15.3685, "eval_samples_per_second": 130.137, "eval_steps_per_second": 2.082, "step": 432000 }, { "epoch": 2.77, "learning_rate": 3.808012016317614e-06, "loss": 1.8959, "step": 433000 }, { "epoch": 2.77, "eval_loss": 2.1340439319610596, "eval_runtime": 15.9351, "eval_samples_per_second": 125.509, "eval_steps_per_second": 2.008, "step": 433000 }, { "epoch": 2.78, "learning_rate": 3.7013330602352055e-06, "loss": 1.8924, "step": 434000 }, { "epoch": 2.78, "eval_loss": 2.1609554290771484, "eval_runtime": 15.2114, "eval_samples_per_second": 131.48, "eval_steps_per_second": 2.104, "step": 434000 }, { "epoch": 2.78, "learning_rate": 3.5946541041527984e-06, "loss": 1.9091, "step": 435000 }, { "epoch": 2.78, "eval_loss": 2.147794008255005, "eval_runtime": 15.5411, "eval_samples_per_second": 128.691, "eval_steps_per_second": 2.059, "step": 435000 }, { "epoch": 2.79, "learning_rate": 3.487975148070391e-06, "loss": 1.8908, "step": 436000 }, { "epoch": 2.79, "eval_loss": 2.100537061691284, "eval_runtime": 15.6967, "eval_samples_per_second": 127.415, "eval_steps_per_second": 2.039, "step": 436000 }, { "epoch": 2.8, "learning_rate": 3.3812961919879834e-06, "loss": 1.8946, "step": 437000 }, { "epoch": 2.8, "eval_loss": 2.111453056335449, "eval_runtime": 15.3824, "eval_samples_per_second": 130.019, "eval_steps_per_second": 2.08, "step": 437000 }, { "epoch": 2.8, "learning_rate": 3.2746172359055764e-06, "loss": 1.8977, "step": 438000 }, { "epoch": 2.8, "eval_loss": 2.130976676940918, "eval_runtime": 15.1954, "eval_samples_per_second": 131.618, "eval_steps_per_second": 2.106, "step": 438000 }, { "epoch": 2.81, "learning_rate": 3.167938279823169e-06, "loss": 1.9021, "step": 439000 }, { "epoch": 2.81, "eval_loss": 2.1252684593200684, "eval_runtime": 15.3946, "eval_samples_per_second": 129.916, "eval_steps_per_second": 2.079, "step": 439000 }, { "epoch": 2.82, "learning_rate": 3.061259323740762e-06, "loss": 1.9019, "step": 440000 }, { "epoch": 2.82, "eval_loss": 2.1282765865325928, "eval_runtime": 15.6319, "eval_samples_per_second": 127.943, "eval_steps_per_second": 2.047, "step": 440000 }, { "epoch": 2.82, "learning_rate": 2.9545803676583543e-06, "loss": 1.8947, "step": 441000 }, { "epoch": 2.82, "eval_loss": 2.1524507999420166, "eval_runtime": 15.3337, "eval_samples_per_second": 130.432, "eval_steps_per_second": 2.087, "step": 441000 }, { "epoch": 2.83, "learning_rate": 2.847901411575947e-06, "loss": 1.8854, "step": 442000 }, { "epoch": 2.83, "eval_loss": 2.1064517498016357, "eval_runtime": 15.2656, "eval_samples_per_second": 131.013, "eval_steps_per_second": 2.096, "step": 442000 }, { "epoch": 2.84, "learning_rate": 2.7412224554935398e-06, "loss": 1.9007, "step": 443000 }, { "epoch": 2.84, "eval_loss": 2.0694828033447266, "eval_runtime": 15.8869, "eval_samples_per_second": 125.89, "eval_steps_per_second": 2.014, "step": 443000 }, { "epoch": 2.84, "learning_rate": 2.6345434994111323e-06, "loss": 1.8981, "step": 444000 }, { "epoch": 2.84, "eval_loss": 2.1273715496063232, "eval_runtime": 15.1985, "eval_samples_per_second": 131.592, "eval_steps_per_second": 2.105, "step": 444000 }, { "epoch": 2.85, "learning_rate": 2.527864543328725e-06, "loss": 1.8872, "step": 445000 }, { "epoch": 2.85, "eval_loss": 2.1042518615722656, "eval_runtime": 15.3793, "eval_samples_per_second": 130.045, "eval_steps_per_second": 2.081, "step": 445000 }, { "epoch": 2.85, "learning_rate": 2.4211855872463177e-06, "loss": 1.8957, "step": 446000 }, { "epoch": 2.85, "eval_loss": 2.0750997066497803, "eval_runtime": 15.5989, "eval_samples_per_second": 128.214, "eval_steps_per_second": 2.051, "step": 446000 }, { "epoch": 2.86, "learning_rate": 2.3145066311639102e-06, "loss": 1.9031, "step": 447000 }, { "epoch": 2.86, "eval_loss": 2.127918004989624, "eval_runtime": 15.6223, "eval_samples_per_second": 128.022, "eval_steps_per_second": 2.048, "step": 447000 }, { "epoch": 2.87, "learning_rate": 2.2078276750815028e-06, "loss": 1.9001, "step": 448000 }, { "epoch": 2.87, "eval_loss": 2.1019787788391113, "eval_runtime": 15.4843, "eval_samples_per_second": 129.163, "eval_steps_per_second": 2.067, "step": 448000 }, { "epoch": 2.87, "learning_rate": 2.1011487189990953e-06, "loss": 1.8964, "step": 449000 }, { "epoch": 2.87, "eval_loss": 2.0935049057006836, "eval_runtime": 15.5998, "eval_samples_per_second": 128.206, "eval_steps_per_second": 2.051, "step": 449000 }, { "epoch": 2.88, "learning_rate": 1.994469762916688e-06, "loss": 1.9003, "step": 450000 }, { "epoch": 2.88, "eval_loss": 2.1466352939605713, "eval_runtime": 15.1432, "eval_samples_per_second": 132.072, "eval_steps_per_second": 2.113, "step": 450000 }, { "epoch": 2.89, "learning_rate": 1.8877908068342807e-06, "loss": 1.9041, "step": 451000 }, { "epoch": 2.89, "eval_loss": 2.1213934421539307, "eval_runtime": 15.5486, "eval_samples_per_second": 128.629, "eval_steps_per_second": 2.058, "step": 451000 }, { "epoch": 2.89, "learning_rate": 1.7811118507518734e-06, "loss": 1.8972, "step": 452000 }, { "epoch": 2.89, "eval_loss": 2.139911651611328, "eval_runtime": 17.4254, "eval_samples_per_second": 114.775, "eval_steps_per_second": 1.836, "step": 452000 }, { "epoch": 2.9, "learning_rate": 1.674432894669466e-06, "loss": 1.9001, "step": 453000 }, { "epoch": 2.9, "eval_loss": 2.1135449409484863, "eval_runtime": 15.3928, "eval_samples_per_second": 129.931, "eval_steps_per_second": 2.079, "step": 453000 }, { "epoch": 2.91, "learning_rate": 1.5677539385870587e-06, "loss": 1.9034, "step": 454000 }, { "epoch": 2.91, "eval_loss": 2.0974974632263184, "eval_runtime": 15.5392, "eval_samples_per_second": 128.707, "eval_steps_per_second": 2.059, "step": 454000 }, { "epoch": 2.91, "learning_rate": 1.4610749825046512e-06, "loss": 1.88, "step": 455000 }, { "epoch": 2.91, "eval_loss": 2.086946725845337, "eval_runtime": 15.3909, "eval_samples_per_second": 129.947, "eval_steps_per_second": 2.079, "step": 455000 }, { "epoch": 2.92, "learning_rate": 1.354396026422244e-06, "loss": 1.894, "step": 456000 }, { "epoch": 2.92, "eval_loss": 2.0814855098724365, "eval_runtime": 16.0281, "eval_samples_per_second": 124.781, "eval_steps_per_second": 1.996, "step": 456000 }, { "epoch": 2.93, "learning_rate": 1.2477170703398366e-06, "loss": 1.8956, "step": 457000 }, { "epoch": 2.93, "eval_loss": 2.1207478046417236, "eval_runtime": 16.265, "eval_samples_per_second": 122.964, "eval_steps_per_second": 1.967, "step": 457000 }, { "epoch": 2.93, "learning_rate": 1.1410381142574291e-06, "loss": 1.8882, "step": 458000 }, { "epoch": 2.93, "eval_loss": 2.1136324405670166, "eval_runtime": 15.2771, "eval_samples_per_second": 130.915, "eval_steps_per_second": 2.095, "step": 458000 }, { "epoch": 2.94, "learning_rate": 1.0343591581750219e-06, "loss": 1.8924, "step": 459000 }, { "epoch": 2.94, "eval_loss": 2.137352466583252, "eval_runtime": 15.981, "eval_samples_per_second": 125.149, "eval_steps_per_second": 2.002, "step": 459000 }, { "epoch": 2.94, "learning_rate": 9.276802020926144e-07, "loss": 1.8953, "step": 460000 }, { "epoch": 2.94, "eval_loss": 2.1012661457061768, "eval_runtime": 15.3369, "eval_samples_per_second": 130.404, "eval_steps_per_second": 2.086, "step": 460000 }, { "epoch": 2.95, "learning_rate": 8.210012460102071e-07, "loss": 1.893, "step": 461000 }, { "epoch": 2.95, "eval_loss": 2.135178804397583, "eval_runtime": 15.8046, "eval_samples_per_second": 126.546, "eval_steps_per_second": 2.025, "step": 461000 }, { "epoch": 2.96, "learning_rate": 7.143222899277997e-07, "loss": 1.8903, "step": 462000 }, { "epoch": 2.96, "eval_loss": 2.1333072185516357, "eval_runtime": 15.5282, "eval_samples_per_second": 128.798, "eval_steps_per_second": 2.061, "step": 462000 }, { "epoch": 2.96, "learning_rate": 6.076433338453923e-07, "loss": 1.8895, "step": 463000 }, { "epoch": 2.96, "eval_loss": 2.1294093132019043, "eval_runtime": 15.3716, "eval_samples_per_second": 130.11, "eval_steps_per_second": 2.082, "step": 463000 }, { "epoch": 2.97, "learning_rate": 5.009643777629849e-07, "loss": 1.8939, "step": 464000 }, { "epoch": 2.97, "eval_loss": 2.1235413551330566, "eval_runtime": 15.3293, "eval_samples_per_second": 130.469, "eval_steps_per_second": 2.088, "step": 464000 }, { "epoch": 2.98, "learning_rate": 3.9428542168057766e-07, "loss": 1.8915, "step": 465000 }, { "epoch": 2.98, "eval_loss": 2.0933895111083984, "eval_runtime": 15.9617, "eval_samples_per_second": 125.3, "eval_steps_per_second": 2.005, "step": 465000 }, { "epoch": 2.98, "learning_rate": 2.8760646559817023e-07, "loss": 1.8884, "step": 466000 }, { "epoch": 2.98, "eval_loss": 2.1353940963745117, "eval_runtime": 15.6819, "eval_samples_per_second": 127.536, "eval_steps_per_second": 2.041, "step": 466000 }, { "epoch": 2.99, "learning_rate": 1.809275095157629e-07, "loss": 1.8932, "step": 467000 }, { "epoch": 2.99, "eval_loss": 2.1101338863372803, "eval_runtime": 15.545, "eval_samples_per_second": 128.659, "eval_steps_per_second": 2.059, "step": 467000 }, { "epoch": 3.0, "learning_rate": 7.424855343335553e-08, "loss": 1.9, "step": 468000 }, { "epoch": 3.0, "eval_loss": 2.130716562271118, "eval_runtime": 15.4114, "eval_samples_per_second": 129.774, "eval_steps_per_second": 2.076, "step": 468000 }, { "epoch": 3.0, "step": 468696, "total_flos": 6.219491681834838e+18, "train_loss": 0.6429893864398178, "train_runtime": 172266.1403, "train_samples_per_second": 174.128, "train_steps_per_second": 2.721 } ], "max_steps": 468696, "num_train_epochs": 3, "total_flos": 6.219491681834838e+18, "trial_name": null, "trial_params": null }