{ "best_metric": 2.4363410472869873, "best_model_checkpoint": "/home/seemdog/manchu_BERT/1002_BERT_DA_1.0/checkpoint-86000", "epoch": 9.964620917517031, "global_step": 213000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "learning_rate": 4.9766081871345035e-05, "loss": 6.1581, "step": 1000 }, { "epoch": 0.05, "eval_loss": 5.5598931312561035, "eval_runtime": 54.891, "eval_samples_per_second": 120.767, "eval_steps_per_second": 1.895, "step": 1000 }, { "epoch": 0.09, "learning_rate": 4.953216374269006e-05, "loss": 5.3713, "step": 2000 }, { "epoch": 0.09, "eval_loss": 4.843267440795898, "eval_runtime": 54.8945, "eval_samples_per_second": 120.759, "eval_steps_per_second": 1.895, "step": 2000 }, { "epoch": 0.14, "learning_rate": 4.9298245614035086e-05, "loss": 4.7624, "step": 3000 }, { "epoch": 0.14, "eval_loss": 4.427705764770508, "eval_runtime": 54.9095, "eval_samples_per_second": 120.726, "eval_steps_per_second": 1.894, "step": 3000 }, { "epoch": 0.19, "learning_rate": 4.906432748538012e-05, "loss": 4.2884, "step": 4000 }, { "epoch": 0.19, "eval_loss": 4.152446746826172, "eval_runtime": 54.9536, "eval_samples_per_second": 120.629, "eval_steps_per_second": 1.893, "step": 4000 }, { "epoch": 0.23, "learning_rate": 4.883040935672515e-05, "loss": 3.908, "step": 5000 }, { "epoch": 0.23, "eval_loss": 3.943004608154297, "eval_runtime": 54.9769, "eval_samples_per_second": 120.578, "eval_steps_per_second": 1.892, "step": 5000 }, { "epoch": 0.28, "learning_rate": 4.859649122807018e-05, "loss": 3.6357, "step": 6000 }, { "epoch": 0.28, "eval_loss": 3.7840378284454346, "eval_runtime": 54.9612, "eval_samples_per_second": 120.612, "eval_steps_per_second": 1.892, "step": 6000 }, { "epoch": 0.33, "learning_rate": 4.836257309941521e-05, "loss": 3.442, "step": 7000 }, { "epoch": 0.33, "eval_loss": 3.6515119075775146, "eval_runtime": 55.0182, "eval_samples_per_second": 120.487, "eval_steps_per_second": 1.89, "step": 7000 }, { "epoch": 0.37, "learning_rate": 4.8128654970760235e-05, "loss": 3.2982, "step": 8000 }, { "epoch": 0.37, "eval_loss": 3.5147831439971924, "eval_runtime": 54.9459, "eval_samples_per_second": 120.646, "eval_steps_per_second": 1.893, "step": 8000 }, { "epoch": 0.42, "learning_rate": 4.789473684210526e-05, "loss": 3.1681, "step": 9000 }, { "epoch": 0.42, "eval_loss": 3.4453866481781006, "eval_runtime": 54.9741, "eval_samples_per_second": 120.584, "eval_steps_per_second": 1.892, "step": 9000 }, { "epoch": 0.47, "learning_rate": 4.7660818713450294e-05, "loss": 3.0515, "step": 10000 }, { "epoch": 0.47, "eval_loss": 3.3482985496520996, "eval_runtime": 54.9922, "eval_samples_per_second": 120.544, "eval_steps_per_second": 1.891, "step": 10000 }, { "epoch": 0.51, "learning_rate": 4.7426900584795326e-05, "loss": 2.9408, "step": 11000 }, { "epoch": 0.51, "eval_loss": 3.274308919906616, "eval_runtime": 55.0307, "eval_samples_per_second": 120.46, "eval_steps_per_second": 1.89, "step": 11000 }, { "epoch": 0.56, "learning_rate": 4.719298245614036e-05, "loss": 2.8601, "step": 12000 }, { "epoch": 0.56, "eval_loss": 3.2094714641571045, "eval_runtime": 54.9444, "eval_samples_per_second": 120.649, "eval_steps_per_second": 1.893, "step": 12000 }, { "epoch": 0.61, "learning_rate": 4.695906432748538e-05, "loss": 2.7866, "step": 13000 }, { "epoch": 0.61, "eval_loss": 3.1299281120300293, "eval_runtime": 54.9484, "eval_samples_per_second": 120.64, "eval_steps_per_second": 1.893, "step": 13000 }, { "epoch": 0.65, "learning_rate": 4.672514619883041e-05, "loss": 2.7094, "step": 14000 }, { "epoch": 0.65, "eval_loss": 3.096022844314575, "eval_runtime": 55.155, "eval_samples_per_second": 120.189, "eval_steps_per_second": 1.886, "step": 14000 }, { "epoch": 0.7, "learning_rate": 4.649122807017544e-05, "loss": 2.6424, "step": 15000 }, { "epoch": 0.7, "eval_loss": 3.060807228088379, "eval_runtime": 55.1935, "eval_samples_per_second": 120.105, "eval_steps_per_second": 1.884, "step": 15000 }, { "epoch": 0.75, "learning_rate": 4.625730994152047e-05, "loss": 2.5729, "step": 16000 }, { "epoch": 0.75, "eval_loss": 3.0170695781707764, "eval_runtime": 55.2064, "eval_samples_per_second": 120.077, "eval_steps_per_second": 1.884, "step": 16000 }, { "epoch": 0.8, "learning_rate": 4.60233918128655e-05, "loss": 2.5108, "step": 17000 }, { "epoch": 0.8, "eval_loss": 2.9729015827178955, "eval_runtime": 55.2048, "eval_samples_per_second": 120.08, "eval_steps_per_second": 1.884, "step": 17000 }, { "epoch": 0.84, "learning_rate": 4.5789473684210527e-05, "loss": 2.4538, "step": 18000 }, { "epoch": 0.84, "eval_loss": 2.9392964839935303, "eval_runtime": 55.2009, "eval_samples_per_second": 120.089, "eval_steps_per_second": 1.884, "step": 18000 }, { "epoch": 0.89, "learning_rate": 4.555555555555556e-05, "loss": 2.3941, "step": 19000 }, { "epoch": 0.89, "eval_loss": 2.900946617126465, "eval_runtime": 55.1868, "eval_samples_per_second": 120.119, "eval_steps_per_second": 1.885, "step": 19000 }, { "epoch": 0.94, "learning_rate": 4.5321637426900585e-05, "loss": 2.3341, "step": 20000 }, { "epoch": 0.94, "eval_loss": 2.87040376663208, "eval_runtime": 55.0611, "eval_samples_per_second": 120.393, "eval_steps_per_second": 1.889, "step": 20000 }, { "epoch": 0.98, "learning_rate": 4.508771929824562e-05, "loss": 2.2797, "step": 21000 }, { "epoch": 0.98, "eval_loss": 2.8554604053497314, "eval_runtime": 54.9944, "eval_samples_per_second": 120.54, "eval_steps_per_second": 1.891, "step": 21000 }, { "epoch": 1.03, "learning_rate": 4.485380116959065e-05, "loss": 2.2284, "step": 22000 }, { "epoch": 1.03, "eval_loss": 2.8280177116394043, "eval_runtime": 54.9695, "eval_samples_per_second": 120.594, "eval_steps_per_second": 1.892, "step": 22000 }, { "epoch": 1.08, "learning_rate": 4.4619883040935676e-05, "loss": 2.1651, "step": 23000 }, { "epoch": 1.08, "eval_loss": 2.7877776622772217, "eval_runtime": 54.9786, "eval_samples_per_second": 120.574, "eval_steps_per_second": 1.892, "step": 23000 }, { "epoch": 1.12, "learning_rate": 4.43859649122807e-05, "loss": 2.1267, "step": 24000 }, { "epoch": 1.12, "eval_loss": 2.7796318531036377, "eval_runtime": 55.0112, "eval_samples_per_second": 120.503, "eval_steps_per_second": 1.891, "step": 24000 }, { "epoch": 1.17, "learning_rate": 4.4152046783625734e-05, "loss": 2.0887, "step": 25000 }, { "epoch": 1.17, "eval_loss": 2.7155935764312744, "eval_runtime": 54.9846, "eval_samples_per_second": 120.561, "eval_steps_per_second": 1.891, "step": 25000 }, { "epoch": 1.22, "learning_rate": 4.3918128654970766e-05, "loss": 2.0477, "step": 26000 }, { "epoch": 1.22, "eval_loss": 2.7347090244293213, "eval_runtime": 54.9797, "eval_samples_per_second": 120.572, "eval_steps_per_second": 1.892, "step": 26000 }, { "epoch": 1.26, "learning_rate": 4.368421052631579e-05, "loss": 2.0055, "step": 27000 }, { "epoch": 1.26, "eval_loss": 2.7260184288024902, "eval_runtime": 54.9686, "eval_samples_per_second": 120.596, "eval_steps_per_second": 1.892, "step": 27000 }, { "epoch": 1.31, "learning_rate": 4.345029239766082e-05, "loss": 1.9738, "step": 28000 }, { "epoch": 1.31, "eval_loss": 2.7053301334381104, "eval_runtime": 54.975, "eval_samples_per_second": 120.582, "eval_steps_per_second": 1.892, "step": 28000 }, { "epoch": 1.36, "learning_rate": 4.321637426900585e-05, "loss": 1.9336, "step": 29000 }, { "epoch": 1.36, "eval_loss": 2.6540746688842773, "eval_runtime": 54.9866, "eval_samples_per_second": 120.557, "eval_steps_per_second": 1.891, "step": 29000 }, { "epoch": 1.4, "learning_rate": 4.298245614035088e-05, "loss": 1.9008, "step": 30000 }, { "epoch": 1.4, "eval_loss": 2.6721866130828857, "eval_runtime": 54.9707, "eval_samples_per_second": 120.592, "eval_steps_per_second": 1.892, "step": 30000 }, { "epoch": 1.45, "learning_rate": 4.274853801169591e-05, "loss": 1.8603, "step": 31000 }, { "epoch": 1.45, "eval_loss": 2.6387619972229004, "eval_runtime": 54.9719, "eval_samples_per_second": 120.589, "eval_steps_per_second": 1.892, "step": 31000 }, { "epoch": 1.5, "learning_rate": 4.251461988304094e-05, "loss": 1.8291, "step": 32000 }, { "epoch": 1.5, "eval_loss": 2.640782594680786, "eval_runtime": 54.9643, "eval_samples_per_second": 120.606, "eval_steps_per_second": 1.892, "step": 32000 }, { "epoch": 1.54, "learning_rate": 4.228070175438597e-05, "loss": 1.8059, "step": 33000 }, { "epoch": 1.54, "eval_loss": 2.614128589630127, "eval_runtime": 54.9538, "eval_samples_per_second": 120.629, "eval_steps_per_second": 1.893, "step": 33000 }, { "epoch": 1.59, "learning_rate": 4.204678362573099e-05, "loss": 1.7663, "step": 34000 }, { "epoch": 1.59, "eval_loss": 2.618607997894287, "eval_runtime": 55.0051, "eval_samples_per_second": 120.516, "eval_steps_per_second": 1.891, "step": 34000 }, { "epoch": 1.64, "learning_rate": 4.1812865497076025e-05, "loss": 1.7322, "step": 35000 }, { "epoch": 1.64, "eval_loss": 2.6462574005126953, "eval_runtime": 54.9802, "eval_samples_per_second": 120.571, "eval_steps_per_second": 1.892, "step": 35000 }, { "epoch": 1.68, "learning_rate": 4.157894736842106e-05, "loss": 1.7187, "step": 36000 }, { "epoch": 1.68, "eval_loss": 2.5989272594451904, "eval_runtime": 54.9619, "eval_samples_per_second": 120.611, "eval_steps_per_second": 1.892, "step": 36000 }, { "epoch": 1.73, "learning_rate": 4.134502923976608e-05, "loss": 1.6852, "step": 37000 }, { "epoch": 1.73, "eval_loss": 2.5719058513641357, "eval_runtime": 54.9667, "eval_samples_per_second": 120.6, "eval_steps_per_second": 1.892, "step": 37000 }, { "epoch": 1.78, "learning_rate": 4.111111111111111e-05, "loss": 1.6649, "step": 38000 }, { "epoch": 1.78, "eval_loss": 2.57804012298584, "eval_runtime": 54.9675, "eval_samples_per_second": 120.598, "eval_steps_per_second": 1.892, "step": 38000 }, { "epoch": 1.82, "learning_rate": 4.087719298245614e-05, "loss": 1.6285, "step": 39000 }, { "epoch": 1.82, "eval_loss": 2.5606088638305664, "eval_runtime": 55.1929, "eval_samples_per_second": 120.106, "eval_steps_per_second": 1.884, "step": 39000 }, { "epoch": 1.87, "learning_rate": 4.0643274853801174e-05, "loss": 1.6033, "step": 40000 }, { "epoch": 1.87, "eval_loss": 2.570094585418701, "eval_runtime": 55.1572, "eval_samples_per_second": 120.184, "eval_steps_per_second": 1.886, "step": 40000 }, { "epoch": 1.92, "learning_rate": 4.04093567251462e-05, "loss": 1.5833, "step": 41000 }, { "epoch": 1.92, "eval_loss": 2.5516393184661865, "eval_runtime": 55.1223, "eval_samples_per_second": 120.26, "eval_steps_per_second": 1.887, "step": 41000 }, { "epoch": 1.96, "learning_rate": 4.017543859649123e-05, "loss": 1.5701, "step": 42000 }, { "epoch": 1.96, "eval_loss": 2.544060707092285, "eval_runtime": 54.9919, "eval_samples_per_second": 120.545, "eval_steps_per_second": 1.891, "step": 42000 }, { "epoch": 2.01, "learning_rate": 3.994152046783626e-05, "loss": 1.5252, "step": 43000 }, { "epoch": 2.01, "eval_loss": 2.545295476913452, "eval_runtime": 54.9924, "eval_samples_per_second": 120.544, "eval_steps_per_second": 1.891, "step": 43000 }, { "epoch": 2.06, "learning_rate": 3.970760233918129e-05, "loss": 1.5019, "step": 44000 }, { "epoch": 2.06, "eval_loss": 2.547807216644287, "eval_runtime": 55.007, "eval_samples_per_second": 120.512, "eval_steps_per_second": 1.891, "step": 44000 }, { "epoch": 2.11, "learning_rate": 3.9473684210526316e-05, "loss": 1.4789, "step": 45000 }, { "epoch": 2.11, "eval_loss": 2.541635036468506, "eval_runtime": 54.9822, "eval_samples_per_second": 120.566, "eval_steps_per_second": 1.892, "step": 45000 }, { "epoch": 2.15, "learning_rate": 3.923976608187135e-05, "loss": 1.4611, "step": 46000 }, { "epoch": 2.15, "eval_loss": 2.526390790939331, "eval_runtime": 54.9826, "eval_samples_per_second": 120.565, "eval_steps_per_second": 1.892, "step": 46000 }, { "epoch": 2.2, "learning_rate": 3.9005847953216374e-05, "loss": 1.4413, "step": 47000 }, { "epoch": 2.2, "eval_loss": 2.5193886756896973, "eval_runtime": 54.9793, "eval_samples_per_second": 120.573, "eval_steps_per_second": 1.892, "step": 47000 }, { "epoch": 2.25, "learning_rate": 3.877192982456141e-05, "loss": 1.4106, "step": 48000 }, { "epoch": 2.25, "eval_loss": 2.504810094833374, "eval_runtime": 55.0248, "eval_samples_per_second": 120.473, "eval_steps_per_second": 1.89, "step": 48000 }, { "epoch": 2.29, "learning_rate": 3.853801169590643e-05, "loss": 1.3928, "step": 49000 }, { "epoch": 2.29, "eval_loss": 2.5266056060791016, "eval_runtime": 55.1287, "eval_samples_per_second": 120.246, "eval_steps_per_second": 1.886, "step": 49000 }, { "epoch": 2.34, "learning_rate": 3.8304093567251465e-05, "loss": 1.3857, "step": 50000 }, { "epoch": 2.34, "eval_loss": 2.5026743412017822, "eval_runtime": 55.0968, "eval_samples_per_second": 120.315, "eval_steps_per_second": 1.888, "step": 50000 }, { "epoch": 2.39, "learning_rate": 3.80701754385965e-05, "loss": 1.3682, "step": 51000 }, { "epoch": 2.39, "eval_loss": 2.5191988945007324, "eval_runtime": 55.0835, "eval_samples_per_second": 120.345, "eval_steps_per_second": 1.888, "step": 51000 }, { "epoch": 2.43, "learning_rate": 3.783625730994152e-05, "loss": 1.337, "step": 52000 }, { "epoch": 2.43, "eval_loss": 2.4917993545532227, "eval_runtime": 55.1615, "eval_samples_per_second": 120.175, "eval_steps_per_second": 1.885, "step": 52000 }, { "epoch": 2.48, "learning_rate": 3.760233918128655e-05, "loss": 1.3314, "step": 53000 }, { "epoch": 2.48, "eval_loss": 2.503882646560669, "eval_runtime": 55.1711, "eval_samples_per_second": 120.153, "eval_steps_per_second": 1.885, "step": 53000 }, { "epoch": 2.53, "learning_rate": 3.736842105263158e-05, "loss": 1.3213, "step": 54000 }, { "epoch": 2.53, "eval_loss": 2.5335164070129395, "eval_runtime": 55.1504, "eval_samples_per_second": 120.199, "eval_steps_per_second": 1.886, "step": 54000 }, { "epoch": 2.57, "learning_rate": 3.713450292397661e-05, "loss": 1.2901, "step": 55000 }, { "epoch": 2.57, "eval_loss": 2.5040109157562256, "eval_runtime": 55.1836, "eval_samples_per_second": 120.126, "eval_steps_per_second": 1.885, "step": 55000 }, { "epoch": 2.62, "learning_rate": 3.690058479532164e-05, "loss": 1.2927, "step": 56000 }, { "epoch": 2.62, "eval_loss": 2.4990580081939697, "eval_runtime": 55.1982, "eval_samples_per_second": 120.095, "eval_steps_per_second": 1.884, "step": 56000 }, { "epoch": 2.67, "learning_rate": 3.6666666666666666e-05, "loss": 1.2631, "step": 57000 }, { "epoch": 2.67, "eval_loss": 2.500002861022949, "eval_runtime": 55.1671, "eval_samples_per_second": 120.162, "eval_steps_per_second": 1.885, "step": 57000 }, { "epoch": 2.71, "learning_rate": 3.64327485380117e-05, "loss": 1.2526, "step": 58000 }, { "epoch": 2.71, "eval_loss": 2.484260320663452, "eval_runtime": 55.0693, "eval_samples_per_second": 120.376, "eval_steps_per_second": 1.889, "step": 58000 }, { "epoch": 2.76, "learning_rate": 3.6198830409356724e-05, "loss": 1.2371, "step": 59000 }, { "epoch": 2.76, "eval_loss": 2.480639696121216, "eval_runtime": 55.0676, "eval_samples_per_second": 120.379, "eval_steps_per_second": 1.889, "step": 59000 }, { "epoch": 2.81, "learning_rate": 3.5964912280701756e-05, "loss": 1.2194, "step": 60000 }, { "epoch": 2.81, "eval_loss": 2.480283498764038, "eval_runtime": 54.981, "eval_samples_per_second": 120.569, "eval_steps_per_second": 1.892, "step": 60000 }, { "epoch": 2.85, "learning_rate": 3.573099415204679e-05, "loss": 1.2103, "step": 61000 }, { "epoch": 2.85, "eval_loss": 2.4655823707580566, "eval_runtime": 54.9896, "eval_samples_per_second": 120.55, "eval_steps_per_second": 1.891, "step": 61000 }, { "epoch": 2.9, "learning_rate": 3.5497076023391815e-05, "loss": 1.1954, "step": 62000 }, { "epoch": 2.9, "eval_loss": 2.467862367630005, "eval_runtime": 55.0349, "eval_samples_per_second": 120.451, "eval_steps_per_second": 1.89, "step": 62000 }, { "epoch": 2.95, "learning_rate": 3.526315789473684e-05, "loss": 1.1841, "step": 63000 }, { "epoch": 2.95, "eval_loss": 2.4734864234924316, "eval_runtime": 55.0767, "eval_samples_per_second": 120.359, "eval_steps_per_second": 1.888, "step": 63000 }, { "epoch": 2.99, "learning_rate": 3.502923976608187e-05, "loss": 1.1697, "step": 64000 }, { "epoch": 2.99, "eval_loss": 2.4691245555877686, "eval_runtime": 55.01, "eval_samples_per_second": 120.505, "eval_steps_per_second": 1.891, "step": 64000 }, { "epoch": 3.04, "learning_rate": 3.4795321637426905e-05, "loss": 1.1488, "step": 65000 }, { "epoch": 3.04, "eval_loss": 2.50709867477417, "eval_runtime": 55.0061, "eval_samples_per_second": 120.514, "eval_steps_per_second": 1.891, "step": 65000 }, { "epoch": 3.09, "learning_rate": 3.456140350877193e-05, "loss": 1.1343, "step": 66000 }, { "epoch": 3.09, "eval_loss": 2.464665412902832, "eval_runtime": 54.9972, "eval_samples_per_second": 120.533, "eval_steps_per_second": 1.891, "step": 66000 }, { "epoch": 3.13, "learning_rate": 3.432748538011696e-05, "loss": 1.1285, "step": 67000 }, { "epoch": 3.13, "eval_loss": 2.4716575145721436, "eval_runtime": 54.9735, "eval_samples_per_second": 120.585, "eval_steps_per_second": 1.892, "step": 67000 }, { "epoch": 3.18, "learning_rate": 3.409356725146199e-05, "loss": 1.1124, "step": 68000 }, { "epoch": 3.18, "eval_loss": 2.476966619491577, "eval_runtime": 55.0007, "eval_samples_per_second": 120.526, "eval_steps_per_second": 1.891, "step": 68000 }, { "epoch": 3.23, "learning_rate": 3.385964912280702e-05, "loss": 1.1097, "step": 69000 }, { "epoch": 3.23, "eval_loss": 2.487794876098633, "eval_runtime": 54.9919, "eval_samples_per_second": 120.545, "eval_steps_per_second": 1.891, "step": 69000 }, { "epoch": 3.27, "learning_rate": 3.362573099415205e-05, "loss": 1.0956, "step": 70000 }, { "epoch": 3.27, "eval_loss": 2.4818880558013916, "eval_runtime": 55.0269, "eval_samples_per_second": 120.468, "eval_steps_per_second": 1.89, "step": 70000 }, { "epoch": 3.32, "learning_rate": 3.339181286549708e-05, "loss": 1.088, "step": 71000 }, { "epoch": 3.32, "eval_loss": 2.4609289169311523, "eval_runtime": 54.9477, "eval_samples_per_second": 120.642, "eval_steps_per_second": 1.893, "step": 71000 }, { "epoch": 3.37, "learning_rate": 3.3157894736842106e-05, "loss": 1.0728, "step": 72000 }, { "epoch": 3.37, "eval_loss": 2.4839322566986084, "eval_runtime": 54.9672, "eval_samples_per_second": 120.599, "eval_steps_per_second": 1.892, "step": 72000 }, { "epoch": 3.42, "learning_rate": 3.292397660818713e-05, "loss": 1.0587, "step": 73000 }, { "epoch": 3.42, "eval_loss": 2.4727675914764404, "eval_runtime": 55.0507, "eval_samples_per_second": 120.416, "eval_steps_per_second": 1.889, "step": 73000 }, { "epoch": 3.46, "learning_rate": 3.2690058479532164e-05, "loss": 1.0534, "step": 74000 }, { "epoch": 3.46, "eval_loss": 2.4812207221984863, "eval_runtime": 54.9899, "eval_samples_per_second": 120.549, "eval_steps_per_second": 1.891, "step": 74000 }, { "epoch": 3.51, "learning_rate": 3.24561403508772e-05, "loss": 1.0455, "step": 75000 }, { "epoch": 3.51, "eval_loss": 2.469550609588623, "eval_runtime": 54.9765, "eval_samples_per_second": 120.579, "eval_steps_per_second": 1.892, "step": 75000 }, { "epoch": 3.56, "learning_rate": 3.222222222222223e-05, "loss": 1.0402, "step": 76000 }, { "epoch": 3.56, "eval_loss": 2.458113431930542, "eval_runtime": 54.9925, "eval_samples_per_second": 120.544, "eval_steps_per_second": 1.891, "step": 76000 }, { "epoch": 3.6, "learning_rate": 3.198830409356725e-05, "loss": 1.0227, "step": 77000 }, { "epoch": 3.6, "eval_loss": 2.4712133407592773, "eval_runtime": 54.9707, "eval_samples_per_second": 120.592, "eval_steps_per_second": 1.892, "step": 77000 }, { "epoch": 3.65, "learning_rate": 3.175438596491228e-05, "loss": 1.0172, "step": 78000 }, { "epoch": 3.65, "eval_loss": 2.4822046756744385, "eval_runtime": 54.9842, "eval_samples_per_second": 120.562, "eval_steps_per_second": 1.891, "step": 78000 }, { "epoch": 3.7, "learning_rate": 3.152046783625731e-05, "loss": 0.9947, "step": 79000 }, { "epoch": 3.7, "eval_loss": 2.455008029937744, "eval_runtime": 54.9636, "eval_samples_per_second": 120.607, "eval_steps_per_second": 1.892, "step": 79000 }, { "epoch": 3.74, "learning_rate": 3.128654970760234e-05, "loss": 0.9924, "step": 80000 }, { "epoch": 3.74, "eval_loss": 2.440960168838501, "eval_runtime": 54.9708, "eval_samples_per_second": 120.591, "eval_steps_per_second": 1.892, "step": 80000 }, { "epoch": 3.79, "learning_rate": 3.105263157894737e-05, "loss": 0.9863, "step": 81000 }, { "epoch": 3.79, "eval_loss": 2.454493761062622, "eval_runtime": 54.966, "eval_samples_per_second": 120.602, "eval_steps_per_second": 1.892, "step": 81000 }, { "epoch": 3.84, "learning_rate": 3.08187134502924e-05, "loss": 0.9793, "step": 82000 }, { "epoch": 3.84, "eval_loss": 2.482584238052368, "eval_runtime": 55.0651, "eval_samples_per_second": 120.385, "eval_steps_per_second": 1.889, "step": 82000 }, { "epoch": 3.88, "learning_rate": 3.058479532163743e-05, "loss": 0.9639, "step": 83000 }, { "epoch": 3.88, "eval_loss": 2.4847776889801025, "eval_runtime": 55.089, "eval_samples_per_second": 120.332, "eval_steps_per_second": 1.888, "step": 83000 }, { "epoch": 3.93, "learning_rate": 3.035087719298246e-05, "loss": 0.9584, "step": 84000 }, { "epoch": 3.93, "eval_loss": 2.4647934436798096, "eval_runtime": 55.1206, "eval_samples_per_second": 120.263, "eval_steps_per_second": 1.887, "step": 84000 }, { "epoch": 3.98, "learning_rate": 3.0116959064327488e-05, "loss": 0.9508, "step": 85000 }, { "epoch": 3.98, "eval_loss": 2.445103406906128, "eval_runtime": 55.0978, "eval_samples_per_second": 120.313, "eval_steps_per_second": 1.888, "step": 85000 }, { "epoch": 4.02, "learning_rate": 2.9883040935672517e-05, "loss": 0.9425, "step": 86000 }, { "epoch": 4.02, "eval_loss": 2.4363410472869873, "eval_runtime": 55.0773, "eval_samples_per_second": 120.358, "eval_steps_per_second": 1.888, "step": 86000 }, { "epoch": 4.07, "learning_rate": 2.9649122807017543e-05, "loss": 0.9301, "step": 87000 }, { "epoch": 4.07, "eval_loss": 2.4576821327209473, "eval_runtime": 55.0908, "eval_samples_per_second": 120.329, "eval_steps_per_second": 1.888, "step": 87000 }, { "epoch": 4.12, "learning_rate": 2.9415204678362572e-05, "loss": 0.922, "step": 88000 }, { "epoch": 4.12, "eval_loss": 2.487666130065918, "eval_runtime": 55.1028, "eval_samples_per_second": 120.302, "eval_steps_per_second": 1.887, "step": 88000 }, { "epoch": 4.16, "learning_rate": 2.9181286549707604e-05, "loss": 0.9102, "step": 89000 }, { "epoch": 4.16, "eval_loss": 2.462902784347534, "eval_runtime": 55.0955, "eval_samples_per_second": 120.318, "eval_steps_per_second": 1.888, "step": 89000 }, { "epoch": 4.21, "learning_rate": 2.8947368421052634e-05, "loss": 0.9081, "step": 90000 }, { "epoch": 4.21, "eval_loss": 2.4494595527648926, "eval_runtime": 55.0849, "eval_samples_per_second": 120.341, "eval_steps_per_second": 1.888, "step": 90000 }, { "epoch": 4.26, "learning_rate": 2.8713450292397666e-05, "loss": 0.8956, "step": 91000 }, { "epoch": 4.26, "eval_loss": 2.466681718826294, "eval_runtime": 55.0767, "eval_samples_per_second": 120.359, "eval_steps_per_second": 1.888, "step": 91000 }, { "epoch": 4.3, "learning_rate": 2.847953216374269e-05, "loss": 0.8932, "step": 92000 }, { "epoch": 4.3, "eval_loss": 2.4637372493743896, "eval_runtime": 55.0713, "eval_samples_per_second": 120.371, "eval_steps_per_second": 1.888, "step": 92000 }, { "epoch": 4.35, "learning_rate": 2.824561403508772e-05, "loss": 0.8845, "step": 93000 }, { "epoch": 4.35, "eval_loss": 2.4586174488067627, "eval_runtime": 55.0741, "eval_samples_per_second": 120.365, "eval_steps_per_second": 1.888, "step": 93000 }, { "epoch": 4.4, "learning_rate": 2.801169590643275e-05, "loss": 0.877, "step": 94000 }, { "epoch": 4.4, "eval_loss": 2.471717357635498, "eval_runtime": 55.0727, "eval_samples_per_second": 120.368, "eval_steps_per_second": 1.888, "step": 94000 }, { "epoch": 4.44, "learning_rate": 2.777777777777778e-05, "loss": 0.8713, "step": 95000 }, { "epoch": 4.44, "eval_loss": 2.4618284702301025, "eval_runtime": 55.0799, "eval_samples_per_second": 120.352, "eval_steps_per_second": 1.888, "step": 95000 }, { "epoch": 4.49, "learning_rate": 2.754385964912281e-05, "loss": 0.8768, "step": 96000 }, { "epoch": 4.49, "eval_loss": 2.4480040073394775, "eval_runtime": 55.1696, "eval_samples_per_second": 120.157, "eval_steps_per_second": 1.885, "step": 96000 }, { "epoch": 4.54, "learning_rate": 2.7309941520467834e-05, "loss": 0.8662, "step": 97000 }, { "epoch": 4.54, "eval_loss": 2.468902349472046, "eval_runtime": 55.1714, "eval_samples_per_second": 120.153, "eval_steps_per_second": 1.885, "step": 97000 }, { "epoch": 4.58, "learning_rate": 2.7076023391812866e-05, "loss": 0.8622, "step": 98000 }, { "epoch": 4.58, "eval_loss": 2.4613983631134033, "eval_runtime": 55.1613, "eval_samples_per_second": 120.175, "eval_steps_per_second": 1.885, "step": 98000 }, { "epoch": 4.63, "learning_rate": 2.6842105263157896e-05, "loss": 0.8497, "step": 99000 }, { "epoch": 4.63, "eval_loss": 2.488284111022949, "eval_runtime": 55.1664, "eval_samples_per_second": 120.164, "eval_steps_per_second": 1.885, "step": 99000 }, { "epoch": 4.68, "learning_rate": 2.6608187134502928e-05, "loss": 0.8399, "step": 100000 }, { "epoch": 4.68, "eval_loss": 2.486598253250122, "eval_runtime": 55.142, "eval_samples_per_second": 120.217, "eval_steps_per_second": 1.886, "step": 100000 }, { "epoch": 4.73, "learning_rate": 2.6374269005847957e-05, "loss": 0.8397, "step": 101000 }, { "epoch": 4.73, "eval_loss": 2.490933895111084, "eval_runtime": 55.1377, "eval_samples_per_second": 120.226, "eval_steps_per_second": 1.886, "step": 101000 }, { "epoch": 4.77, "learning_rate": 2.6140350877192983e-05, "loss": 0.8266, "step": 102000 }, { "epoch": 4.77, "eval_loss": 2.4587643146514893, "eval_runtime": 55.0944, "eval_samples_per_second": 120.321, "eval_steps_per_second": 1.888, "step": 102000 }, { "epoch": 4.82, "learning_rate": 2.5906432748538012e-05, "loss": 0.8231, "step": 103000 }, { "epoch": 4.82, "eval_loss": 2.4951488971710205, "eval_runtime": 55.155, "eval_samples_per_second": 120.189, "eval_steps_per_second": 1.886, "step": 103000 }, { "epoch": 4.87, "learning_rate": 2.567251461988304e-05, "loss": 0.8189, "step": 104000 }, { "epoch": 4.87, "eval_loss": 2.458134889602661, "eval_runtime": 55.0735, "eval_samples_per_second": 120.366, "eval_steps_per_second": 1.888, "step": 104000 }, { "epoch": 4.91, "learning_rate": 2.5438596491228074e-05, "loss": 0.8155, "step": 105000 }, { "epoch": 4.91, "eval_loss": 2.448225736618042, "eval_runtime": 55.0955, "eval_samples_per_second": 120.318, "eval_steps_per_second": 1.888, "step": 105000 }, { "epoch": 4.96, "learning_rate": 2.5204678362573103e-05, "loss": 0.8059, "step": 106000 }, { "epoch": 4.96, "eval_loss": 2.489133358001709, "eval_runtime": 55.1106, "eval_samples_per_second": 120.285, "eval_steps_per_second": 1.887, "step": 106000 }, { "epoch": 5.01, "learning_rate": 2.4970760233918132e-05, "loss": 0.8085, "step": 107000 }, { "epoch": 5.01, "eval_loss": 2.491405487060547, "eval_runtime": 55.0557, "eval_samples_per_second": 120.405, "eval_steps_per_second": 1.889, "step": 107000 }, { "epoch": 5.05, "learning_rate": 2.4736842105263158e-05, "loss": 0.7851, "step": 108000 }, { "epoch": 5.05, "eval_loss": 2.486567735671997, "eval_runtime": 55.0714, "eval_samples_per_second": 120.371, "eval_steps_per_second": 1.888, "step": 108000 }, { "epoch": 5.1, "learning_rate": 2.450292397660819e-05, "loss": 0.7827, "step": 109000 }, { "epoch": 5.1, "eval_loss": 2.480097532272339, "eval_runtime": 55.0814, "eval_samples_per_second": 120.349, "eval_steps_per_second": 1.888, "step": 109000 }, { "epoch": 5.15, "learning_rate": 2.4269005847953216e-05, "loss": 0.7813, "step": 110000 }, { "epoch": 5.15, "eval_loss": 2.4855968952178955, "eval_runtime": 55.078, "eval_samples_per_second": 120.357, "eval_steps_per_second": 1.888, "step": 110000 }, { "epoch": 5.19, "learning_rate": 2.4035087719298245e-05, "loss": 0.7829, "step": 111000 }, { "epoch": 5.19, "eval_loss": 2.462341785430908, "eval_runtime": 55.0705, "eval_samples_per_second": 120.373, "eval_steps_per_second": 1.888, "step": 111000 }, { "epoch": 5.24, "learning_rate": 2.3801169590643278e-05, "loss": 0.7724, "step": 112000 }, { "epoch": 5.24, "eval_loss": 2.478029251098633, "eval_runtime": 55.0837, "eval_samples_per_second": 120.344, "eval_steps_per_second": 1.888, "step": 112000 }, { "epoch": 5.29, "learning_rate": 2.3567251461988303e-05, "loss": 0.7646, "step": 113000 }, { "epoch": 5.29, "eval_loss": 2.4587323665618896, "eval_runtime": 55.1053, "eval_samples_per_second": 120.297, "eval_steps_per_second": 1.887, "step": 113000 }, { "epoch": 5.33, "learning_rate": 2.3333333333333336e-05, "loss": 0.7604, "step": 114000 }, { "epoch": 5.33, "eval_loss": 2.453984498977661, "eval_runtime": 55.0903, "eval_samples_per_second": 120.33, "eval_steps_per_second": 1.888, "step": 114000 }, { "epoch": 5.38, "learning_rate": 2.309941520467836e-05, "loss": 0.7518, "step": 115000 }, { "epoch": 5.38, "eval_loss": 2.488924026489258, "eval_runtime": 55.1009, "eval_samples_per_second": 120.307, "eval_steps_per_second": 1.887, "step": 115000 }, { "epoch": 5.43, "learning_rate": 2.2865497076023394e-05, "loss": 0.7515, "step": 116000 }, { "epoch": 5.43, "eval_loss": 2.4510860443115234, "eval_runtime": 55.088, "eval_samples_per_second": 120.335, "eval_steps_per_second": 1.888, "step": 116000 }, { "epoch": 5.47, "learning_rate": 2.2631578947368423e-05, "loss": 0.7511, "step": 117000 }, { "epoch": 5.47, "eval_loss": 2.468933343887329, "eval_runtime": 55.0676, "eval_samples_per_second": 120.379, "eval_steps_per_second": 1.889, "step": 117000 }, { "epoch": 5.52, "learning_rate": 2.2397660818713452e-05, "loss": 0.7424, "step": 118000 }, { "epoch": 5.52, "eval_loss": 2.4676008224487305, "eval_runtime": 55.1052, "eval_samples_per_second": 120.297, "eval_steps_per_second": 1.887, "step": 118000 }, { "epoch": 5.57, "learning_rate": 2.216374269005848e-05, "loss": 0.7327, "step": 119000 }, { "epoch": 5.57, "eval_loss": 2.482384443283081, "eval_runtime": 55.0883, "eval_samples_per_second": 120.334, "eval_steps_per_second": 1.888, "step": 119000 }, { "epoch": 5.61, "learning_rate": 2.1929824561403507e-05, "loss": 0.7349, "step": 120000 }, { "epoch": 5.61, "eval_loss": 2.450364351272583, "eval_runtime": 55.0642, "eval_samples_per_second": 120.387, "eval_steps_per_second": 1.889, "step": 120000 }, { "epoch": 5.66, "learning_rate": 2.169590643274854e-05, "loss": 0.7307, "step": 121000 }, { "epoch": 5.66, "eval_loss": 2.4753456115722656, "eval_runtime": 55.0827, "eval_samples_per_second": 120.346, "eval_steps_per_second": 1.888, "step": 121000 }, { "epoch": 5.71, "learning_rate": 2.146198830409357e-05, "loss": 0.7269, "step": 122000 }, { "epoch": 5.71, "eval_loss": 2.463690757751465, "eval_runtime": 55.087, "eval_samples_per_second": 120.337, "eval_steps_per_second": 1.888, "step": 122000 }, { "epoch": 5.75, "learning_rate": 2.1228070175438598e-05, "loss": 0.7175, "step": 123000 }, { "epoch": 5.75, "eval_loss": 2.4744393825531006, "eval_runtime": 55.0809, "eval_samples_per_second": 120.35, "eval_steps_per_second": 1.888, "step": 123000 }, { "epoch": 5.8, "learning_rate": 2.0994152046783627e-05, "loss": 0.7178, "step": 124000 }, { "epoch": 5.8, "eval_loss": 2.4851980209350586, "eval_runtime": 55.0877, "eval_samples_per_second": 120.335, "eval_steps_per_second": 1.888, "step": 124000 }, { "epoch": 5.85, "learning_rate": 2.0760233918128656e-05, "loss": 0.7048, "step": 125000 }, { "epoch": 5.85, "eval_loss": 2.5102007389068604, "eval_runtime": 55.1078, "eval_samples_per_second": 120.291, "eval_steps_per_second": 1.887, "step": 125000 }, { "epoch": 5.89, "learning_rate": 2.0526315789473685e-05, "loss": 0.7072, "step": 126000 }, { "epoch": 5.89, "eval_loss": 2.5026237964630127, "eval_runtime": 55.1176, "eval_samples_per_second": 120.27, "eval_steps_per_second": 1.887, "step": 126000 }, { "epoch": 5.94, "learning_rate": 2.0292397660818714e-05, "loss": 0.7054, "step": 127000 }, { "epoch": 5.94, "eval_loss": 2.4804298877716064, "eval_runtime": 55.0663, "eval_samples_per_second": 120.382, "eval_steps_per_second": 1.889, "step": 127000 }, { "epoch": 5.99, "learning_rate": 2.0058479532163744e-05, "loss": 0.7019, "step": 128000 }, { "epoch": 5.99, "eval_loss": 2.4398744106292725, "eval_runtime": 54.9972, "eval_samples_per_second": 120.533, "eval_steps_per_second": 1.891, "step": 128000 }, { "epoch": 6.03, "learning_rate": 1.9824561403508773e-05, "loss": 0.6942, "step": 129000 }, { "epoch": 6.03, "eval_loss": 2.4618844985961914, "eval_runtime": 55.1004, "eval_samples_per_second": 120.308, "eval_steps_per_second": 1.887, "step": 129000 }, { "epoch": 6.08, "learning_rate": 1.9590643274853802e-05, "loss": 0.6842, "step": 130000 }, { "epoch": 6.08, "eval_loss": 2.496403217315674, "eval_runtime": 55.0871, "eval_samples_per_second": 120.337, "eval_steps_per_second": 1.888, "step": 130000 }, { "epoch": 6.13, "learning_rate": 1.935672514619883e-05, "loss": 0.6859, "step": 131000 }, { "epoch": 6.13, "eval_loss": 2.483705520629883, "eval_runtime": 55.0869, "eval_samples_per_second": 120.337, "eval_steps_per_second": 1.888, "step": 131000 }, { "epoch": 6.18, "learning_rate": 1.9122807017543863e-05, "loss": 0.6742, "step": 132000 }, { "epoch": 6.18, "eval_loss": 2.489377498626709, "eval_runtime": 55.1198, "eval_samples_per_second": 120.265, "eval_steps_per_second": 1.887, "step": 132000 }, { "epoch": 6.22, "learning_rate": 1.888888888888889e-05, "loss": 0.6818, "step": 133000 }, { "epoch": 6.22, "eval_loss": 2.507904052734375, "eval_runtime": 55.1222, "eval_samples_per_second": 120.26, "eval_steps_per_second": 1.887, "step": 133000 }, { "epoch": 6.27, "learning_rate": 1.8654970760233918e-05, "loss": 0.6742, "step": 134000 }, { "epoch": 6.27, "eval_loss": 2.4935832023620605, "eval_runtime": 55.1223, "eval_samples_per_second": 120.26, "eval_steps_per_second": 1.887, "step": 134000 }, { "epoch": 6.32, "learning_rate": 1.8421052631578947e-05, "loss": 0.6756, "step": 135000 }, { "epoch": 6.32, "eval_loss": 2.512763023376465, "eval_runtime": 55.167, "eval_samples_per_second": 120.162, "eval_steps_per_second": 1.885, "step": 135000 }, { "epoch": 6.36, "learning_rate": 1.8187134502923976e-05, "loss": 0.6635, "step": 136000 }, { "epoch": 6.36, "eval_loss": 2.5170469284057617, "eval_runtime": 55.1756, "eval_samples_per_second": 120.144, "eval_steps_per_second": 1.885, "step": 136000 }, { "epoch": 6.41, "learning_rate": 1.795321637426901e-05, "loss": 0.6645, "step": 137000 }, { "epoch": 6.41, "eval_loss": 2.5008370876312256, "eval_runtime": 55.1095, "eval_samples_per_second": 120.288, "eval_steps_per_second": 1.887, "step": 137000 }, { "epoch": 6.46, "learning_rate": 1.7719298245614035e-05, "loss": 0.6617, "step": 138000 }, { "epoch": 6.46, "eval_loss": 2.503709316253662, "eval_runtime": 55.1047, "eval_samples_per_second": 120.298, "eval_steps_per_second": 1.887, "step": 138000 }, { "epoch": 6.5, "learning_rate": 1.7485380116959067e-05, "loss": 0.6574, "step": 139000 }, { "epoch": 6.5, "eval_loss": 2.4953572750091553, "eval_runtime": 55.0727, "eval_samples_per_second": 120.368, "eval_steps_per_second": 1.888, "step": 139000 }, { "epoch": 6.55, "learning_rate": 1.7251461988304093e-05, "loss": 0.6519, "step": 140000 }, { "epoch": 6.55, "eval_loss": 2.519571304321289, "eval_runtime": 55.1072, "eval_samples_per_second": 120.293, "eval_steps_per_second": 1.887, "step": 140000 }, { "epoch": 6.6, "learning_rate": 1.7017543859649125e-05, "loss": 0.6453, "step": 141000 }, { "epoch": 6.6, "eval_loss": 2.485342502593994, "eval_runtime": 55.0939, "eval_samples_per_second": 120.322, "eval_steps_per_second": 1.888, "step": 141000 }, { "epoch": 6.64, "learning_rate": 1.6783625730994155e-05, "loss": 0.6445, "step": 142000 }, { "epoch": 6.64, "eval_loss": 2.485079765319824, "eval_runtime": 55.093, "eval_samples_per_second": 120.324, "eval_steps_per_second": 1.888, "step": 142000 }, { "epoch": 6.69, "learning_rate": 1.654970760233918e-05, "loss": 0.643, "step": 143000 }, { "epoch": 6.69, "eval_loss": 2.4923973083496094, "eval_runtime": 55.1032, "eval_samples_per_second": 120.302, "eval_steps_per_second": 1.887, "step": 143000 }, { "epoch": 6.74, "learning_rate": 1.6315789473684213e-05, "loss": 0.6373, "step": 144000 }, { "epoch": 6.74, "eval_loss": 2.5037529468536377, "eval_runtime": 55.0798, "eval_samples_per_second": 120.353, "eval_steps_per_second": 1.888, "step": 144000 }, { "epoch": 6.78, "learning_rate": 1.608187134502924e-05, "loss": 0.6292, "step": 145000 }, { "epoch": 6.78, "eval_loss": 2.488449811935425, "eval_runtime": 55.097, "eval_samples_per_second": 120.315, "eval_steps_per_second": 1.888, "step": 145000 }, { "epoch": 6.83, "learning_rate": 1.584795321637427e-05, "loss": 0.6386, "step": 146000 }, { "epoch": 6.83, "eval_loss": 2.482603073120117, "eval_runtime": 55.1088, "eval_samples_per_second": 120.289, "eval_steps_per_second": 1.887, "step": 146000 }, { "epoch": 6.88, "learning_rate": 1.56140350877193e-05, "loss": 0.6357, "step": 147000 }, { "epoch": 6.88, "eval_loss": 2.482375144958496, "eval_runtime": 55.1247, "eval_samples_per_second": 120.255, "eval_steps_per_second": 1.887, "step": 147000 }, { "epoch": 6.92, "learning_rate": 1.538011695906433e-05, "loss": 0.6251, "step": 148000 }, { "epoch": 6.92, "eval_loss": 2.4937736988067627, "eval_runtime": 55.1287, "eval_samples_per_second": 120.246, "eval_steps_per_second": 1.886, "step": 148000 }, { "epoch": 6.97, "learning_rate": 1.5146198830409358e-05, "loss": 0.624, "step": 149000 }, { "epoch": 6.97, "eval_loss": 2.5023653507232666, "eval_runtime": 55.1273, "eval_samples_per_second": 120.249, "eval_steps_per_second": 1.887, "step": 149000 }, { "epoch": 7.02, "learning_rate": 1.4912280701754386e-05, "loss": 0.6238, "step": 150000 }, { "epoch": 7.02, "eval_loss": 2.520798444747925, "eval_runtime": 55.0799, "eval_samples_per_second": 120.352, "eval_steps_per_second": 1.888, "step": 150000 }, { "epoch": 7.06, "learning_rate": 1.4678362573099417e-05, "loss": 0.6165, "step": 151000 }, { "epoch": 7.06, "eval_loss": 2.5339748859405518, "eval_runtime": 55.1169, "eval_samples_per_second": 120.272, "eval_steps_per_second": 1.887, "step": 151000 }, { "epoch": 7.11, "learning_rate": 1.4444444444444444e-05, "loss": 0.6119, "step": 152000 }, { "epoch": 7.11, "eval_loss": 2.5113964080810547, "eval_runtime": 55.0891, "eval_samples_per_second": 120.332, "eval_steps_per_second": 1.888, "step": 152000 }, { "epoch": 7.16, "learning_rate": 1.4210526315789475e-05, "loss": 0.6089, "step": 153000 }, { "epoch": 7.16, "eval_loss": 2.52811861038208, "eval_runtime": 55.112, "eval_samples_per_second": 120.282, "eval_steps_per_second": 1.887, "step": 153000 }, { "epoch": 7.2, "learning_rate": 1.3976608187134504e-05, "loss": 0.6035, "step": 154000 }, { "epoch": 7.2, "eval_loss": 2.5194358825683594, "eval_runtime": 55.1145, "eval_samples_per_second": 120.277, "eval_steps_per_second": 1.887, "step": 154000 }, { "epoch": 7.25, "learning_rate": 1.3742690058479531e-05, "loss": 0.6018, "step": 155000 }, { "epoch": 7.25, "eval_loss": 2.5066628456115723, "eval_runtime": 55.0979, "eval_samples_per_second": 120.313, "eval_steps_per_second": 1.888, "step": 155000 }, { "epoch": 7.3, "learning_rate": 1.3508771929824562e-05, "loss": 0.6016, "step": 156000 }, { "epoch": 7.3, "eval_loss": 2.490973711013794, "eval_runtime": 54.9953, "eval_samples_per_second": 120.538, "eval_steps_per_second": 1.891, "step": 156000 }, { "epoch": 7.34, "learning_rate": 1.327485380116959e-05, "loss": 0.6013, "step": 157000 }, { "epoch": 7.34, "eval_loss": 2.489246368408203, "eval_runtime": 54.9888, "eval_samples_per_second": 120.552, "eval_steps_per_second": 1.891, "step": 157000 }, { "epoch": 7.39, "learning_rate": 1.304093567251462e-05, "loss": 0.5958, "step": 158000 }, { "epoch": 7.39, "eval_loss": 2.528749704360962, "eval_runtime": 54.994, "eval_samples_per_second": 120.54, "eval_steps_per_second": 1.891, "step": 158000 }, { "epoch": 7.44, "learning_rate": 1.2807017543859651e-05, "loss": 0.5925, "step": 159000 }, { "epoch": 7.44, "eval_loss": 2.528515100479126, "eval_runtime": 54.9798, "eval_samples_per_second": 120.571, "eval_steps_per_second": 1.892, "step": 159000 }, { "epoch": 7.49, "learning_rate": 1.2573099415204679e-05, "loss": 0.5908, "step": 160000 }, { "epoch": 7.49, "eval_loss": 2.510267734527588, "eval_runtime": 55.0014, "eval_samples_per_second": 120.524, "eval_steps_per_second": 1.891, "step": 160000 }, { "epoch": 7.53, "learning_rate": 1.2339181286549708e-05, "loss": 0.587, "step": 161000 }, { "epoch": 7.53, "eval_loss": 2.533625602722168, "eval_runtime": 54.9987, "eval_samples_per_second": 120.53, "eval_steps_per_second": 1.891, "step": 161000 }, { "epoch": 7.58, "learning_rate": 1.2105263157894737e-05, "loss": 0.5851, "step": 162000 }, { "epoch": 7.58, "eval_loss": 2.538762331008911, "eval_runtime": 54.9696, "eval_samples_per_second": 120.594, "eval_steps_per_second": 1.892, "step": 162000 }, { "epoch": 7.63, "learning_rate": 1.1871345029239766e-05, "loss": 0.579, "step": 163000 }, { "epoch": 7.63, "eval_loss": 2.5098183155059814, "eval_runtime": 54.9924, "eval_samples_per_second": 120.544, "eval_steps_per_second": 1.891, "step": 163000 }, { "epoch": 7.67, "learning_rate": 1.1637426900584795e-05, "loss": 0.5764, "step": 164000 }, { "epoch": 7.67, "eval_loss": 2.5329983234405518, "eval_runtime": 55.0148, "eval_samples_per_second": 120.495, "eval_steps_per_second": 1.89, "step": 164000 }, { "epoch": 7.72, "learning_rate": 1.1403508771929824e-05, "loss": 0.5781, "step": 165000 }, { "epoch": 7.72, "eval_loss": 2.512319803237915, "eval_runtime": 54.9674, "eval_samples_per_second": 120.599, "eval_steps_per_second": 1.892, "step": 165000 }, { "epoch": 7.77, "learning_rate": 1.1169590643274855e-05, "loss": 0.5758, "step": 166000 }, { "epoch": 7.77, "eval_loss": 2.5034148693084717, "eval_runtime": 54.9854, "eval_samples_per_second": 120.559, "eval_steps_per_second": 1.891, "step": 166000 }, { "epoch": 7.81, "learning_rate": 1.0935672514619884e-05, "loss": 0.5792, "step": 167000 }, { "epoch": 7.81, "eval_loss": 2.525723934173584, "eval_runtime": 55.017, "eval_samples_per_second": 120.49, "eval_steps_per_second": 1.89, "step": 167000 }, { "epoch": 7.86, "learning_rate": 1.0701754385964913e-05, "loss": 0.5745, "step": 168000 }, { "epoch": 7.86, "eval_loss": 2.526042938232422, "eval_runtime": 54.987, "eval_samples_per_second": 120.556, "eval_steps_per_second": 1.891, "step": 168000 }, { "epoch": 7.91, "learning_rate": 1.0467836257309941e-05, "loss": 0.5702, "step": 169000 }, { "epoch": 7.91, "eval_loss": 2.5171217918395996, "eval_runtime": 54.976, "eval_samples_per_second": 120.58, "eval_steps_per_second": 1.892, "step": 169000 }, { "epoch": 7.95, "learning_rate": 1.023391812865497e-05, "loss": 0.5714, "step": 170000 }, { "epoch": 7.95, "eval_loss": 2.509648323059082, "eval_runtime": 54.9828, "eval_samples_per_second": 120.565, "eval_steps_per_second": 1.892, "step": 170000 }, { "epoch": 8.0, "learning_rate": 1e-05, "loss": 0.5692, "step": 171000 }, { "epoch": 8.0, "eval_loss": 2.4963207244873047, "eval_runtime": 54.9818, "eval_samples_per_second": 120.567, "eval_steps_per_second": 1.892, "step": 171000 }, { "epoch": 8.05, "learning_rate": 9.76608187134503e-06, "loss": 0.5541, "step": 172000 }, { "epoch": 8.05, "eval_loss": 2.5158822536468506, "eval_runtime": 54.9875, "eval_samples_per_second": 120.555, "eval_steps_per_second": 1.891, "step": 172000 }, { "epoch": 8.09, "learning_rate": 9.532163742690059e-06, "loss": 0.5609, "step": 173000 }, { "epoch": 8.09, "eval_loss": 2.52651047706604, "eval_runtime": 54.9727, "eval_samples_per_second": 120.587, "eval_steps_per_second": 1.892, "step": 173000 }, { "epoch": 8.14, "learning_rate": 9.298245614035088e-06, "loss": 0.5567, "step": 174000 }, { "epoch": 8.14, "eval_loss": 2.529944658279419, "eval_runtime": 54.9646, "eval_samples_per_second": 120.605, "eval_steps_per_second": 1.892, "step": 174000 }, { "epoch": 8.19, "learning_rate": 9.064327485380117e-06, "loss": 0.5593, "step": 175000 }, { "epoch": 8.19, "eval_loss": 2.5352935791015625, "eval_runtime": 54.9969, "eval_samples_per_second": 120.534, "eval_steps_per_second": 1.891, "step": 175000 }, { "epoch": 8.23, "learning_rate": 8.830409356725146e-06, "loss": 0.5537, "step": 176000 }, { "epoch": 8.23, "eval_loss": 2.5415403842926025, "eval_runtime": 54.9924, "eval_samples_per_second": 120.544, "eval_steps_per_second": 1.891, "step": 176000 }, { "epoch": 8.28, "learning_rate": 8.596491228070176e-06, "loss": 0.5465, "step": 177000 }, { "epoch": 8.28, "eval_loss": 2.5204358100891113, "eval_runtime": 55.0062, "eval_samples_per_second": 120.514, "eval_steps_per_second": 1.891, "step": 177000 }, { "epoch": 8.33, "learning_rate": 8.362573099415205e-06, "loss": 0.548, "step": 178000 }, { "epoch": 8.33, "eval_loss": 2.5008552074432373, "eval_runtime": 54.9691, "eval_samples_per_second": 120.595, "eval_steps_per_second": 1.892, "step": 178000 }, { "epoch": 8.37, "learning_rate": 8.128654970760234e-06, "loss": 0.5477, "step": 179000 }, { "epoch": 8.37, "eval_loss": 2.5255722999572754, "eval_runtime": 54.9912, "eval_samples_per_second": 120.547, "eval_steps_per_second": 1.891, "step": 179000 }, { "epoch": 8.42, "learning_rate": 7.894736842105263e-06, "loss": 0.5393, "step": 180000 }, { "epoch": 8.42, "eval_loss": 2.49310564994812, "eval_runtime": 54.9871, "eval_samples_per_second": 120.556, "eval_steps_per_second": 1.891, "step": 180000 }, { "epoch": 8.47, "learning_rate": 7.660818713450294e-06, "loss": 0.5441, "step": 181000 }, { "epoch": 8.47, "eval_loss": 2.5206234455108643, "eval_runtime": 54.9863, "eval_samples_per_second": 120.557, "eval_steps_per_second": 1.891, "step": 181000 }, { "epoch": 8.51, "learning_rate": 7.426900584795322e-06, "loss": 0.5419, "step": 182000 }, { "epoch": 8.51, "eval_loss": 2.511657476425171, "eval_runtime": 54.9931, "eval_samples_per_second": 120.542, "eval_steps_per_second": 1.891, "step": 182000 }, { "epoch": 8.56, "learning_rate": 7.192982456140351e-06, "loss": 0.5377, "step": 183000 }, { "epoch": 8.56, "eval_loss": 2.534726142883301, "eval_runtime": 55.0074, "eval_samples_per_second": 120.511, "eval_steps_per_second": 1.891, "step": 183000 }, { "epoch": 8.61, "learning_rate": 6.95906432748538e-06, "loss": 0.5375, "step": 184000 }, { "epoch": 8.61, "eval_loss": 2.4978044033050537, "eval_runtime": 55.0077, "eval_samples_per_second": 120.51, "eval_steps_per_second": 1.891, "step": 184000 }, { "epoch": 8.65, "learning_rate": 6.725146198830409e-06, "loss": 0.5375, "step": 185000 }, { "epoch": 8.65, "eval_loss": 2.4929347038269043, "eval_runtime": 54.9953, "eval_samples_per_second": 120.537, "eval_steps_per_second": 1.891, "step": 185000 }, { "epoch": 8.7, "learning_rate": 6.4912280701754385e-06, "loss": 0.5354, "step": 186000 }, { "epoch": 8.7, "eval_loss": 2.4908556938171387, "eval_runtime": 55.0037, "eval_samples_per_second": 120.519, "eval_steps_per_second": 1.891, "step": 186000 }, { "epoch": 8.75, "learning_rate": 6.2573099415204685e-06, "loss": 0.5318, "step": 187000 }, { "epoch": 8.75, "eval_loss": 2.531054973602295, "eval_runtime": 54.9993, "eval_samples_per_second": 120.529, "eval_steps_per_second": 1.891, "step": 187000 }, { "epoch": 8.8, "learning_rate": 6.023391812865498e-06, "loss": 0.5338, "step": 188000 }, { "epoch": 8.8, "eval_loss": 2.5138602256774902, "eval_runtime": 54.9949, "eval_samples_per_second": 120.539, "eval_steps_per_second": 1.891, "step": 188000 }, { "epoch": 8.84, "learning_rate": 5.789473684210527e-06, "loss": 0.5247, "step": 189000 }, { "epoch": 8.84, "eval_loss": 2.5182831287384033, "eval_runtime": 54.9996, "eval_samples_per_second": 120.528, "eval_steps_per_second": 1.891, "step": 189000 }, { "epoch": 8.89, "learning_rate": 5.555555555555556e-06, "loss": 0.5249, "step": 190000 }, { "epoch": 8.89, "eval_loss": 2.5073628425598145, "eval_runtime": 54.9824, "eval_samples_per_second": 120.566, "eval_steps_per_second": 1.892, "step": 190000 }, { "epoch": 8.94, "learning_rate": 5.321637426900585e-06, "loss": 0.5266, "step": 191000 }, { "epoch": 8.94, "eval_loss": 2.5005078315734863, "eval_runtime": 54.9464, "eval_samples_per_second": 120.645, "eval_steps_per_second": 1.893, "step": 191000 }, { "epoch": 8.98, "learning_rate": 5.087719298245614e-06, "loss": 0.5279, "step": 192000 }, { "epoch": 8.98, "eval_loss": 2.5144731998443604, "eval_runtime": 54.9856, "eval_samples_per_second": 120.559, "eval_steps_per_second": 1.891, "step": 192000 }, { "epoch": 9.03, "learning_rate": 4.853801169590644e-06, "loss": 0.5231, "step": 193000 }, { "epoch": 9.03, "eval_loss": 2.5163862705230713, "eval_runtime": 54.965, "eval_samples_per_second": 120.604, "eval_steps_per_second": 1.892, "step": 193000 }, { "epoch": 9.08, "learning_rate": 4.619883040935673e-06, "loss": 0.5157, "step": 194000 }, { "epoch": 9.08, "eval_loss": 2.4902589321136475, "eval_runtime": 54.9685, "eval_samples_per_second": 120.596, "eval_steps_per_second": 1.892, "step": 194000 }, { "epoch": 9.12, "learning_rate": 4.3859649122807014e-06, "loss": 0.5153, "step": 195000 }, { "epoch": 9.12, "eval_loss": 2.5248496532440186, "eval_runtime": 55.0107, "eval_samples_per_second": 120.504, "eval_steps_per_second": 1.891, "step": 195000 }, { "epoch": 9.17, "learning_rate": 4.152046783625731e-06, "loss": 0.5238, "step": 196000 }, { "epoch": 9.17, "eval_loss": 2.4956910610198975, "eval_runtime": 54.9681, "eval_samples_per_second": 120.597, "eval_steps_per_second": 1.892, "step": 196000 }, { "epoch": 9.22, "learning_rate": 3.9181286549707605e-06, "loss": 0.5229, "step": 197000 }, { "epoch": 9.22, "eval_loss": 2.509634256362915, "eval_runtime": 55.0395, "eval_samples_per_second": 120.441, "eval_steps_per_second": 1.89, "step": 197000 }, { "epoch": 9.26, "learning_rate": 3.6842105263157892e-06, "loss": 0.5099, "step": 198000 }, { "epoch": 9.26, "eval_loss": 2.505375862121582, "eval_runtime": 54.9659, "eval_samples_per_second": 120.602, "eval_steps_per_second": 1.892, "step": 198000 }, { "epoch": 9.31, "learning_rate": 3.4502923976608188e-06, "loss": 0.5164, "step": 199000 }, { "epoch": 9.31, "eval_loss": 2.512755870819092, "eval_runtime": 54.9727, "eval_samples_per_second": 120.587, "eval_steps_per_second": 1.892, "step": 199000 }, { "epoch": 9.36, "learning_rate": 3.216374269005848e-06, "loss": 0.5147, "step": 200000 }, { "epoch": 9.36, "eval_loss": 2.5104758739471436, "eval_runtime": 54.9829, "eval_samples_per_second": 120.565, "eval_steps_per_second": 1.891, "step": 200000 }, { "epoch": 9.4, "learning_rate": 2.9824561403508774e-06, "loss": 0.5092, "step": 201000 }, { "epoch": 9.4, "eval_loss": 2.5510807037353516, "eval_runtime": 54.9886, "eval_samples_per_second": 120.552, "eval_steps_per_second": 1.891, "step": 201000 }, { "epoch": 9.45, "learning_rate": 2.7485380116959066e-06, "loss": 0.5123, "step": 202000 }, { "epoch": 9.45, "eval_loss": 2.4837098121643066, "eval_runtime": 54.9612, "eval_samples_per_second": 120.612, "eval_steps_per_second": 1.892, "step": 202000 }, { "epoch": 9.5, "learning_rate": 2.5146198830409357e-06, "loss": 0.5077, "step": 203000 }, { "epoch": 9.5, "eval_loss": 2.5026121139526367, "eval_runtime": 55.0018, "eval_samples_per_second": 120.523, "eval_steps_per_second": 1.891, "step": 203000 }, { "epoch": 9.54, "learning_rate": 2.2807017543859652e-06, "loss": 0.5112, "step": 204000 }, { "epoch": 9.54, "eval_loss": 2.514636278152466, "eval_runtime": 54.9811, "eval_samples_per_second": 120.569, "eval_steps_per_second": 1.892, "step": 204000 }, { "epoch": 9.59, "learning_rate": 2.0467836257309943e-06, "loss": 0.5033, "step": 205000 }, { "epoch": 9.59, "eval_loss": 2.537416696548462, "eval_runtime": 54.983, "eval_samples_per_second": 120.565, "eval_steps_per_second": 1.891, "step": 205000 }, { "epoch": 9.64, "learning_rate": 1.8128654970760235e-06, "loss": 0.5111, "step": 206000 }, { "epoch": 9.64, "eval_loss": 2.515895366668701, "eval_runtime": 54.9923, "eval_samples_per_second": 120.544, "eval_steps_per_second": 1.891, "step": 206000 }, { "epoch": 9.68, "learning_rate": 1.5789473684210528e-06, "loss": 0.5119, "step": 207000 }, { "epoch": 9.68, "eval_loss": 2.5189149379730225, "eval_runtime": 54.9887, "eval_samples_per_second": 120.552, "eval_steps_per_second": 1.891, "step": 207000 }, { "epoch": 9.73, "learning_rate": 1.345029239766082e-06, "loss": 0.5022, "step": 208000 }, { "epoch": 9.73, "eval_loss": 2.506300926208496, "eval_runtime": 54.9799, "eval_samples_per_second": 120.571, "eval_steps_per_second": 1.892, "step": 208000 }, { "epoch": 9.78, "learning_rate": 1.1111111111111112e-06, "loss": 0.5051, "step": 209000 }, { "epoch": 9.78, "eval_loss": 2.4811651706695557, "eval_runtime": 54.958, "eval_samples_per_second": 120.619, "eval_steps_per_second": 1.892, "step": 209000 }, { "epoch": 9.82, "learning_rate": 8.771929824561404e-07, "loss": 0.5028, "step": 210000 }, { "epoch": 9.82, "eval_loss": 2.4914138317108154, "eval_runtime": 55.0024, "eval_samples_per_second": 120.522, "eval_steps_per_second": 1.891, "step": 210000 }, { "epoch": 9.87, "learning_rate": 6.432748538011697e-07, "loss": 0.5066, "step": 211000 }, { "epoch": 9.87, "eval_loss": 2.5056285858154297, "eval_runtime": 54.9649, "eval_samples_per_second": 120.604, "eval_steps_per_second": 1.892, "step": 211000 }, { "epoch": 9.92, "learning_rate": 4.093567251461989e-07, "loss": 0.5058, "step": 212000 }, { "epoch": 9.92, "eval_loss": 2.53446102142334, "eval_runtime": 54.9817, "eval_samples_per_second": 120.567, "eval_steps_per_second": 1.892, "step": 212000 }, { "epoch": 9.96, "learning_rate": 1.7543859649122808e-07, "loss": 0.507, "step": 213000 }, { "epoch": 9.96, "eval_loss": 2.507356882095337, "eval_runtime": 55.001, "eval_samples_per_second": 120.525, "eval_steps_per_second": 1.891, "step": 213000 } ], "max_steps": 213750, "num_train_epochs": 10, "total_flos": 4.4847043698061394e+18, "trial_name": null, "trial_params": null }