{ "best_metric": null, "best_model_checkpoint": null, "epoch": 285.7142857142857, "eval_steps": 500, "global_step": 5500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.99, "eval_gen_len": 14.487, "eval_loss": 21.441953659057617, "eval_rouge1": 0.0832, "eval_rouge2": 0.0126, "eval_rougeL": 0.063, "eval_rougeLsum": 0.0631, "eval_runtime": 16.3828, "eval_samples_per_second": 7.02, "eval_steps_per_second": 1.221, "step": 19 }, { "epoch": 1.97, "eval_gen_len": 14.6261, "eval_loss": 21.211212158203125, "eval_rouge1": 0.0858, "eval_rouge2": 0.014, "eval_rougeL": 0.0648, "eval_rougeLsum": 0.0652, "eval_runtime": 15.2254, "eval_samples_per_second": 7.553, "eval_steps_per_second": 1.314, "step": 38 }, { "epoch": 2.96, "eval_gen_len": 14.6783, "eval_loss": 20.936405181884766, "eval_rouge1": 0.0866, "eval_rouge2": 0.0147, "eval_rougeL": 0.0655, "eval_rougeLsum": 0.066, "eval_runtime": 18.5865, "eval_samples_per_second": 6.187, "eval_steps_per_second": 1.076, "step": 57 }, { "epoch": 4.0, "eval_gen_len": 14.8522, "eval_loss": 20.667041778564453, "eval_rouge1": 0.088, "eval_rouge2": 0.0145, "eval_rougeL": 0.0659, "eval_rougeLsum": 0.0663, "eval_runtime": 12.6199, "eval_samples_per_second": 9.113, "eval_steps_per_second": 1.585, "step": 77 }, { "epoch": 4.99, "eval_gen_len": 15.113, "eval_loss": 20.46696662902832, "eval_rouge1": 0.0912, "eval_rouge2": 0.0145, "eval_rougeL": 0.0677, "eval_rougeLsum": 0.0677, "eval_runtime": 15.7216, "eval_samples_per_second": 7.315, "eval_steps_per_second": 1.272, "step": 96 }, { "epoch": 5.97, "eval_gen_len": 15.2087, "eval_loss": 20.282737731933594, "eval_rouge1": 0.0913, "eval_rouge2": 0.0145, "eval_rougeL": 0.0679, "eval_rougeLsum": 0.068, "eval_runtime": 15.2977, "eval_samples_per_second": 7.517, "eval_steps_per_second": 1.307, "step": 115 }, { "epoch": 6.96, "eval_gen_len": 15.4087, "eval_loss": 20.09146499633789, "eval_rouge1": 0.0918, "eval_rouge2": 0.0137, "eval_rougeL": 0.0686, "eval_rougeLsum": 0.0687, "eval_runtime": 16.9649, "eval_samples_per_second": 6.779, "eval_steps_per_second": 1.179, "step": 134 }, { "epoch": 8.0, "eval_gen_len": 16.0435, "eval_loss": 19.872163772583008, "eval_rouge1": 0.0969, "eval_rouge2": 0.0164, "eval_rougeL": 0.0736, "eval_rougeLsum": 0.0737, "eval_runtime": 15.3373, "eval_samples_per_second": 7.498, "eval_steps_per_second": 1.304, "step": 154 }, { "epoch": 8.99, "eval_gen_len": 16.5739, "eval_loss": 19.655122756958008, "eval_rouge1": 0.1052, "eval_rouge2": 0.0198, "eval_rougeL": 0.0799, "eval_rougeLsum": 0.0796, "eval_runtime": 13.8235, "eval_samples_per_second": 8.319, "eval_steps_per_second": 1.447, "step": 173 }, { "epoch": 9.97, "eval_gen_len": 17.0435, "eval_loss": 19.420446395874023, "eval_rouge1": 0.1071, "eval_rouge2": 0.0188, "eval_rougeL": 0.0809, "eval_rougeLsum": 0.0808, "eval_runtime": 14.7139, "eval_samples_per_second": 7.816, "eval_steps_per_second": 1.359, "step": 192 }, { "epoch": 10.96, "eval_gen_len": 17.1913, "eval_loss": 19.156597137451172, "eval_rouge1": 0.1061, "eval_rouge2": 0.0185, "eval_rougeL": 0.0815, "eval_rougeLsum": 0.0819, "eval_runtime": 14.1553, "eval_samples_per_second": 8.124, "eval_steps_per_second": 1.413, "step": 211 }, { "epoch": 12.0, "eval_gen_len": 17.2522, "eval_loss": 18.833667755126953, "eval_rouge1": 0.1069, "eval_rouge2": 0.0213, "eval_rougeL": 0.0826, "eval_rougeLsum": 0.0828, "eval_runtime": 15.232, "eval_samples_per_second": 7.55, "eval_steps_per_second": 1.313, "step": 231 }, { "epoch": 12.99, "eval_gen_len": 17.287, "eval_loss": 18.463964462280273, "eval_rouge1": 0.1105, "eval_rouge2": 0.0234, "eval_rougeL": 0.0858, "eval_rougeLsum": 0.0852, "eval_runtime": 15.0679, "eval_samples_per_second": 7.632, "eval_steps_per_second": 1.327, "step": 250 }, { "epoch": 13.97, "eval_gen_len": 17.4696, "eval_loss": 18.000520706176758, "eval_rouge1": 0.1101, "eval_rouge2": 0.0232, "eval_rougeL": 0.0873, "eval_rougeLsum": 0.0872, "eval_runtime": 14.073, "eval_samples_per_second": 8.172, "eval_steps_per_second": 1.421, "step": 269 }, { "epoch": 14.96, "eval_gen_len": 17.2261, "eval_loss": 17.395872116088867, "eval_rouge1": 0.103, "eval_rouge2": 0.023, "eval_rougeL": 0.0821, "eval_rougeLsum": 0.0819, "eval_runtime": 17.5824, "eval_samples_per_second": 6.541, "eval_steps_per_second": 1.138, "step": 288 }, { "epoch": 16.0, "eval_gen_len": 17.6783, "eval_loss": 16.634456634521484, "eval_rouge1": 0.1034, "eval_rouge2": 0.0209, "eval_rougeL": 0.0804, "eval_rougeLsum": 0.0802, "eval_runtime": 14.5519, "eval_samples_per_second": 7.903, "eval_steps_per_second": 1.374, "step": 308 }, { "epoch": 16.99, "eval_gen_len": 16.3565, "eval_loss": 15.872416496276855, "eval_rouge1": 0.0841, "eval_rouge2": 0.0149, "eval_rougeL": 0.0674, "eval_rougeLsum": 0.0674, "eval_runtime": 15.4052, "eval_samples_per_second": 7.465, "eval_steps_per_second": 1.298, "step": 327 }, { "epoch": 17.97, "eval_gen_len": 15.2609, "eval_loss": 15.058935165405273, "eval_rouge1": 0.0697, "eval_rouge2": 0.0097, "eval_rougeL": 0.0554, "eval_rougeLsum": 0.0556, "eval_runtime": 22.9079, "eval_samples_per_second": 5.02, "eval_steps_per_second": 0.873, "step": 346 }, { "epoch": 18.96, "eval_gen_len": 14.7304, "eval_loss": 14.074901580810547, "eval_rouge1": 0.0584, "eval_rouge2": 0.0065, "eval_rougeL": 0.047, "eval_rougeLsum": 0.0472, "eval_runtime": 13.8432, "eval_samples_per_second": 8.307, "eval_steps_per_second": 1.445, "step": 365 }, { "epoch": 20.0, "eval_gen_len": 12.0783, "eval_loss": 12.981775283813477, "eval_rouge1": 0.037, "eval_rouge2": 0.004, "eval_rougeL": 0.0314, "eval_rougeLsum": 0.0312, "eval_runtime": 17.4992, "eval_samples_per_second": 6.572, "eval_steps_per_second": 1.143, "step": 385 }, { "epoch": 20.99, "eval_gen_len": 13.3043, "eval_loss": 12.14104175567627, "eval_rouge1": 0.0327, "eval_rouge2": 0.0027, "eval_rougeL": 0.0287, "eval_rougeLsum": 0.0288, "eval_runtime": 14.6695, "eval_samples_per_second": 7.839, "eval_steps_per_second": 1.363, "step": 404 }, { "epoch": 21.97, "eval_gen_len": 14.1565, "eval_loss": 11.347674369812012, "eval_rouge1": 0.0206, "eval_rouge2": 0.0006, "eval_rougeL": 0.0188, "eval_rougeLsum": 0.019, "eval_runtime": 14.3559, "eval_samples_per_second": 8.011, "eval_steps_per_second": 1.393, "step": 423 }, { "epoch": 22.96, "eval_gen_len": 14.5652, "eval_loss": 10.547377586364746, "eval_rouge1": 0.0136, "eval_rouge2": 0.0008, "eval_rougeL": 0.0121, "eval_rougeLsum": 0.0123, "eval_runtime": 16.36, "eval_samples_per_second": 7.029, "eval_steps_per_second": 1.222, "step": 442 }, { "epoch": 24.0, "eval_gen_len": 15.9391, "eval_loss": 9.721901893615723, "eval_rouge1": 0.0056, "eval_rouge2": 0.0, "eval_rougeL": 0.0051, "eval_rougeLsum": 0.0051, "eval_runtime": 17.3804, "eval_samples_per_second": 6.617, "eval_steps_per_second": 1.151, "step": 462 }, { "epoch": 24.99, "eval_gen_len": 17.0522, "eval_loss": 8.976031303405762, "eval_rouge1": 0.0029, "eval_rouge2": 0.0, "eval_rougeL": 0.0026, "eval_rougeLsum": 0.0026, "eval_runtime": 15.7718, "eval_samples_per_second": 7.291, "eval_steps_per_second": 1.268, "step": 481 }, { "epoch": 25.97, "grad_norm": 6.211065292358398, "learning_rate": 1.8252631578947372e-05, "loss": 16.8471, "step": 500 }, { "epoch": 25.97, "eval_gen_len": 18.0261, "eval_loss": 8.254261016845703, "eval_rouge1": 0.001, "eval_rouge2": 0.0, "eval_rougeL": 0.001, "eval_rougeLsum": 0.001, "eval_runtime": 18.4696, "eval_samples_per_second": 6.226, "eval_steps_per_second": 1.083, "step": 500 }, { "epoch": 26.96, "eval_gen_len": 18.8609, "eval_loss": 7.542705059051514, "eval_rouge1": 0.0008, "eval_rouge2": 0.0, "eval_rougeL": 0.0009, "eval_rougeLsum": 0.0008, "eval_runtime": 14.9383, "eval_samples_per_second": 7.698, "eval_steps_per_second": 1.339, "step": 519 }, { "epoch": 28.0, "eval_gen_len": 19.0, "eval_loss": 6.831495761871338, "eval_rouge1": 0.0007, "eval_rouge2": 0.0, "eval_rougeL": 0.0007, "eval_rougeLsum": 0.0007, "eval_runtime": 15.5617, "eval_samples_per_second": 7.39, "eval_steps_per_second": 1.285, "step": 539 }, { "epoch": 28.99, "eval_gen_len": 19.0, "eval_loss": 6.190303325653076, "eval_rouge1": 0.0002, "eval_rouge2": 0.0, "eval_rougeL": 0.0002, "eval_rougeLsum": 0.0002, "eval_runtime": 15.014, "eval_samples_per_second": 7.66, "eval_steps_per_second": 1.332, "step": 558 }, { "epoch": 29.97, "eval_gen_len": 19.0, "eval_loss": 5.610296726226807, "eval_rouge1": 0.0018, "eval_rouge2": 0.0, "eval_rougeL": 0.0016, "eval_rougeLsum": 0.0016, "eval_runtime": 15.3165, "eval_samples_per_second": 7.508, "eval_steps_per_second": 1.306, "step": 577 }, { "epoch": 30.96, "eval_gen_len": 19.0, "eval_loss": 5.068519592285156, "eval_rouge1": 0.0011, "eval_rouge2": 0.0, "eval_rougeL": 0.0011, "eval_rougeLsum": 0.0011, "eval_runtime": 16.2303, "eval_samples_per_second": 7.086, "eval_steps_per_second": 1.232, "step": 596 }, { "epoch": 32.0, "eval_gen_len": 19.0, "eval_loss": 4.54244327545166, "eval_rouge1": 0.0009, "eval_rouge2": 0.0, "eval_rougeL": 0.0009, "eval_rougeLsum": 0.0009, "eval_runtime": 14.4723, "eval_samples_per_second": 7.946, "eval_steps_per_second": 1.382, "step": 616 }, { "epoch": 32.99, "eval_gen_len": 19.0, "eval_loss": 4.084940433502197, "eval_rouge1": 0.0008, "eval_rouge2": 0.0, "eval_rougeL": 0.0008, "eval_rougeLsum": 0.0008, "eval_runtime": 17.3788, "eval_samples_per_second": 6.617, "eval_steps_per_second": 1.151, "step": 635 }, { "epoch": 33.97, "eval_gen_len": 19.0, "eval_loss": 3.7023561000823975, "eval_rouge1": 0.0014, "eval_rouge2": 0.0, "eval_rougeL": 0.0015, "eval_rougeLsum": 0.0015, "eval_runtime": 17.4976, "eval_samples_per_second": 6.572, "eval_steps_per_second": 1.143, "step": 654 }, { "epoch": 34.96, "eval_gen_len": 19.0, "eval_loss": 3.3644134998321533, "eval_rouge1": 0.0035, "eval_rouge2": 0.0, "eval_rougeL": 0.0035, "eval_rougeLsum": 0.0035, "eval_runtime": 15.0993, "eval_samples_per_second": 7.616, "eval_steps_per_second": 1.325, "step": 673 }, { "epoch": 36.0, "eval_gen_len": 19.0, "eval_loss": 3.0496110916137695, "eval_rouge1": 0.0064, "eval_rouge2": 0.0002, "eval_rougeL": 0.0063, "eval_rougeLsum": 0.0064, "eval_runtime": 26.2608, "eval_samples_per_second": 4.379, "eval_steps_per_second": 0.762, "step": 693 }, { "epoch": 36.99, "eval_gen_len": 18.9913, "eval_loss": 2.7962286472320557, "eval_rouge1": 0.0073, "eval_rouge2": 0.0002, "eval_rougeL": 0.0073, "eval_rougeLsum": 0.0074, "eval_runtime": 16.0565, "eval_samples_per_second": 7.162, "eval_steps_per_second": 1.246, "step": 712 }, { "epoch": 37.97, "eval_gen_len": 18.8435, "eval_loss": 2.5821166038513184, "eval_rouge1": 0.0078, "eval_rouge2": 0.0002, "eval_rougeL": 0.0076, "eval_rougeLsum": 0.0078, "eval_runtime": 24.2703, "eval_samples_per_second": 4.738, "eval_steps_per_second": 0.824, "step": 731 }, { "epoch": 38.96, "eval_gen_len": 16.9043, "eval_loss": 2.4025700092315674, "eval_rouge1": 0.0063, "eval_rouge2": 0.0, "eval_rougeL": 0.0063, "eval_rougeLsum": 0.0063, "eval_runtime": 12.9295, "eval_samples_per_second": 8.894, "eval_steps_per_second": 1.547, "step": 750 }, { "epoch": 40.0, "eval_gen_len": 9.6696, "eval_loss": 2.2464537620544434, "eval_rouge1": 0.0008, "eval_rouge2": 0.0, "eval_rougeL": 0.0008, "eval_rougeLsum": 0.0008, "eval_runtime": 14.656, "eval_samples_per_second": 7.847, "eval_steps_per_second": 1.365, "step": 770 }, { "epoch": 40.99, "eval_gen_len": 7.4435, "eval_loss": 2.124486207962036, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 21.0326, "eval_samples_per_second": 5.468, "eval_steps_per_second": 0.951, "step": 789 }, { "epoch": 41.97, "eval_gen_len": 6.9478, "eval_loss": 2.022434949874878, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.9888, "eval_samples_per_second": 8.854, "eval_steps_per_second": 1.54, "step": 808 }, { "epoch": 42.96, "eval_gen_len": 6.4696, "eval_loss": 1.9459978342056274, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 10.4716, "eval_samples_per_second": 10.982, "eval_steps_per_second": 1.91, "step": 827 }, { "epoch": 44.0, "eval_gen_len": 6.1304, "eval_loss": 1.8852447271347046, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 10.5001, "eval_samples_per_second": 10.952, "eval_steps_per_second": 1.905, "step": 847 }, { "epoch": 44.99, "eval_gen_len": 5.9391, "eval_loss": 1.838249921798706, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 10.0487, "eval_samples_per_second": 11.444, "eval_steps_per_second": 1.99, "step": 866 }, { "epoch": 45.97, "eval_gen_len": 6.087, "eval_loss": 1.7976738214492798, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 10.2357, "eval_samples_per_second": 11.235, "eval_steps_per_second": 1.954, "step": 885 }, { "epoch": 46.96, "eval_gen_len": 6.2609, "eval_loss": 1.7594256401062012, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 10.0331, "eval_samples_per_second": 11.462, "eval_steps_per_second": 1.993, "step": 904 }, { "epoch": 48.0, "eval_gen_len": 6.3565, "eval_loss": 1.7259361743927002, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 10.0928, "eval_samples_per_second": 11.394, "eval_steps_per_second": 1.982, "step": 924 }, { "epoch": 48.99, "eval_gen_len": 6.0348, "eval_loss": 1.7035044431686401, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 10.501, "eval_samples_per_second": 10.951, "eval_steps_per_second": 1.905, "step": 943 }, { "epoch": 49.97, "eval_gen_len": 6.113, "eval_loss": 1.681233525276184, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 19.043, "eval_samples_per_second": 6.039, "eval_steps_per_second": 1.05, "step": 962 }, { "epoch": 50.96, "eval_gen_len": 5.8696, "eval_loss": 1.6589038372039795, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 22.3499, "eval_samples_per_second": 5.145, "eval_steps_per_second": 0.895, "step": 981 }, { "epoch": 51.95, "grad_norm": 2.3630588054656982, "learning_rate": 1.650526315789474e-05, "loss": 4.012, "step": 1000 }, { "epoch": 52.0, "eval_gen_len": 5.713, "eval_loss": 1.639954924583435, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 22.1264, "eval_samples_per_second": 5.197, "eval_steps_per_second": 0.904, "step": 1001 }, { "epoch": 52.99, "eval_gen_len": 5.6957, "eval_loss": 1.6223595142364502, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 19.2896, "eval_samples_per_second": 5.962, "eval_steps_per_second": 1.037, "step": 1020 }, { "epoch": 53.97, "eval_gen_len": 5.887, "eval_loss": 1.6063199043273926, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 17.9683, "eval_samples_per_second": 6.4, "eval_steps_per_second": 1.113, "step": 1039 }, { "epoch": 54.96, "eval_gen_len": 5.9826, "eval_loss": 1.5919499397277832, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 20.6085, "eval_samples_per_second": 5.58, "eval_steps_per_second": 0.97, "step": 1058 }, { "epoch": 56.0, "eval_gen_len": 6.0087, "eval_loss": 1.5780121088027954, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 22.8466, "eval_samples_per_second": 5.034, "eval_steps_per_second": 0.875, "step": 1078 }, { "epoch": 56.99, "eval_gen_len": 5.9652, "eval_loss": 1.5654348134994507, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 16.8326, "eval_samples_per_second": 6.832, "eval_steps_per_second": 1.188, "step": 1097 }, { "epoch": 57.97, "eval_gen_len": 6.3304, "eval_loss": 1.5537272691726685, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 14.7124, "eval_samples_per_second": 7.817, "eval_steps_per_second": 1.359, "step": 1116 }, { "epoch": 58.96, "eval_gen_len": 6.8609, "eval_loss": 1.5426743030548096, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 14.3263, "eval_samples_per_second": 8.027, "eval_steps_per_second": 1.396, "step": 1135 }, { "epoch": 60.0, "eval_gen_len": 7.2, "eval_loss": 1.5310094356536865, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 17.746, "eval_samples_per_second": 6.48, "eval_steps_per_second": 1.127, "step": 1155 }, { "epoch": 60.99, "eval_gen_len": 7.4261, "eval_loss": 1.519776701927185, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 16.3177, "eval_samples_per_second": 7.048, "eval_steps_per_second": 1.226, "step": 1174 }, { "epoch": 61.97, "eval_gen_len": 6.9826, "eval_loss": 1.5120151042938232, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 15.0793, "eval_samples_per_second": 7.626, "eval_steps_per_second": 1.326, "step": 1193 }, { "epoch": 62.96, "eval_gen_len": 6.6957, "eval_loss": 1.500430941581726, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 16.015, "eval_samples_per_second": 7.181, "eval_steps_per_second": 1.249, "step": 1212 }, { "epoch": 64.0, "eval_gen_len": 6.9565, "eval_loss": 1.489511489868164, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 10.2674, "eval_samples_per_second": 11.2, "eval_steps_per_second": 1.948, "step": 1232 }, { "epoch": 64.99, "eval_gen_len": 7.2348, "eval_loss": 1.4760735034942627, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 14.5412, "eval_samples_per_second": 7.909, "eval_steps_per_second": 1.375, "step": 1251 }, { "epoch": 65.97, "eval_gen_len": 7.5043, "eval_loss": 1.4650626182556152, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 19.5952, "eval_samples_per_second": 5.869, "eval_steps_per_second": 1.021, "step": 1270 }, { "epoch": 66.96, "eval_gen_len": 7.4174, "eval_loss": 1.4578195810317993, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 15.4051, "eval_samples_per_second": 7.465, "eval_steps_per_second": 1.298, "step": 1289 }, { "epoch": 68.0, "eval_gen_len": 7.5304, "eval_loss": 1.449414610862732, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 21.6421, "eval_samples_per_second": 5.314, "eval_steps_per_second": 0.924, "step": 1309 }, { "epoch": 68.99, "eval_gen_len": 7.4261, "eval_loss": 1.4453145265579224, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 15.2733, "eval_samples_per_second": 7.529, "eval_steps_per_second": 1.309, "step": 1328 }, { "epoch": 69.97, "eval_gen_len": 7.5217, "eval_loss": 1.4360324144363403, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 17.557, "eval_samples_per_second": 6.55, "eval_steps_per_second": 1.139, "step": 1347 }, { "epoch": 70.96, "eval_gen_len": 7.513, "eval_loss": 1.4272183179855347, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 15.8995, "eval_samples_per_second": 7.233, "eval_steps_per_second": 1.258, "step": 1366 }, { "epoch": 72.0, "eval_gen_len": 7.5391, "eval_loss": 1.420629620552063, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 18.7912, "eval_samples_per_second": 6.12, "eval_steps_per_second": 1.064, "step": 1386 }, { "epoch": 72.99, "eval_gen_len": 7.6261, "eval_loss": 1.4113017320632935, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 17.3516, "eval_samples_per_second": 6.628, "eval_steps_per_second": 1.153, "step": 1405 }, { "epoch": 73.97, "eval_gen_len": 7.9478, "eval_loss": 1.4024852514266968, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 15.9512, "eval_samples_per_second": 7.21, "eval_steps_per_second": 1.254, "step": 1424 }, { "epoch": 74.96, "eval_gen_len": 7.687, "eval_loss": 1.3967483043670654, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 13.9958, "eval_samples_per_second": 8.217, "eval_steps_per_second": 1.429, "step": 1443 }, { "epoch": 76.0, "eval_gen_len": 7.5391, "eval_loss": 1.390748143196106, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 21.6525, "eval_samples_per_second": 5.311, "eval_steps_per_second": 0.924, "step": 1463 }, { "epoch": 76.99, "eval_gen_len": 7.687, "eval_loss": 1.3812955617904663, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 14.5594, "eval_samples_per_second": 7.899, "eval_steps_per_second": 1.374, "step": 1482 }, { "epoch": 77.92, "grad_norm": 4.4105072021484375, "learning_rate": 1.475438596491228e-05, "loss": 1.7845, "step": 1500 }, { "epoch": 77.97, "eval_gen_len": 7.8174, "eval_loss": 1.373058557510376, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 13.5779, "eval_samples_per_second": 8.47, "eval_steps_per_second": 1.473, "step": 1501 }, { "epoch": 78.96, "eval_gen_len": 8.0435, "eval_loss": 1.364722728729248, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 13.5052, "eval_samples_per_second": 8.515, "eval_steps_per_second": 1.481, "step": 1520 }, { "epoch": 80.0, "eval_gen_len": 8.4087, "eval_loss": 1.3542518615722656, "eval_rouge1": 0.001, "eval_rouge2": 0.0, "eval_rougeL": 0.0007, "eval_rougeLsum": 0.0007, "eval_runtime": 16.4169, "eval_samples_per_second": 7.005, "eval_steps_per_second": 1.218, "step": 1540 }, { "epoch": 80.99, "eval_gen_len": 8.2, "eval_loss": 1.3473597764968872, "eval_rouge1": 0.0006, "eval_rouge2": 0.0, "eval_rougeL": 0.0003, "eval_rougeLsum": 0.0003, "eval_runtime": 16.0157, "eval_samples_per_second": 7.18, "eval_steps_per_second": 1.249, "step": 1559 }, { "epoch": 81.97, "eval_gen_len": 7.7739, "eval_loss": 1.3397005796432495, "eval_rouge1": 0.0006, "eval_rouge2": 0.0, "eval_rougeL": 0.0006, "eval_rougeLsum": 0.0006, "eval_runtime": 15.1007, "eval_samples_per_second": 7.616, "eval_steps_per_second": 1.324, "step": 1578 }, { "epoch": 82.96, "eval_gen_len": 7.4783, "eval_loss": 1.3318209648132324, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 15.6414, "eval_samples_per_second": 7.352, "eval_steps_per_second": 1.279, "step": 1597 }, { "epoch": 84.0, "eval_gen_len": 7.7478, "eval_loss": 1.3251750469207764, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 15.8191, "eval_samples_per_second": 7.27, "eval_steps_per_second": 1.264, "step": 1617 }, { "epoch": 84.99, "eval_gen_len": 7.8609, "eval_loss": 1.3169076442718506, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 14.9535, "eval_samples_per_second": 7.691, "eval_steps_per_second": 1.337, "step": 1636 }, { "epoch": 85.97, "eval_gen_len": 8.0609, "eval_loss": 1.308994174003601, "eval_rouge1": 0.0011, "eval_rouge2": 0.0004, "eval_rougeL": 0.0009, "eval_rougeLsum": 0.0009, "eval_runtime": 17.1311, "eval_samples_per_second": 6.713, "eval_steps_per_second": 1.167, "step": 1655 }, { "epoch": 86.96, "eval_gen_len": 8.4174, "eval_loss": 1.3022288084030151, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 18.0371, "eval_samples_per_second": 6.376, "eval_steps_per_second": 1.109, "step": 1674 }, { "epoch": 88.0, "eval_gen_len": 8.6696, "eval_loss": 1.2966970205307007, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 17.2294, "eval_samples_per_second": 6.675, "eval_steps_per_second": 1.161, "step": 1694 }, { "epoch": 88.99, "eval_gen_len": 8.5913, "eval_loss": 1.2914807796478271, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 16.2714, "eval_samples_per_second": 7.068, "eval_steps_per_second": 1.229, "step": 1713 }, { "epoch": 89.97, "eval_gen_len": 8.4609, "eval_loss": 1.285845398902893, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 16.2006, "eval_samples_per_second": 7.099, "eval_steps_per_second": 1.235, "step": 1732 }, { "epoch": 90.96, "eval_gen_len": 8.3304, "eval_loss": 1.2773631811141968, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 14.3032, "eval_samples_per_second": 8.04, "eval_steps_per_second": 1.398, "step": 1751 }, { "epoch": 92.0, "eval_gen_len": 8.4087, "eval_loss": 1.2694664001464844, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 14.8525, "eval_samples_per_second": 7.743, "eval_steps_per_second": 1.347, "step": 1771 }, { "epoch": 92.99, "eval_gen_len": 8.5217, "eval_loss": 1.2651293277740479, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 14.3351, "eval_samples_per_second": 8.022, "eval_steps_per_second": 1.395, "step": 1790 }, { "epoch": 93.97, "eval_gen_len": 8.5217, "eval_loss": 1.2624008655548096, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 14.2422, "eval_samples_per_second": 8.075, "eval_steps_per_second": 1.404, "step": 1809 }, { "epoch": 94.96, "eval_gen_len": 8.4783, "eval_loss": 1.2562423944473267, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 15.5045, "eval_samples_per_second": 7.417, "eval_steps_per_second": 1.29, "step": 1828 }, { "epoch": 96.0, "eval_gen_len": 8.287, "eval_loss": 1.2521991729736328, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 18.0292, "eval_samples_per_second": 6.379, "eval_steps_per_second": 1.109, "step": 1848 }, { "epoch": 96.99, "eval_gen_len": 8.2522, "eval_loss": 1.2463409900665283, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 16.1387, "eval_samples_per_second": 7.126, "eval_steps_per_second": 1.239, "step": 1867 }, { "epoch": 97.97, "eval_gen_len": 8.5217, "eval_loss": 1.2417724132537842, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 24.8398, "eval_samples_per_second": 4.63, "eval_steps_per_second": 0.805, "step": 1886 }, { "epoch": 98.96, "eval_gen_len": 8.6609, "eval_loss": 1.2342702150344849, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 13.8803, "eval_samples_per_second": 8.285, "eval_steps_per_second": 1.441, "step": 1905 }, { "epoch": 100.0, "eval_gen_len": 8.687, "eval_loss": 1.2301725149154663, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 15.943, "eval_samples_per_second": 7.213, "eval_steps_per_second": 1.254, "step": 1925 }, { "epoch": 100.99, "eval_gen_len": 8.4609, "eval_loss": 1.226989507675171, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 16.4066, "eval_samples_per_second": 7.009, "eval_steps_per_second": 1.219, "step": 1944 }, { "epoch": 101.97, "eval_gen_len": 8.2957, "eval_loss": 1.220055103302002, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 17.02, "eval_samples_per_second": 6.757, "eval_steps_per_second": 1.175, "step": 1963 }, { "epoch": 102.96, "eval_gen_len": 8.1826, "eval_loss": 1.215019702911377, "eval_rouge1": 0.0003, "eval_rouge2": 0.0, "eval_rougeL": 0.0003, "eval_rougeLsum": 0.0003, "eval_runtime": 17.3994, "eval_samples_per_second": 6.609, "eval_steps_per_second": 1.149, "step": 1982 }, { "epoch": 103.9, "grad_norm": 4.967583656311035, "learning_rate": 1.3000000000000001e-05, "loss": 1.5128, "step": 2000 }, { "epoch": 104.0, "eval_gen_len": 8.2087, "eval_loss": 1.2050235271453857, "eval_rouge1": 0.0003, "eval_rouge2": 0.0, "eval_rougeL": 0.0003, "eval_rougeLsum": 0.0003, "eval_runtime": 14.9921, "eval_samples_per_second": 7.671, "eval_steps_per_second": 1.334, "step": 2002 }, { "epoch": 104.99, "eval_gen_len": 8.4696, "eval_loss": 1.1983749866485596, "eval_rouge1": 0.0006, "eval_rouge2": 0.0, "eval_rougeL": 0.0003, "eval_rougeLsum": 0.0003, "eval_runtime": 16.8064, "eval_samples_per_second": 6.843, "eval_steps_per_second": 1.19, "step": 2021 }, { "epoch": 105.97, "eval_gen_len": 8.8435, "eval_loss": 1.1935399770736694, "eval_rouge1": 0.0003, "eval_rouge2": 0.0, "eval_rougeL": 0.0003, "eval_rougeLsum": 0.0003, "eval_runtime": 15.6582, "eval_samples_per_second": 7.344, "eval_steps_per_second": 1.277, "step": 2040 }, { "epoch": 106.96, "eval_gen_len": 8.7739, "eval_loss": 1.1894173622131348, "eval_rouge1": 0.0003, "eval_rouge2": 0.0, "eval_rougeL": 0.0003, "eval_rougeLsum": 0.0003, "eval_runtime": 16.7955, "eval_samples_per_second": 6.847, "eval_steps_per_second": 1.191, "step": 2059 }, { "epoch": 108.0, "eval_gen_len": 8.5565, "eval_loss": 1.1841349601745605, "eval_rouge1": 0.0003, "eval_rouge2": 0.0, "eval_rougeL": 0.0003, "eval_rougeLsum": 0.0003, "eval_runtime": 15.4737, "eval_samples_per_second": 7.432, "eval_steps_per_second": 1.293, "step": 2079 }, { "epoch": 108.99, "eval_gen_len": 8.6435, "eval_loss": 1.1762468814849854, "eval_rouge1": 0.0003, "eval_rouge2": 0.0, "eval_rougeL": 0.0003, "eval_rougeLsum": 0.0003, "eval_runtime": 15.4231, "eval_samples_per_second": 7.456, "eval_steps_per_second": 1.297, "step": 2098 }, { "epoch": 109.97, "eval_gen_len": 8.513, "eval_loss": 1.1688281297683716, "eval_rouge1": 0.0003, "eval_rouge2": 0.0, "eval_rougeL": 0.0003, "eval_rougeLsum": 0.0003, "eval_runtime": 23.2171, "eval_samples_per_second": 4.953, "eval_steps_per_second": 0.861, "step": 2117 }, { "epoch": 110.96, "eval_gen_len": 8.4522, "eval_loss": 1.163394570350647, "eval_rouge1": 0.0003, "eval_rouge2": 0.0, "eval_rougeL": 0.0003, "eval_rougeLsum": 0.0003, "eval_runtime": 14.9862, "eval_samples_per_second": 7.674, "eval_steps_per_second": 1.335, "step": 2136 }, { "epoch": 112.0, "eval_gen_len": 8.4261, "eval_loss": 1.1577537059783936, "eval_rouge1": 0.001, "eval_rouge2": 0.0, "eval_rougeL": 0.001, "eval_rougeLsum": 0.0009, "eval_runtime": 18.0754, "eval_samples_per_second": 6.362, "eval_steps_per_second": 1.106, "step": 2156 }, { "epoch": 112.99, "eval_gen_len": 8.4087, "eval_loss": 1.1507985591888428, "eval_rouge1": 0.0003, "eval_rouge2": 0.0, "eval_rougeL": 0.0003, "eval_rougeLsum": 0.0003, "eval_runtime": 17.1754, "eval_samples_per_second": 6.696, "eval_steps_per_second": 1.164, "step": 2175 }, { "epoch": 113.97, "eval_gen_len": 8.6696, "eval_loss": 1.1435272693634033, "eval_rouge1": 0.0003, "eval_rouge2": 0.0, "eval_rougeL": 0.0003, "eval_rougeLsum": 0.0003, "eval_runtime": 15.4446, "eval_samples_per_second": 7.446, "eval_steps_per_second": 1.295, "step": 2194 }, { "epoch": 114.96, "eval_gen_len": 8.8087, "eval_loss": 1.1399484872817993, "eval_rouge1": 0.0006, "eval_rouge2": 0.0, "eval_rougeL": 0.0003, "eval_rougeLsum": 0.0003, "eval_runtime": 17.4221, "eval_samples_per_second": 6.601, "eval_steps_per_second": 1.148, "step": 2213 }, { "epoch": 116.0, "eval_gen_len": 8.7565, "eval_loss": 1.1332604885101318, "eval_rouge1": 0.0006, "eval_rouge2": 0.0, "eval_rougeL": 0.0003, "eval_rougeLsum": 0.0003, "eval_runtime": 16.1131, "eval_samples_per_second": 7.137, "eval_steps_per_second": 1.241, "step": 2233 }, { "epoch": 116.99, "eval_gen_len": 8.7478, "eval_loss": 1.1271406412124634, "eval_rouge1": 0.0003, "eval_rouge2": 0.0, "eval_rougeL": 0.0003, "eval_rougeLsum": 0.0003, "eval_runtime": 16.9778, "eval_samples_per_second": 6.774, "eval_steps_per_second": 1.178, "step": 2252 }, { "epoch": 117.97, "eval_gen_len": 8.8609, "eval_loss": 1.1240047216415405, "eval_rouge1": 0.0003, "eval_rouge2": 0.0, "eval_rougeL": 0.0003, "eval_rougeLsum": 0.0003, "eval_runtime": 14.2803, "eval_samples_per_second": 8.053, "eval_steps_per_second": 1.401, "step": 2271 }, { "epoch": 118.96, "eval_gen_len": 8.7391, "eval_loss": 1.1195180416107178, "eval_rouge1": 0.0005, "eval_rouge2": 0.0, "eval_rougeL": 0.0005, "eval_rougeLsum": 0.0005, "eval_runtime": 14.6134, "eval_samples_per_second": 7.869, "eval_steps_per_second": 1.369, "step": 2290 }, { "epoch": 120.0, "eval_gen_len": 8.7043, "eval_loss": 1.113542079925537, "eval_rouge1": 0.0005, "eval_rouge2": 0.0, "eval_rougeL": 0.0005, "eval_rougeLsum": 0.0005, "eval_runtime": 21.0862, "eval_samples_per_second": 5.454, "eval_steps_per_second": 0.948, "step": 2310 }, { "epoch": 120.99, "eval_gen_len": 8.5043, "eval_loss": 1.1078674793243408, "eval_rouge1": 0.0003, "eval_rouge2": 0.0, "eval_rougeL": 0.0003, "eval_rougeLsum": 0.0003, "eval_runtime": 18.0247, "eval_samples_per_second": 6.38, "eval_steps_per_second": 1.11, "step": 2329 }, { "epoch": 121.97, "eval_gen_len": 8.4696, "eval_loss": 1.0989575386047363, "eval_rouge1": 0.0003, "eval_rouge2": 0.0, "eval_rougeL": 0.0003, "eval_rougeLsum": 0.0003, "eval_runtime": 16.1592, "eval_samples_per_second": 7.117, "eval_steps_per_second": 1.238, "step": 2348 }, { "epoch": 122.96, "eval_gen_len": 8.8261, "eval_loss": 1.0940810441970825, "eval_rouge1": 0.0003, "eval_rouge2": 0.0, "eval_rougeL": 0.0003, "eval_rougeLsum": 0.0003, "eval_runtime": 29.3501, "eval_samples_per_second": 3.918, "eval_steps_per_second": 0.681, "step": 2367 }, { "epoch": 124.0, "eval_gen_len": 8.7826, "eval_loss": 1.0875351428985596, "eval_rouge1": 0.0003, "eval_rouge2": 0.0, "eval_rougeL": 0.0003, "eval_rougeLsum": 0.0003, "eval_runtime": 17.9868, "eval_samples_per_second": 6.394, "eval_steps_per_second": 1.112, "step": 2387 }, { "epoch": 124.99, "eval_gen_len": 8.5913, "eval_loss": 1.083350658416748, "eval_rouge1": 0.0003, "eval_rouge2": 0.0, "eval_rougeL": 0.0003, "eval_rougeLsum": 0.0003, "eval_runtime": 17.325, "eval_samples_per_second": 6.638, "eval_steps_per_second": 1.154, "step": 2406 }, { "epoch": 125.97, "eval_gen_len": 8.9652, "eval_loss": 1.0746002197265625, "eval_rouge1": 0.0003, "eval_rouge2": 0.0, "eval_rougeL": 0.0003, "eval_rougeLsum": 0.0003, "eval_runtime": 15.8215, "eval_samples_per_second": 7.269, "eval_steps_per_second": 1.264, "step": 2425 }, { "epoch": 126.96, "eval_gen_len": 9.0696, "eval_loss": 1.0692858695983887, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 15.6722, "eval_samples_per_second": 7.338, "eval_steps_per_second": 1.276, "step": 2444 }, { "epoch": 128.0, "eval_gen_len": 9.0261, "eval_loss": 1.0652384757995605, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 24.7207, "eval_samples_per_second": 4.652, "eval_steps_per_second": 0.809, "step": 2464 }, { "epoch": 128.99, "eval_gen_len": 9.0348, "eval_loss": 1.0582802295684814, "eval_rouge1": 0.0003, "eval_rouge2": 0.0, "eval_rougeL": 0.0003, "eval_rougeLsum": 0.0003, "eval_runtime": 17.9675, "eval_samples_per_second": 6.4, "eval_steps_per_second": 1.113, "step": 2483 }, { "epoch": 129.87, "grad_norm": 1.7797880172729492, "learning_rate": 1.124561403508772e-05, "loss": 1.3193, "step": 2500 }, { "epoch": 129.97, "eval_gen_len": 9.1217, "eval_loss": 1.0517534017562866, "eval_rouge1": 0.0003, "eval_rouge2": 0.0, "eval_rougeL": 0.0003, "eval_rougeLsum": 0.0003, "eval_runtime": 15.6173, "eval_samples_per_second": 7.364, "eval_steps_per_second": 1.281, "step": 2502 }, { "epoch": 130.96, "eval_gen_len": 8.887, "eval_loss": 1.0467168092727661, "eval_rouge1": 0.0003, "eval_rouge2": 0.0, "eval_rougeL": 0.0003, "eval_rougeLsum": 0.0003, "eval_runtime": 15.1436, "eval_samples_per_second": 7.594, "eval_steps_per_second": 1.321, "step": 2521 }, { "epoch": 132.0, "eval_gen_len": 8.8348, "eval_loss": 1.0417622327804565, "eval_rouge1": 0.0003, "eval_rouge2": 0.0, "eval_rougeL": 0.0003, "eval_rougeLsum": 0.0003, "eval_runtime": 14.5373, "eval_samples_per_second": 7.911, "eval_steps_per_second": 1.376, "step": 2541 }, { "epoch": 132.99, "eval_gen_len": 8.7826, "eval_loss": 1.0359249114990234, "eval_rouge1": 0.0003, "eval_rouge2": 0.0, "eval_rougeL": 0.0003, "eval_rougeLsum": 0.0003, "eval_runtime": 15.5453, "eval_samples_per_second": 7.398, "eval_steps_per_second": 1.287, "step": 2560 }, { "epoch": 133.97, "eval_gen_len": 8.7217, "eval_loss": 1.0301254987716675, "eval_rouge1": 0.0003, "eval_rouge2": 0.0, "eval_rougeL": 0.0003, "eval_rougeLsum": 0.0003, "eval_runtime": 17.6306, "eval_samples_per_second": 6.523, "eval_steps_per_second": 1.134, "step": 2579 }, { "epoch": 134.96, "eval_gen_len": 8.7739, "eval_loss": 1.0256870985031128, "eval_rouge1": 0.0003, "eval_rouge2": 0.0, "eval_rougeL": 0.0003, "eval_rougeLsum": 0.0003, "eval_runtime": 15.6433, "eval_samples_per_second": 7.351, "eval_steps_per_second": 1.279, "step": 2598 }, { "epoch": 136.0, "eval_gen_len": 9.2348, "eval_loss": 1.0207563638687134, "eval_rouge1": 0.0018, "eval_rouge2": 0.0009, "eval_rougeL": 0.0021, "eval_rougeLsum": 0.0018, "eval_runtime": 16.9915, "eval_samples_per_second": 6.768, "eval_steps_per_second": 1.177, "step": 2618 }, { "epoch": 136.99, "eval_gen_len": 9.4783, "eval_loss": 1.0155842304229736, "eval_rouge1": 0.0028, "eval_rouge2": 0.0011, "eval_rougeL": 0.0028, "eval_rougeLsum": 0.0028, "eval_runtime": 15.1617, "eval_samples_per_second": 7.585, "eval_steps_per_second": 1.319, "step": 2637 }, { "epoch": 137.97, "eval_gen_len": 9.4609, "eval_loss": 1.010608434677124, "eval_rouge1": 0.0018, "eval_rouge2": 0.0009, "eval_rougeL": 0.0018, "eval_rougeLsum": 0.0018, "eval_runtime": 17.3586, "eval_samples_per_second": 6.625, "eval_steps_per_second": 1.152, "step": 2656 }, { "epoch": 138.96, "eval_gen_len": 9.4522, "eval_loss": 1.006165862083435, "eval_rouge1": 0.0018, "eval_rouge2": 0.0009, "eval_rougeL": 0.0018, "eval_rougeLsum": 0.0018, "eval_runtime": 15.6312, "eval_samples_per_second": 7.357, "eval_steps_per_second": 1.279, "step": 2675 }, { "epoch": 140.0, "eval_gen_len": 9.4435, "eval_loss": 1.0015084743499756, "eval_rouge1": 0.0018, "eval_rouge2": 0.0009, "eval_rougeL": 0.0018, "eval_rougeLsum": 0.0018, "eval_runtime": 15.1443, "eval_samples_per_second": 7.594, "eval_steps_per_second": 1.321, "step": 2695 }, { "epoch": 140.99, "eval_gen_len": 9.5913, "eval_loss": 0.9966647028923035, "eval_rouge1": 0.0015, "eval_rouge2": 0.0003, "eval_rougeL": 0.0018, "eval_rougeLsum": 0.0015, "eval_runtime": 16.3381, "eval_samples_per_second": 7.039, "eval_steps_per_second": 1.224, "step": 2714 }, { "epoch": 141.97, "eval_gen_len": 9.6783, "eval_loss": 0.9923425912857056, "eval_rouge1": 0.0015, "eval_rouge2": 0.0003, "eval_rougeL": 0.0018, "eval_rougeLsum": 0.0015, "eval_runtime": 14.5033, "eval_samples_per_second": 7.929, "eval_steps_per_second": 1.379, "step": 2733 }, { "epoch": 142.96, "eval_gen_len": 9.8, "eval_loss": 0.9881101250648499, "eval_rouge1": 0.0015, "eval_rouge2": 0.0003, "eval_rougeL": 0.0015, "eval_rougeLsum": 0.0015, "eval_runtime": 17.6181, "eval_samples_per_second": 6.527, "eval_steps_per_second": 1.135, "step": 2752 }, { "epoch": 144.0, "eval_gen_len": 9.8435, "eval_loss": 0.9837466478347778, "eval_rouge1": 0.0015, "eval_rouge2": 0.0003, "eval_rougeL": 0.0015, "eval_rougeLsum": 0.0015, "eval_runtime": 15.937, "eval_samples_per_second": 7.216, "eval_steps_per_second": 1.255, "step": 2772 }, { "epoch": 144.99, "eval_gen_len": 9.9304, "eval_loss": 0.9798020720481873, "eval_rouge1": 0.0015, "eval_rouge2": 0.0003, "eval_rougeL": 0.0015, "eval_rougeLsum": 0.0015, "eval_runtime": 16.407, "eval_samples_per_second": 7.009, "eval_steps_per_second": 1.219, "step": 2791 }, { "epoch": 145.97, "eval_gen_len": 9.9826, "eval_loss": 0.975723922252655, "eval_rouge1": 0.0015, "eval_rouge2": 0.0003, "eval_rougeL": 0.0015, "eval_rougeLsum": 0.0015, "eval_runtime": 15.1268, "eval_samples_per_second": 7.602, "eval_steps_per_second": 1.322, "step": 2810 }, { "epoch": 146.96, "eval_gen_len": 10.0261, "eval_loss": 0.9714429378509521, "eval_rouge1": 0.0015, "eval_rouge2": 0.0003, "eval_rougeL": 0.0015, "eval_rougeLsum": 0.0015, "eval_runtime": 16.0606, "eval_samples_per_second": 7.16, "eval_steps_per_second": 1.245, "step": 2829 }, { "epoch": 148.0, "eval_gen_len": 9.9739, "eval_loss": 0.9681385159492493, "eval_rouge1": 0.0015, "eval_rouge2": 0.0003, "eval_rougeL": 0.0015, "eval_rougeLsum": 0.0015, "eval_runtime": 15.6543, "eval_samples_per_second": 7.346, "eval_steps_per_second": 1.278, "step": 2849 }, { "epoch": 148.99, "eval_gen_len": 9.9739, "eval_loss": 0.9637375473976135, "eval_rouge1": 0.0015, "eval_rouge2": 0.0003, "eval_rougeL": 0.0015, "eval_rougeLsum": 0.0015, "eval_runtime": 15.1696, "eval_samples_per_second": 7.581, "eval_steps_per_second": 1.318, "step": 2868 }, { "epoch": 149.97, "eval_gen_len": 10.0348, "eval_loss": 0.9596477746963501, "eval_rouge1": 0.0015, "eval_rouge2": 0.0009, "eval_rougeL": 0.0015, "eval_rougeLsum": 0.0015, "eval_runtime": 15.2187, "eval_samples_per_second": 7.557, "eval_steps_per_second": 1.314, "step": 2887 }, { "epoch": 150.96, "eval_gen_len": 10.0174, "eval_loss": 0.9558045268058777, "eval_rouge1": 0.0017, "eval_rouge2": 0.0009, "eval_rougeL": 0.0017, "eval_rougeLsum": 0.0017, "eval_runtime": 16.4212, "eval_samples_per_second": 7.003, "eval_steps_per_second": 1.218, "step": 2906 }, { "epoch": 152.0, "eval_gen_len": 10.1304, "eval_loss": 0.9513251185417175, "eval_rouge1": 0.0021, "eval_rouge2": 0.0005, "eval_rougeL": 0.0021, "eval_rougeLsum": 0.0021, "eval_runtime": 17.1931, "eval_samples_per_second": 6.689, "eval_steps_per_second": 1.163, "step": 2926 }, { "epoch": 152.99, "eval_gen_len": 10.1217, "eval_loss": 0.947124719619751, "eval_rouge1": 0.0021, "eval_rouge2": 0.0014, "eval_rougeL": 0.0021, "eval_rougeLsum": 0.0021, "eval_runtime": 16.7224, "eval_samples_per_second": 6.877, "eval_steps_per_second": 1.196, "step": 2945 }, { "epoch": 153.97, "eval_gen_len": 10.2696, "eval_loss": 0.9428749084472656, "eval_rouge1": 0.0043, "eval_rouge2": 0.0014, "eval_rougeL": 0.0037, "eval_rougeLsum": 0.0036, "eval_runtime": 15.3406, "eval_samples_per_second": 7.496, "eval_steps_per_second": 1.304, "step": 2964 }, { "epoch": 154.96, "eval_gen_len": 10.1217, "eval_loss": 0.939849853515625, "eval_rouge1": 0.0021, "eval_rouge2": 0.0014, "eval_rougeL": 0.0021, "eval_rougeLsum": 0.0021, "eval_runtime": 15.8431, "eval_samples_per_second": 7.259, "eval_steps_per_second": 1.262, "step": 2983 }, { "epoch": 155.84, "grad_norm": 0.8866944313049316, "learning_rate": 9.49122807017544e-06, "loss": 1.1379, "step": 3000 }, { "epoch": 156.0, "eval_gen_len": 10.0522, "eval_loss": 0.9357353448867798, "eval_rouge1": 0.0017, "eval_rouge2": 0.0011, "eval_rougeL": 0.0017, "eval_rougeLsum": 0.0017, "eval_runtime": 16.7405, "eval_samples_per_second": 6.87, "eval_steps_per_second": 1.195, "step": 3003 }, { "epoch": 156.99, "eval_gen_len": 10.1217, "eval_loss": 0.9312177300453186, "eval_rouge1": 0.0017, "eval_rouge2": 0.0011, "eval_rougeL": 0.0017, "eval_rougeLsum": 0.0017, "eval_runtime": 12.164, "eval_samples_per_second": 9.454, "eval_steps_per_second": 1.644, "step": 3022 }, { "epoch": 157.97, "eval_gen_len": 10.2609, "eval_loss": 0.9275165796279907, "eval_rouge1": 0.0027, "eval_rouge2": 0.0006, "eval_rougeL": 0.0022, "eval_rougeLsum": 0.0022, "eval_runtime": 15.2713, "eval_samples_per_second": 7.53, "eval_steps_per_second": 1.31, "step": 3041 }, { "epoch": 158.96, "eval_gen_len": 10.4435, "eval_loss": 0.9236345887184143, "eval_rouge1": 0.0036, "eval_rouge2": 0.0006, "eval_rougeL": 0.0028, "eval_rougeLsum": 0.0029, "eval_runtime": 17.5627, "eval_samples_per_second": 6.548, "eval_steps_per_second": 1.139, "step": 3060 }, { "epoch": 160.0, "eval_gen_len": 10.513, "eval_loss": 0.9195658564567566, "eval_rouge1": 0.0049, "eval_rouge2": 0.0012, "eval_rougeL": 0.0044, "eval_rougeLsum": 0.0044, "eval_runtime": 16.5853, "eval_samples_per_second": 6.934, "eval_steps_per_second": 1.206, "step": 3080 }, { "epoch": 160.99, "eval_gen_len": 10.487, "eval_loss": 0.9164186120033264, "eval_rouge1": 0.0046, "eval_rouge2": 0.0007, "eval_rougeL": 0.0038, "eval_rougeLsum": 0.0038, "eval_runtime": 15.397, "eval_samples_per_second": 7.469, "eval_steps_per_second": 1.299, "step": 3099 }, { "epoch": 161.97, "eval_gen_len": 10.4783, "eval_loss": 0.9130675196647644, "eval_rouge1": 0.0039, "eval_rouge2": 0.0007, "eval_rougeL": 0.0034, "eval_rougeLsum": 0.0034, "eval_runtime": 16.4247, "eval_samples_per_second": 7.002, "eval_steps_per_second": 1.218, "step": 3118 }, { "epoch": 162.96, "eval_gen_len": 10.6522, "eval_loss": 0.9092690944671631, "eval_rouge1": 0.007, "eval_rouge2": 0.0023, "eval_rougeL": 0.0066, "eval_rougeLsum": 0.0065, "eval_runtime": 14.8696, "eval_samples_per_second": 7.734, "eval_steps_per_second": 1.345, "step": 3137 }, { "epoch": 164.0, "eval_gen_len": 10.5739, "eval_loss": 0.9059688448905945, "eval_rouge1": 0.005, "eval_rouge2": 0.001, "eval_rougeL": 0.0044, "eval_rougeLsum": 0.0043, "eval_runtime": 15.7264, "eval_samples_per_second": 7.313, "eval_steps_per_second": 1.272, "step": 3157 }, { "epoch": 164.99, "eval_gen_len": 10.7391, "eval_loss": 0.9024509191513062, "eval_rouge1": 0.0074, "eval_rouge2": 0.0023, "eval_rougeL": 0.0068, "eval_rougeLsum": 0.0066, "eval_runtime": 15.813, "eval_samples_per_second": 7.272, "eval_steps_per_second": 1.265, "step": 3176 }, { "epoch": 165.97, "eval_gen_len": 10.5652, "eval_loss": 0.8994614481925964, "eval_rouge1": 0.0054, "eval_rouge2": 0.001, "eval_rougeL": 0.0048, "eval_rougeLsum": 0.0048, "eval_runtime": 15.1711, "eval_samples_per_second": 7.58, "eval_steps_per_second": 1.318, "step": 3195 }, { "epoch": 166.96, "eval_gen_len": 10.5913, "eval_loss": 0.8970102667808533, "eval_rouge1": 0.0061, "eval_rouge2": 0.0014, "eval_rougeL": 0.0053, "eval_rougeLsum": 0.0053, "eval_runtime": 14.0622, "eval_samples_per_second": 8.178, "eval_steps_per_second": 1.422, "step": 3214 }, { "epoch": 168.0, "eval_gen_len": 10.6174, "eval_loss": 0.894256055355072, "eval_rouge1": 0.0082, "eval_rouge2": 0.003, "eval_rougeL": 0.0077, "eval_rougeLsum": 0.0075, "eval_runtime": 16.7533, "eval_samples_per_second": 6.864, "eval_steps_per_second": 1.194, "step": 3234 }, { "epoch": 168.99, "eval_gen_len": 10.6348, "eval_loss": 0.891488790512085, "eval_rouge1": 0.0092, "eval_rouge2": 0.0029, "eval_rougeL": 0.0083, "eval_rougeLsum": 0.0081, "eval_runtime": 13.6019, "eval_samples_per_second": 8.455, "eval_steps_per_second": 1.47, "step": 3253 }, { "epoch": 169.97, "eval_gen_len": 10.5913, "eval_loss": 0.8882649540901184, "eval_rouge1": 0.0073, "eval_rouge2": 0.0022, "eval_rougeL": 0.0068, "eval_rougeLsum": 0.0067, "eval_runtime": 16.0681, "eval_samples_per_second": 7.157, "eval_steps_per_second": 1.245, "step": 3272 }, { "epoch": 170.96, "eval_gen_len": 10.6522, "eval_loss": 0.8857714533805847, "eval_rouge1": 0.009, "eval_rouge2": 0.0025, "eval_rougeL": 0.0081, "eval_rougeLsum": 0.008, "eval_runtime": 19.0222, "eval_samples_per_second": 6.046, "eval_steps_per_second": 1.051, "step": 3291 }, { "epoch": 172.0, "eval_gen_len": 10.5826, "eval_loss": 0.8824735283851624, "eval_rouge1": 0.0073, "eval_rouge2": 0.0018, "eval_rougeL": 0.0068, "eval_rougeLsum": 0.0068, "eval_runtime": 17.2556, "eval_samples_per_second": 6.665, "eval_steps_per_second": 1.159, "step": 3311 }, { "epoch": 172.99, "eval_gen_len": 10.5913, "eval_loss": 0.8791074156761169, "eval_rouge1": 0.0077, "eval_rouge2": 0.0016, "eval_rougeL": 0.0066, "eval_rougeLsum": 0.0066, "eval_runtime": 15.3622, "eval_samples_per_second": 7.486, "eval_steps_per_second": 1.302, "step": 3330 }, { "epoch": 173.97, "eval_gen_len": 10.6174, "eval_loss": 0.8760549426078796, "eval_rouge1": 0.0078, "eval_rouge2": 0.0017, "eval_rougeL": 0.0069, "eval_rougeLsum": 0.007, "eval_runtime": 13.7617, "eval_samples_per_second": 8.357, "eval_steps_per_second": 1.453, "step": 3349 }, { "epoch": 174.96, "eval_gen_len": 10.8348, "eval_loss": 0.8735494017601013, "eval_rouge1": 0.0099, "eval_rouge2": 0.0031, "eval_rougeL": 0.0093, "eval_rougeLsum": 0.0093, "eval_runtime": 16.3628, "eval_samples_per_second": 7.028, "eval_steps_per_second": 1.222, "step": 3368 }, { "epoch": 176.0, "eval_gen_len": 10.8174, "eval_loss": 0.8713410496711731, "eval_rouge1": 0.0103, "eval_rouge2": 0.0031, "eval_rougeL": 0.0097, "eval_rougeLsum": 0.0098, "eval_runtime": 15.0408, "eval_samples_per_second": 7.646, "eval_steps_per_second": 1.33, "step": 3388 }, { "epoch": 176.99, "eval_gen_len": 10.687, "eval_loss": 0.8688496947288513, "eval_rouge1": 0.0104, "eval_rouge2": 0.0027, "eval_rougeL": 0.0087, "eval_rougeLsum": 0.0087, "eval_runtime": 13.3269, "eval_samples_per_second": 8.629, "eval_steps_per_second": 1.501, "step": 3407 }, { "epoch": 177.97, "eval_gen_len": 10.7304, "eval_loss": 0.8659321069717407, "eval_rouge1": 0.0102, "eval_rouge2": 0.0022, "eval_rougeL": 0.0085, "eval_rougeLsum": 0.0083, "eval_runtime": 15.8407, "eval_samples_per_second": 7.26, "eval_steps_per_second": 1.263, "step": 3426 }, { "epoch": 178.96, "eval_gen_len": 10.9217, "eval_loss": 0.8626890778541565, "eval_rouge1": 0.0109, "eval_rouge2": 0.0025, "eval_rougeL": 0.0086, "eval_rougeLsum": 0.0085, "eval_runtime": 15.1338, "eval_samples_per_second": 7.599, "eval_steps_per_second": 1.322, "step": 3445 }, { "epoch": 180.0, "eval_gen_len": 11.087, "eval_loss": 0.8599569201469421, "eval_rouge1": 0.0124, "eval_rouge2": 0.0025, "eval_rougeL": 0.0101, "eval_rougeLsum": 0.0101, "eval_runtime": 21.7846, "eval_samples_per_second": 5.279, "eval_steps_per_second": 0.918, "step": 3465 }, { "epoch": 180.99, "eval_gen_len": 11.1478, "eval_loss": 0.8579829931259155, "eval_rouge1": 0.0132, "eval_rouge2": 0.0026, "eval_rougeL": 0.0111, "eval_rougeLsum": 0.0109, "eval_runtime": 14.4812, "eval_samples_per_second": 7.941, "eval_steps_per_second": 1.381, "step": 3484 }, { "epoch": 181.82, "grad_norm": 0.5403133034706116, "learning_rate": 7.736842105263158e-06, "loss": 1.0168, "step": 3500 }, { "epoch": 181.97, "eval_gen_len": 10.9739, "eval_loss": 0.8559067845344543, "eval_rouge1": 0.011, "eval_rouge2": 0.0027, "eval_rougeL": 0.0095, "eval_rougeLsum": 0.0093, "eval_runtime": 13.8018, "eval_samples_per_second": 8.332, "eval_steps_per_second": 1.449, "step": 3503 }, { "epoch": 182.96, "eval_gen_len": 10.9652, "eval_loss": 0.8531643152236938, "eval_rouge1": 0.0122, "eval_rouge2": 0.0033, "eval_rougeL": 0.0101, "eval_rougeLsum": 0.01, "eval_runtime": 15.9407, "eval_samples_per_second": 7.214, "eval_steps_per_second": 1.255, "step": 3522 }, { "epoch": 184.0, "eval_gen_len": 11.0609, "eval_loss": 0.8499117493629456, "eval_rouge1": 0.0141, "eval_rouge2": 0.0034, "eval_rougeL": 0.0121, "eval_rougeLsum": 0.012, "eval_runtime": 13.074, "eval_samples_per_second": 8.796, "eval_steps_per_second": 1.53, "step": 3542 }, { "epoch": 184.99, "eval_gen_len": 11.3913, "eval_loss": 0.8471864461898804, "eval_rouge1": 0.0178, "eval_rouge2": 0.0037, "eval_rougeL": 0.0152, "eval_rougeLsum": 0.0149, "eval_runtime": 13.6132, "eval_samples_per_second": 8.448, "eval_steps_per_second": 1.469, "step": 3561 }, { "epoch": 185.97, "eval_gen_len": 11.287, "eval_loss": 0.8454113602638245, "eval_rouge1": 0.0173, "eval_rouge2": 0.0036, "eval_rougeL": 0.0145, "eval_rougeLsum": 0.0141, "eval_runtime": 12.8847, "eval_samples_per_second": 8.925, "eval_steps_per_second": 1.552, "step": 3580 }, { "epoch": 186.96, "eval_gen_len": 11.2261, "eval_loss": 0.8434880375862122, "eval_rouge1": 0.017, "eval_rouge2": 0.0027, "eval_rougeL": 0.0143, "eval_rougeLsum": 0.0141, "eval_runtime": 11.4202, "eval_samples_per_second": 10.07, "eval_steps_per_second": 1.751, "step": 3599 }, { "epoch": 188.0, "eval_gen_len": 11.3913, "eval_loss": 0.840716540813446, "eval_rouge1": 0.0188, "eval_rouge2": 0.0032, "eval_rougeL": 0.0161, "eval_rougeLsum": 0.0159, "eval_runtime": 10.5922, "eval_samples_per_second": 10.857, "eval_steps_per_second": 1.888, "step": 3619 }, { "epoch": 188.99, "eval_gen_len": 11.2087, "eval_loss": 0.8385959267616272, "eval_rouge1": 0.0166, "eval_rouge2": 0.0033, "eval_rougeL": 0.0144, "eval_rougeLsum": 0.0141, "eval_runtime": 11.9064, "eval_samples_per_second": 9.659, "eval_steps_per_second": 1.68, "step": 3638 }, { "epoch": 189.97, "eval_gen_len": 11.2609, "eval_loss": 0.836624026298523, "eval_rouge1": 0.0169, "eval_rouge2": 0.0031, "eval_rougeL": 0.0147, "eval_rougeLsum": 0.0144, "eval_runtime": 10.125, "eval_samples_per_second": 11.358, "eval_steps_per_second": 1.975, "step": 3657 }, { "epoch": 190.96, "eval_gen_len": 11.2522, "eval_loss": 0.834960401058197, "eval_rouge1": 0.0181, "eval_rouge2": 0.0038, "eval_rougeL": 0.0159, "eval_rougeLsum": 0.0158, "eval_runtime": 10.4265, "eval_samples_per_second": 11.03, "eval_steps_per_second": 1.918, "step": 3676 }, { "epoch": 192.0, "eval_gen_len": 11.6174, "eval_loss": 0.832145631313324, "eval_rouge1": 0.0223, "eval_rouge2": 0.0048, "eval_rougeL": 0.0198, "eval_rougeLsum": 0.0197, "eval_runtime": 14.2788, "eval_samples_per_second": 8.054, "eval_steps_per_second": 1.401, "step": 3696 }, { "epoch": 192.99, "eval_gen_len": 11.5913, "eval_loss": 0.8298683166503906, "eval_rouge1": 0.0238, "eval_rouge2": 0.0054, "eval_rougeL": 0.021, "eval_rougeLsum": 0.0208, "eval_runtime": 13.7725, "eval_samples_per_second": 8.35, "eval_steps_per_second": 1.452, "step": 3715 }, { "epoch": 193.97, "eval_gen_len": 11.513, "eval_loss": 0.8281151056289673, "eval_rouge1": 0.0238, "eval_rouge2": 0.0057, "eval_rougeL": 0.0208, "eval_rougeLsum": 0.0206, "eval_runtime": 16.5357, "eval_samples_per_second": 6.955, "eval_steps_per_second": 1.21, "step": 3734 }, { "epoch": 194.96, "eval_gen_len": 11.4696, "eval_loss": 0.8263967633247375, "eval_rouge1": 0.0242, "eval_rouge2": 0.0054, "eval_rougeL": 0.0212, "eval_rougeLsum": 0.0209, "eval_runtime": 14.9165, "eval_samples_per_second": 7.71, "eval_steps_per_second": 1.341, "step": 3753 }, { "epoch": 196.0, "eval_gen_len": 11.513, "eval_loss": 0.8241834044456482, "eval_rouge1": 0.0242, "eval_rouge2": 0.0054, "eval_rougeL": 0.0203, "eval_rougeLsum": 0.0202, "eval_runtime": 13.3344, "eval_samples_per_second": 8.624, "eval_steps_per_second": 1.5, "step": 3773 }, { "epoch": 196.99, "eval_gen_len": 11.8348, "eval_loss": 0.8214186429977417, "eval_rouge1": 0.0277, "eval_rouge2": 0.0058, "eval_rougeL": 0.0246, "eval_rougeLsum": 0.0242, "eval_runtime": 15.0895, "eval_samples_per_second": 7.621, "eval_steps_per_second": 1.325, "step": 3792 }, { "epoch": 197.97, "eval_gen_len": 11.6435, "eval_loss": 0.8196175694465637, "eval_rouge1": 0.0262, "eval_rouge2": 0.0056, "eval_rougeL": 0.0226, "eval_rougeLsum": 0.0227, "eval_runtime": 14.814, "eval_samples_per_second": 7.763, "eval_steps_per_second": 1.35, "step": 3811 }, { "epoch": 198.96, "eval_gen_len": 11.7043, "eval_loss": 0.8178415894508362, "eval_rouge1": 0.0293, "eval_rouge2": 0.006, "eval_rougeL": 0.025, "eval_rougeLsum": 0.0248, "eval_runtime": 15.1097, "eval_samples_per_second": 7.611, "eval_steps_per_second": 1.324, "step": 3830 }, { "epoch": 200.0, "eval_gen_len": 11.8783, "eval_loss": 0.815380334854126, "eval_rouge1": 0.0318, "eval_rouge2": 0.006, "eval_rougeL": 0.0273, "eval_rougeLsum": 0.027, "eval_runtime": 14.6624, "eval_samples_per_second": 7.843, "eval_steps_per_second": 1.364, "step": 3850 }, { "epoch": 200.99, "eval_gen_len": 11.7913, "eval_loss": 0.8136902451515198, "eval_rouge1": 0.0307, "eval_rouge2": 0.0058, "eval_rougeL": 0.0265, "eval_rougeLsum": 0.0262, "eval_runtime": 14.2757, "eval_samples_per_second": 8.056, "eval_steps_per_second": 1.401, "step": 3869 }, { "epoch": 201.97, "eval_gen_len": 11.8, "eval_loss": 0.8119075894355774, "eval_rouge1": 0.032, "eval_rouge2": 0.0061, "eval_rougeL": 0.0279, "eval_rougeLsum": 0.0277, "eval_runtime": 14.1634, "eval_samples_per_second": 8.12, "eval_steps_per_second": 1.412, "step": 3888 }, { "epoch": 202.96, "eval_gen_len": 11.9652, "eval_loss": 0.8098872900009155, "eval_rouge1": 0.0334, "eval_rouge2": 0.0062, "eval_rougeL": 0.0289, "eval_rougeLsum": 0.0285, "eval_runtime": 17.6617, "eval_samples_per_second": 6.511, "eval_steps_per_second": 1.132, "step": 3907 }, { "epoch": 204.0, "eval_gen_len": 12.0522, "eval_loss": 0.8077185750007629, "eval_rouge1": 0.0339, "eval_rouge2": 0.0068, "eval_rougeL": 0.0293, "eval_rougeLsum": 0.0291, "eval_runtime": 18.6428, "eval_samples_per_second": 6.169, "eval_steps_per_second": 1.073, "step": 3927 }, { "epoch": 204.99, "eval_gen_len": 11.9478, "eval_loss": 0.8060031533241272, "eval_rouge1": 0.0331, "eval_rouge2": 0.0065, "eval_rougeL": 0.0286, "eval_rougeLsum": 0.0284, "eval_runtime": 28.209, "eval_samples_per_second": 4.077, "eval_steps_per_second": 0.709, "step": 3946 }, { "epoch": 205.97, "eval_gen_len": 12.2087, "eval_loss": 0.8041767477989197, "eval_rouge1": 0.038, "eval_rouge2": 0.0083, "eval_rougeL": 0.0331, "eval_rougeLsum": 0.0329, "eval_runtime": 13.9196, "eval_samples_per_second": 8.262, "eval_steps_per_second": 1.437, "step": 3965 }, { "epoch": 206.96, "eval_gen_len": 12.2348, "eval_loss": 0.8022732138633728, "eval_rouge1": 0.04, "eval_rouge2": 0.0093, "eval_rougeL": 0.0351, "eval_rougeLsum": 0.0348, "eval_runtime": 18.1652, "eval_samples_per_second": 6.331, "eval_steps_per_second": 1.101, "step": 3984 }, { "epoch": 207.79, "grad_norm": 0.6825528740882874, "learning_rate": 5.982456140350877e-06, "loss": 0.9396, "step": 4000 }, { "epoch": 208.0, "eval_gen_len": 11.9913, "eval_loss": 0.8004079461097717, "eval_rouge1": 0.0377, "eval_rouge2": 0.0083, "eval_rougeL": 0.0326, "eval_rougeLsum": 0.0324, "eval_runtime": 15.4261, "eval_samples_per_second": 7.455, "eval_steps_per_second": 1.297, "step": 4004 }, { "epoch": 208.99, "eval_gen_len": 12.2435, "eval_loss": 0.7987371683120728, "eval_rouge1": 0.0394, "eval_rouge2": 0.0081, "eval_rougeL": 0.0329, "eval_rougeLsum": 0.0326, "eval_runtime": 15.4914, "eval_samples_per_second": 7.423, "eval_steps_per_second": 1.291, "step": 4023 }, { "epoch": 209.97, "eval_gen_len": 12.2174, "eval_loss": 0.7974857687950134, "eval_rouge1": 0.0398, "eval_rouge2": 0.0088, "eval_rougeL": 0.0348, "eval_rougeLsum": 0.0344, "eval_runtime": 15.8068, "eval_samples_per_second": 7.275, "eval_steps_per_second": 1.265, "step": 4042 }, { "epoch": 210.96, "eval_gen_len": 12.2696, "eval_loss": 0.7953728437423706, "eval_rouge1": 0.0415, "eval_rouge2": 0.009, "eval_rougeL": 0.0365, "eval_rougeLsum": 0.036, "eval_runtime": 15.5151, "eval_samples_per_second": 7.412, "eval_steps_per_second": 1.289, "step": 4061 }, { "epoch": 212.0, "eval_gen_len": 12.1304, "eval_loss": 0.7937628626823425, "eval_rouge1": 0.0418, "eval_rouge2": 0.009, "eval_rougeL": 0.037, "eval_rougeLsum": 0.0366, "eval_runtime": 15.6988, "eval_samples_per_second": 7.325, "eval_steps_per_second": 1.274, "step": 4081 }, { "epoch": 212.99, "eval_gen_len": 12.1043, "eval_loss": 0.7920788526535034, "eval_rouge1": 0.0416, "eval_rouge2": 0.009, "eval_rougeL": 0.0369, "eval_rougeLsum": 0.0367, "eval_runtime": 14.6071, "eval_samples_per_second": 7.873, "eval_steps_per_second": 1.369, "step": 4100 }, { "epoch": 213.97, "eval_gen_len": 11.9652, "eval_loss": 0.7905020117759705, "eval_rouge1": 0.041, "eval_rouge2": 0.0078, "eval_rougeL": 0.036, "eval_rougeLsum": 0.0357, "eval_runtime": 16.4932, "eval_samples_per_second": 6.973, "eval_steps_per_second": 1.213, "step": 4119 }, { "epoch": 214.96, "eval_gen_len": 11.9391, "eval_loss": 0.7891045212745667, "eval_rouge1": 0.0411, "eval_rouge2": 0.0078, "eval_rougeL": 0.0361, "eval_rougeLsum": 0.0358, "eval_runtime": 15.0709, "eval_samples_per_second": 7.631, "eval_steps_per_second": 1.327, "step": 4138 }, { "epoch": 216.0, "eval_gen_len": 12.1739, "eval_loss": 0.7874982953071594, "eval_rouge1": 0.0426, "eval_rouge2": 0.0081, "eval_rougeL": 0.0366, "eval_rougeLsum": 0.0363, "eval_runtime": 15.3399, "eval_samples_per_second": 7.497, "eval_steps_per_second": 1.304, "step": 4158 }, { "epoch": 216.99, "eval_gen_len": 12.3043, "eval_loss": 0.7856701016426086, "eval_rouge1": 0.0444, "eval_rouge2": 0.0092, "eval_rougeL": 0.0384, "eval_rougeLsum": 0.0383, "eval_runtime": 16.5308, "eval_samples_per_second": 6.957, "eval_steps_per_second": 1.21, "step": 4177 }, { "epoch": 217.97, "eval_gen_len": 12.2957, "eval_loss": 0.7841366529464722, "eval_rouge1": 0.0445, "eval_rouge2": 0.0092, "eval_rougeL": 0.039, "eval_rougeLsum": 0.0388, "eval_runtime": 13.9248, "eval_samples_per_second": 8.259, "eval_steps_per_second": 1.436, "step": 4196 }, { "epoch": 218.96, "eval_gen_len": 12.313, "eval_loss": 0.7825812101364136, "eval_rouge1": 0.0443, "eval_rouge2": 0.0087, "eval_rougeL": 0.0382, "eval_rougeLsum": 0.038, "eval_runtime": 14.6481, "eval_samples_per_second": 7.851, "eval_steps_per_second": 1.365, "step": 4215 }, { "epoch": 220.0, "eval_gen_len": 12.1217, "eval_loss": 0.7813829779624939, "eval_rouge1": 0.0438, "eval_rouge2": 0.0085, "eval_rougeL": 0.0379, "eval_rougeLsum": 0.0375, "eval_runtime": 16.4777, "eval_samples_per_second": 6.979, "eval_steps_per_second": 1.214, "step": 4235 }, { "epoch": 220.99, "eval_gen_len": 12.0348, "eval_loss": 0.7796338796615601, "eval_rouge1": 0.0431, "eval_rouge2": 0.0085, "eval_rougeL": 0.0372, "eval_rougeLsum": 0.0369, "eval_runtime": 15.2958, "eval_samples_per_second": 7.518, "eval_steps_per_second": 1.308, "step": 4254 }, { "epoch": 221.97, "eval_gen_len": 12.1043, "eval_loss": 0.77826988697052, "eval_rouge1": 0.0423, "eval_rouge2": 0.009, "eval_rougeL": 0.0365, "eval_rougeLsum": 0.0362, "eval_runtime": 16.2627, "eval_samples_per_second": 7.071, "eval_steps_per_second": 1.23, "step": 4273 }, { "epoch": 222.96, "eval_gen_len": 12.0435, "eval_loss": 0.7768360376358032, "eval_rouge1": 0.0426, "eval_rouge2": 0.009, "eval_rougeL": 0.0365, "eval_rougeLsum": 0.0363, "eval_runtime": 24.5776, "eval_samples_per_second": 4.679, "eval_steps_per_second": 0.814, "step": 4292 }, { "epoch": 224.0, "eval_gen_len": 12.0, "eval_loss": 0.7752098441123962, "eval_rouge1": 0.0425, "eval_rouge2": 0.009, "eval_rougeL": 0.0363, "eval_rougeLsum": 0.0361, "eval_runtime": 13.756, "eval_samples_per_second": 8.36, "eval_steps_per_second": 1.454, "step": 4312 }, { "epoch": 224.99, "eval_gen_len": 11.9391, "eval_loss": 0.7739911675453186, "eval_rouge1": 0.043, "eval_rouge2": 0.009, "eval_rougeL": 0.0371, "eval_rougeLsum": 0.0367, "eval_runtime": 15.726, "eval_samples_per_second": 7.313, "eval_steps_per_second": 1.272, "step": 4331 }, { "epoch": 225.97, "eval_gen_len": 11.8609, "eval_loss": 0.7723690271377563, "eval_rouge1": 0.0414, "eval_rouge2": 0.009, "eval_rougeL": 0.0357, "eval_rougeLsum": 0.0355, "eval_runtime": 16.5662, "eval_samples_per_second": 6.942, "eval_steps_per_second": 1.207, "step": 4350 }, { "epoch": 226.96, "eval_gen_len": 11.7652, "eval_loss": 0.7711983919143677, "eval_rouge1": 0.0429, "eval_rouge2": 0.0093, "eval_rougeL": 0.0363, "eval_rougeLsum": 0.0359, "eval_runtime": 14.4971, "eval_samples_per_second": 7.933, "eval_steps_per_second": 1.38, "step": 4369 }, { "epoch": 228.0, "eval_gen_len": 11.913, "eval_loss": 0.7694764733314514, "eval_rouge1": 0.0416, "eval_rouge2": 0.0093, "eval_rougeL": 0.0357, "eval_rougeLsum": 0.0354, "eval_runtime": 13.7683, "eval_samples_per_second": 8.353, "eval_steps_per_second": 1.453, "step": 4389 }, { "epoch": 228.99, "eval_gen_len": 12.0087, "eval_loss": 0.7683370113372803, "eval_rouge1": 0.0426, "eval_rouge2": 0.01, "eval_rougeL": 0.0369, "eval_rougeLsum": 0.0364, "eval_runtime": 14.7051, "eval_samples_per_second": 7.82, "eval_steps_per_second": 1.36, "step": 4408 }, { "epoch": 229.97, "eval_gen_len": 12.0696, "eval_loss": 0.7668902277946472, "eval_rouge1": 0.0422, "eval_rouge2": 0.0095, "eval_rougeL": 0.0364, "eval_rougeLsum": 0.036, "eval_runtime": 12.5967, "eval_samples_per_second": 9.129, "eval_steps_per_second": 1.588, "step": 4427 }, { "epoch": 230.96, "eval_gen_len": 11.7217, "eval_loss": 0.7656229734420776, "eval_rouge1": 0.0396, "eval_rouge2": 0.0094, "eval_rougeL": 0.0342, "eval_rougeLsum": 0.0339, "eval_runtime": 10.0582, "eval_samples_per_second": 11.433, "eval_steps_per_second": 1.988, "step": 4446 }, { "epoch": 232.0, "eval_gen_len": 11.5652, "eval_loss": 0.7644599676132202, "eval_rouge1": 0.0411, "eval_rouge2": 0.0091, "eval_rougeL": 0.0352, "eval_rougeLsum": 0.0349, "eval_runtime": 9.9608, "eval_samples_per_second": 11.545, "eval_steps_per_second": 2.008, "step": 4466 }, { "epoch": 232.99, "eval_gen_len": 11.7826, "eval_loss": 0.7628152370452881, "eval_rouge1": 0.0421, "eval_rouge2": 0.0095, "eval_rougeL": 0.0371, "eval_rougeLsum": 0.0371, "eval_runtime": 10.6119, "eval_samples_per_second": 10.837, "eval_steps_per_second": 1.885, "step": 4485 }, { "epoch": 233.77, "grad_norm": 0.5715782642364502, "learning_rate": 4.228070175438596e-06, "loss": 0.8871, "step": 4500 }, { "epoch": 233.97, "eval_gen_len": 11.8957, "eval_loss": 0.761337161064148, "eval_rouge1": 0.0436, "eval_rouge2": 0.0101, "eval_rougeL": 0.0382, "eval_rougeLsum": 0.0381, "eval_runtime": 17.7977, "eval_samples_per_second": 6.462, "eval_steps_per_second": 1.124, "step": 4504 }, { "epoch": 234.96, "eval_gen_len": 11.7652, "eval_loss": 0.7602398991584778, "eval_rouge1": 0.0424, "eval_rouge2": 0.0099, "eval_rougeL": 0.0372, "eval_rougeLsum": 0.0369, "eval_runtime": 10.3754, "eval_samples_per_second": 11.084, "eval_steps_per_second": 1.928, "step": 4523 }, { "epoch": 236.0, "eval_gen_len": 11.5652, "eval_loss": 0.7591829895973206, "eval_rouge1": 0.0419, "eval_rouge2": 0.0098, "eval_rougeL": 0.037, "eval_rougeLsum": 0.0367, "eval_runtime": 13.3388, "eval_samples_per_second": 8.621, "eval_steps_per_second": 1.499, "step": 4543 }, { "epoch": 236.99, "eval_gen_len": 11.6957, "eval_loss": 0.7578958868980408, "eval_rouge1": 0.0434, "eval_rouge2": 0.0102, "eval_rougeL": 0.0381, "eval_rougeLsum": 0.0378, "eval_runtime": 10.114, "eval_samples_per_second": 11.37, "eval_steps_per_second": 1.977, "step": 4562 }, { "epoch": 237.97, "eval_gen_len": 11.7652, "eval_loss": 0.7568346858024597, "eval_rouge1": 0.0448, "eval_rouge2": 0.0108, "eval_rougeL": 0.0383, "eval_rougeLsum": 0.0381, "eval_runtime": 10.3192, "eval_samples_per_second": 11.144, "eval_steps_per_second": 1.938, "step": 4581 }, { "epoch": 238.96, "eval_gen_len": 11.7739, "eval_loss": 0.7555378079414368, "eval_rouge1": 0.0455, "eval_rouge2": 0.0105, "eval_rougeL": 0.038, "eval_rougeLsum": 0.0379, "eval_runtime": 10.8876, "eval_samples_per_second": 10.562, "eval_steps_per_second": 1.837, "step": 4600 }, { "epoch": 240.0, "eval_gen_len": 11.8957, "eval_loss": 0.7544582486152649, "eval_rouge1": 0.0445, "eval_rouge2": 0.0105, "eval_rougeL": 0.0372, "eval_rougeLsum": 0.0371, "eval_runtime": 17.7147, "eval_samples_per_second": 6.492, "eval_steps_per_second": 1.129, "step": 4620 }, { "epoch": 240.99, "eval_gen_len": 12.0174, "eval_loss": 0.7532872557640076, "eval_rouge1": 0.0473, "eval_rouge2": 0.0105, "eval_rougeL": 0.0389, "eval_rougeLsum": 0.0388, "eval_runtime": 20.567, "eval_samples_per_second": 5.591, "eval_steps_per_second": 0.972, "step": 4639 }, { "epoch": 241.97, "eval_gen_len": 11.9913, "eval_loss": 0.7523981928825378, "eval_rouge1": 0.0482, "eval_rouge2": 0.0105, "eval_rougeL": 0.0393, "eval_rougeLsum": 0.0392, "eval_runtime": 10.1389, "eval_samples_per_second": 11.342, "eval_steps_per_second": 1.973, "step": 4658 }, { "epoch": 242.96, "eval_gen_len": 11.6609, "eval_loss": 0.7515619993209839, "eval_rouge1": 0.0454, "eval_rouge2": 0.0098, "eval_rougeL": 0.0379, "eval_rougeLsum": 0.0378, "eval_runtime": 10.4917, "eval_samples_per_second": 10.961, "eval_steps_per_second": 1.906, "step": 4677 }, { "epoch": 244.0, "eval_gen_len": 11.6696, "eval_loss": 0.7501043081283569, "eval_rouge1": 0.0447, "eval_rouge2": 0.0094, "eval_rougeL": 0.0372, "eval_rougeLsum": 0.0371, "eval_runtime": 10.4455, "eval_samples_per_second": 11.009, "eval_steps_per_second": 1.915, "step": 4697 }, { "epoch": 244.99, "eval_gen_len": 11.9826, "eval_loss": 0.7491604089736938, "eval_rouge1": 0.0469, "eval_rouge2": 0.0097, "eval_rougeL": 0.0389, "eval_rougeLsum": 0.0389, "eval_runtime": 14.1736, "eval_samples_per_second": 8.114, "eval_steps_per_second": 1.411, "step": 4716 }, { "epoch": 245.97, "eval_gen_len": 11.9913, "eval_loss": 0.7484715580940247, "eval_rouge1": 0.0479, "eval_rouge2": 0.0095, "eval_rougeL": 0.0397, "eval_rougeLsum": 0.0397, "eval_runtime": 13.6957, "eval_samples_per_second": 8.397, "eval_steps_per_second": 1.46, "step": 4735 }, { "epoch": 246.96, "eval_gen_len": 12.0522, "eval_loss": 0.747407853603363, "eval_rouge1": 0.0491, "eval_rouge2": 0.01, "eval_rougeL": 0.0403, "eval_rougeLsum": 0.0404, "eval_runtime": 10.4948, "eval_samples_per_second": 10.958, "eval_steps_per_second": 1.906, "step": 4754 }, { "epoch": 248.0, "eval_gen_len": 11.9826, "eval_loss": 0.746651291847229, "eval_rouge1": 0.0482, "eval_rouge2": 0.0092, "eval_rougeL": 0.0394, "eval_rougeLsum": 0.0395, "eval_runtime": 10.1337, "eval_samples_per_second": 11.348, "eval_steps_per_second": 1.974, "step": 4774 }, { "epoch": 248.99, "eval_gen_len": 12.1391, "eval_loss": 0.7458359003067017, "eval_rouge1": 0.0483, "eval_rouge2": 0.0084, "eval_rougeL": 0.0402, "eval_rougeLsum": 0.0403, "eval_runtime": 10.6355, "eval_samples_per_second": 10.813, "eval_steps_per_second": 1.88, "step": 4793 }, { "epoch": 249.97, "eval_gen_len": 12.2087, "eval_loss": 0.7449273467063904, "eval_rouge1": 0.0487, "eval_rouge2": 0.0083, "eval_rougeL": 0.0402, "eval_rougeLsum": 0.0404, "eval_runtime": 10.4912, "eval_samples_per_second": 10.962, "eval_steps_per_second": 1.906, "step": 4812 }, { "epoch": 250.96, "eval_gen_len": 11.9391, "eval_loss": 0.7444418668746948, "eval_rouge1": 0.0483, "eval_rouge2": 0.0083, "eval_rougeL": 0.0402, "eval_rougeLsum": 0.0403, "eval_runtime": 10.0739, "eval_samples_per_second": 11.416, "eval_steps_per_second": 1.985, "step": 4831 }, { "epoch": 252.0, "eval_gen_len": 11.913, "eval_loss": 0.7435948848724365, "eval_rouge1": 0.0479, "eval_rouge2": 0.0083, "eval_rougeL": 0.0396, "eval_rougeLsum": 0.0397, "eval_runtime": 10.3551, "eval_samples_per_second": 11.106, "eval_steps_per_second": 1.931, "step": 4851 }, { "epoch": 252.99, "eval_gen_len": 11.8783, "eval_loss": 0.7429930567741394, "eval_rouge1": 0.048, "eval_rouge2": 0.0083, "eval_rougeL": 0.0398, "eval_rougeLsum": 0.0399, "eval_runtime": 10.191, "eval_samples_per_second": 11.285, "eval_steps_per_second": 1.963, "step": 4870 }, { "epoch": 253.97, "eval_gen_len": 11.9652, "eval_loss": 0.7424508333206177, "eval_rouge1": 0.0481, "eval_rouge2": 0.0083, "eval_rougeL": 0.04, "eval_rougeLsum": 0.04, "eval_runtime": 10.1791, "eval_samples_per_second": 11.298, "eval_steps_per_second": 1.965, "step": 4889 }, { "epoch": 254.96, "eval_gen_len": 12.0174, "eval_loss": 0.7415958642959595, "eval_rouge1": 0.0486, "eval_rouge2": 0.0083, "eval_rougeL": 0.0398, "eval_rougeLsum": 0.0398, "eval_runtime": 16.411, "eval_samples_per_second": 7.007, "eval_steps_per_second": 1.219, "step": 4908 }, { "epoch": 256.0, "eval_gen_len": 11.7478, "eval_loss": 0.7406365871429443, "eval_rouge1": 0.0475, "eval_rouge2": 0.0083, "eval_rougeL": 0.0386, "eval_rougeLsum": 0.0387, "eval_runtime": 10.1485, "eval_samples_per_second": 11.332, "eval_steps_per_second": 1.971, "step": 4928 }, { "epoch": 256.99, "eval_gen_len": 11.8696, "eval_loss": 0.7399746179580688, "eval_rouge1": 0.0483, "eval_rouge2": 0.0079, "eval_rougeL": 0.039, "eval_rougeLsum": 0.0393, "eval_runtime": 16.5037, "eval_samples_per_second": 6.968, "eval_steps_per_second": 1.212, "step": 4947 }, { "epoch": 257.97, "eval_gen_len": 11.6609, "eval_loss": 0.7393442392349243, "eval_rouge1": 0.0467, "eval_rouge2": 0.0075, "eval_rougeL": 0.0377, "eval_rougeLsum": 0.0378, "eval_runtime": 12.2582, "eval_samples_per_second": 9.381, "eval_steps_per_second": 1.632, "step": 4966 }, { "epoch": 258.96, "eval_gen_len": 11.4087, "eval_loss": 0.7388782501220703, "eval_rouge1": 0.0455, "eval_rouge2": 0.0072, "eval_rougeL": 0.037, "eval_rougeLsum": 0.0372, "eval_runtime": 10.1512, "eval_samples_per_second": 11.329, "eval_steps_per_second": 1.97, "step": 4985 }, { "epoch": 259.74, "grad_norm": 0.4241856038570404, "learning_rate": 2.473684210526316e-06, "loss": 0.8499, "step": 5000 }, { "epoch": 260.0, "eval_gen_len": 11.5913, "eval_loss": 0.7382517457008362, "eval_rouge1": 0.0464, "eval_rouge2": 0.007, "eval_rougeL": 0.0372, "eval_rougeLsum": 0.0376, "eval_runtime": 10.3008, "eval_samples_per_second": 11.164, "eval_steps_per_second": 1.942, "step": 5005 }, { "epoch": 260.99, "eval_gen_len": 11.6348, "eval_loss": 0.7377699613571167, "eval_rouge1": 0.0482, "eval_rouge2": 0.0077, "eval_rougeL": 0.0385, "eval_rougeLsum": 0.0388, "eval_runtime": 10.0612, "eval_samples_per_second": 11.43, "eval_steps_per_second": 1.988, "step": 5024 }, { "epoch": 261.97, "eval_gen_len": 11.4522, "eval_loss": 0.7373109459877014, "eval_rouge1": 0.0483, "eval_rouge2": 0.008, "eval_rougeL": 0.0384, "eval_rougeLsum": 0.0385, "eval_runtime": 11.974, "eval_samples_per_second": 9.604, "eval_steps_per_second": 1.67, "step": 5043 }, { "epoch": 262.96, "eval_gen_len": 11.3913, "eval_loss": 0.7369760870933533, "eval_rouge1": 0.0474, "eval_rouge2": 0.0076, "eval_rougeL": 0.0375, "eval_rougeLsum": 0.0377, "eval_runtime": 10.4532, "eval_samples_per_second": 11.001, "eval_steps_per_second": 1.913, "step": 5062 }, { "epoch": 264.0, "eval_gen_len": 11.6696, "eval_loss": 0.7363179326057434, "eval_rouge1": 0.0486, "eval_rouge2": 0.0077, "eval_rougeL": 0.0385, "eval_rougeLsum": 0.0385, "eval_runtime": 10.1961, "eval_samples_per_second": 11.279, "eval_steps_per_second": 1.962, "step": 5082 }, { "epoch": 264.99, "eval_gen_len": 11.7826, "eval_loss": 0.7355965971946716, "eval_rouge1": 0.0493, "eval_rouge2": 0.0084, "eval_rougeL": 0.039, "eval_rougeLsum": 0.039, "eval_runtime": 10.1789, "eval_samples_per_second": 11.298, "eval_steps_per_second": 1.965, "step": 5101 }, { "epoch": 265.97, "eval_gen_len": 11.4609, "eval_loss": 0.735298752784729, "eval_rouge1": 0.047, "eval_rouge2": 0.0076, "eval_rougeL": 0.0371, "eval_rougeLsum": 0.0372, "eval_runtime": 10.1771, "eval_samples_per_second": 11.3, "eval_steps_per_second": 1.965, "step": 5120 }, { "epoch": 266.96, "eval_gen_len": 11.3217, "eval_loss": 0.734704852104187, "eval_rouge1": 0.0461, "eval_rouge2": 0.0076, "eval_rougeL": 0.0364, "eval_rougeLsum": 0.0366, "eval_runtime": 10.0146, "eval_samples_per_second": 11.483, "eval_steps_per_second": 1.997, "step": 5139 }, { "epoch": 268.0, "eval_gen_len": 11.3478, "eval_loss": 0.7341461777687073, "eval_rouge1": 0.0461, "eval_rouge2": 0.0078, "eval_rougeL": 0.0368, "eval_rougeLsum": 0.0371, "eval_runtime": 10.1605, "eval_samples_per_second": 11.318, "eval_steps_per_second": 1.968, "step": 5159 }, { "epoch": 268.99, "eval_gen_len": 11.3304, "eval_loss": 0.7336880564689636, "eval_rouge1": 0.0461, "eval_rouge2": 0.0078, "eval_rougeL": 0.0368, "eval_rougeLsum": 0.0371, "eval_runtime": 10.0336, "eval_samples_per_second": 11.462, "eval_steps_per_second": 1.993, "step": 5178 }, { "epoch": 269.97, "eval_gen_len": 11.3565, "eval_loss": 0.7333458662033081, "eval_rouge1": 0.0466, "eval_rouge2": 0.0084, "eval_rougeL": 0.0374, "eval_rougeLsum": 0.0378, "eval_runtime": 10.1088, "eval_samples_per_second": 11.376, "eval_steps_per_second": 1.978, "step": 5197 }, { "epoch": 270.96, "eval_gen_len": 11.4696, "eval_loss": 0.7329635620117188, "eval_rouge1": 0.0484, "eval_rouge2": 0.009, "eval_rougeL": 0.0383, "eval_rougeLsum": 0.0387, "eval_runtime": 10.0683, "eval_samples_per_second": 11.422, "eval_steps_per_second": 1.986, "step": 5216 }, { "epoch": 272.0, "eval_gen_len": 11.1826, "eval_loss": 0.7325230836868286, "eval_rouge1": 0.0471, "eval_rouge2": 0.0086, "eval_rougeL": 0.0373, "eval_rougeLsum": 0.0376, "eval_runtime": 10.2034, "eval_samples_per_second": 11.271, "eval_steps_per_second": 1.96, "step": 5236 }, { "epoch": 272.99, "eval_gen_len": 11.113, "eval_loss": 0.7321166396141052, "eval_rouge1": 0.0467, "eval_rouge2": 0.0085, "eval_rougeL": 0.0372, "eval_rougeLsum": 0.0377, "eval_runtime": 12.209, "eval_samples_per_second": 9.419, "eval_steps_per_second": 1.638, "step": 5255 }, { "epoch": 273.97, "eval_gen_len": 10.9304, "eval_loss": 0.731701135635376, "eval_rouge1": 0.0465, "eval_rouge2": 0.0085, "eval_rougeL": 0.037, "eval_rougeLsum": 0.0374, "eval_runtime": 10.4807, "eval_samples_per_second": 10.973, "eval_steps_per_second": 1.908, "step": 5274 }, { "epoch": 274.96, "eval_gen_len": 10.8609, "eval_loss": 0.7313553094863892, "eval_rouge1": 0.047, "eval_rouge2": 0.0089, "eval_rougeL": 0.0374, "eval_rougeLsum": 0.0379, "eval_runtime": 10.1881, "eval_samples_per_second": 11.288, "eval_steps_per_second": 1.963, "step": 5293 }, { "epoch": 276.0, "eval_gen_len": 10.8261, "eval_loss": 0.7310741543769836, "eval_rouge1": 0.0468, "eval_rouge2": 0.0086, "eval_rougeL": 0.0374, "eval_rougeLsum": 0.038, "eval_runtime": 10.4254, "eval_samples_per_second": 11.031, "eval_steps_per_second": 1.918, "step": 5313 }, { "epoch": 276.99, "eval_gen_len": 11.0348, "eval_loss": 0.7306625247001648, "eval_rouge1": 0.0473, "eval_rouge2": 0.0086, "eval_rougeL": 0.0379, "eval_rougeLsum": 0.0384, "eval_runtime": 17.0869, "eval_samples_per_second": 6.73, "eval_steps_per_second": 1.17, "step": 5332 }, { "epoch": 277.97, "eval_gen_len": 11.0609, "eval_loss": 0.7303984761238098, "eval_rouge1": 0.0482, "eval_rouge2": 0.0089, "eval_rougeL": 0.0388, "eval_rougeLsum": 0.0393, "eval_runtime": 10.2327, "eval_samples_per_second": 11.238, "eval_steps_per_second": 1.955, "step": 5351 }, { "epoch": 278.96, "eval_gen_len": 11.1304, "eval_loss": 0.7300783395767212, "eval_rouge1": 0.0482, "eval_rouge2": 0.0089, "eval_rougeL": 0.0388, "eval_rougeLsum": 0.0393, "eval_runtime": 10.0601, "eval_samples_per_second": 11.431, "eval_steps_per_second": 1.988, "step": 5370 }, { "epoch": 280.0, "eval_gen_len": 10.9826, "eval_loss": 0.729738175868988, "eval_rouge1": 0.0477, "eval_rouge2": 0.0089, "eval_rougeL": 0.0384, "eval_rougeLsum": 0.039, "eval_runtime": 10.0013, "eval_samples_per_second": 11.498, "eval_steps_per_second": 2.0, "step": 5390 }, { "epoch": 280.99, "eval_gen_len": 10.9652, "eval_loss": 0.7294939756393433, "eval_rouge1": 0.048, "eval_rouge2": 0.0089, "eval_rougeL": 0.0386, "eval_rougeLsum": 0.0392, "eval_runtime": 9.9502, "eval_samples_per_second": 11.558, "eval_steps_per_second": 2.01, "step": 5409 }, { "epoch": 281.97, "eval_gen_len": 11.0435, "eval_loss": 0.7292339205741882, "eval_rouge1": 0.0486, "eval_rouge2": 0.0093, "eval_rougeL": 0.039, "eval_rougeLsum": 0.0394, "eval_runtime": 10.0288, "eval_samples_per_second": 11.467, "eval_steps_per_second": 1.994, "step": 5428 }, { "epoch": 282.96, "eval_gen_len": 10.9478, "eval_loss": 0.7289875149726868, "eval_rouge1": 0.0488, "eval_rouge2": 0.0095, "eval_rougeL": 0.0393, "eval_rougeLsum": 0.0398, "eval_runtime": 9.9992, "eval_samples_per_second": 11.501, "eval_steps_per_second": 2.0, "step": 5447 }, { "epoch": 284.0, "eval_gen_len": 10.887, "eval_loss": 0.7288010120391846, "eval_rouge1": 0.0483, "eval_rouge2": 0.0093, "eval_rougeL": 0.0388, "eval_rougeLsum": 0.0392, "eval_runtime": 11.7568, "eval_samples_per_second": 9.782, "eval_steps_per_second": 1.701, "step": 5467 }, { "epoch": 284.99, "eval_gen_len": 10.7391, "eval_loss": 0.7286383509635925, "eval_rouge1": 0.0472, "eval_rouge2": 0.0091, "eval_rougeL": 0.038, "eval_rougeLsum": 0.0383, "eval_runtime": 10.4604, "eval_samples_per_second": 10.994, "eval_steps_per_second": 1.912, "step": 5486 }, { "epoch": 285.71, "grad_norm": 0.46214622259140015, "learning_rate": 7.192982456140352e-07, "loss": 0.8305, "step": 5500 } ], "logging_steps": 500, "max_steps": 5700, "num_input_tokens_seen": 0, "num_train_epochs": 300, "save_steps": 500, "total_flos": 1.6008000528973824e+17, "train_batch_size": 6, "trial_name": null, "trial_params": null }