{ "best_metric": null, "best_model_checkpoint": null, "epoch": 441.5584415584416, "eval_steps": 500, "global_step": 8500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.99, "eval_gen_len": 14.3217, "eval_loss": 21.81446075439453, "eval_rouge1": 0.0662, "eval_rouge2": 0.0082, "eval_rougeL": 0.0523, "eval_rougeLsum": 0.0526, "eval_runtime": 12.9343, "eval_samples_per_second": 8.891, "eval_steps_per_second": 1.546, "step": 19 }, { "epoch": 1.97, "eval_gen_len": 14.4, "eval_loss": 21.760791778564453, "eval_rouge1": 0.0661, "eval_rouge2": 0.0082, "eval_rougeL": 0.052, "eval_rougeLsum": 0.0523, "eval_runtime": 11.8223, "eval_samples_per_second": 9.727, "eval_steps_per_second": 1.692, "step": 38 }, { "epoch": 2.96, "eval_gen_len": 14.4, "eval_loss": 21.68472671508789, "eval_rouge1": 0.0651, "eval_rouge2": 0.0077, "eval_rougeL": 0.051, "eval_rougeLsum": 0.0513, "eval_runtime": 12.1423, "eval_samples_per_second": 9.471, "eval_steps_per_second": 1.647, "step": 57 }, { "epoch": 4.0, "eval_gen_len": 14.3304, "eval_loss": 21.582918167114258, "eval_rouge1": 0.0666, "eval_rouge2": 0.0081, "eval_rougeL": 0.0525, "eval_rougeLsum": 0.0525, "eval_runtime": 12.0406, "eval_samples_per_second": 9.551, "eval_steps_per_second": 1.661, "step": 77 }, { "epoch": 4.99, "eval_gen_len": 14.2609, "eval_loss": 21.46294403076172, "eval_rouge1": 0.0668, "eval_rouge2": 0.008, "eval_rougeL": 0.0527, "eval_rougeLsum": 0.0526, "eval_runtime": 12.3725, "eval_samples_per_second": 9.295, "eval_steps_per_second": 1.616, "step": 96 }, { "epoch": 5.97, "eval_gen_len": 14.2261, "eval_loss": 21.31937599182129, "eval_rouge1": 0.0671, "eval_rouge2": 0.0077, "eval_rougeL": 0.0525, "eval_rougeLsum": 0.0525, "eval_runtime": 12.5289, "eval_samples_per_second": 9.179, "eval_steps_per_second": 1.596, "step": 115 }, { "epoch": 6.96, "eval_gen_len": 14.3043, "eval_loss": 21.15254783630371, "eval_rouge1": 0.0677, "eval_rouge2": 0.0077, "eval_rougeL": 0.0525, "eval_rougeLsum": 0.0525, "eval_runtime": 12.0936, "eval_samples_per_second": 9.509, "eval_steps_per_second": 1.654, "step": 134 }, { "epoch": 8.0, "eval_gen_len": 14.2609, "eval_loss": 20.956642150878906, "eval_rouge1": 0.0679, "eval_rouge2": 0.0076, "eval_rougeL": 0.0522, "eval_rougeLsum": 0.0521, "eval_runtime": 12.6351, "eval_samples_per_second": 9.102, "eval_steps_per_second": 1.583, "step": 154 }, { "epoch": 8.99, "eval_gen_len": 14.1217, "eval_loss": 20.7694091796875, "eval_rouge1": 0.0664, "eval_rouge2": 0.0074, "eval_rougeL": 0.0507, "eval_rougeLsum": 0.0507, "eval_runtime": 12.3661, "eval_samples_per_second": 9.3, "eval_steps_per_second": 1.617, "step": 173 }, { "epoch": 9.97, "eval_gen_len": 14.0957, "eval_loss": 20.60512351989746, "eval_rouge1": 0.0685, "eval_rouge2": 0.0078, "eval_rougeL": 0.0519, "eval_rougeLsum": 0.0518, "eval_runtime": 12.6094, "eval_samples_per_second": 9.12, "eval_steps_per_second": 1.586, "step": 192 }, { "epoch": 10.96, "eval_gen_len": 13.9913, "eval_loss": 20.46657371520996, "eval_rouge1": 0.0672, "eval_rouge2": 0.0072, "eval_rougeL": 0.0511, "eval_rougeLsum": 0.0511, "eval_runtime": 12.4343, "eval_samples_per_second": 9.249, "eval_steps_per_second": 1.608, "step": 211 }, { "epoch": 12.0, "eval_gen_len": 14.2957, "eval_loss": 20.335567474365234, "eval_rouge1": 0.0658, "eval_rouge2": 0.0079, "eval_rougeL": 0.05, "eval_rougeLsum": 0.05, "eval_runtime": 12.4623, "eval_samples_per_second": 9.228, "eval_steps_per_second": 1.605, "step": 231 }, { "epoch": 12.99, "eval_gen_len": 14.7826, "eval_loss": 20.210546493530273, "eval_rouge1": 0.0729, "eval_rouge2": 0.0108, "eval_rougeL": 0.0562, "eval_rougeLsum": 0.056, "eval_runtime": 12.6201, "eval_samples_per_second": 9.112, "eval_steps_per_second": 1.585, "step": 250 }, { "epoch": 13.97, "eval_gen_len": 14.8174, "eval_loss": 20.080350875854492, "eval_rouge1": 0.0731, "eval_rouge2": 0.0105, "eval_rougeL": 0.057, "eval_rougeLsum": 0.0566, "eval_runtime": 12.5892, "eval_samples_per_second": 9.135, "eval_steps_per_second": 1.589, "step": 269 }, { "epoch": 14.96, "eval_gen_len": 15.3826, "eval_loss": 19.947362899780273, "eval_rouge1": 0.0805, "eval_rouge2": 0.0127, "eval_rougeL": 0.0624, "eval_rougeLsum": 0.0619, "eval_runtime": 12.2446, "eval_samples_per_second": 9.392, "eval_steps_per_second": 1.633, "step": 288 }, { "epoch": 16.0, "eval_gen_len": 15.8261, "eval_loss": 19.80646324157715, "eval_rouge1": 0.0818, "eval_rouge2": 0.0129, "eval_rougeL": 0.0636, "eval_rougeLsum": 0.0633, "eval_runtime": 12.037, "eval_samples_per_second": 9.554, "eval_steps_per_second": 1.662, "step": 308 }, { "epoch": 16.99, "eval_gen_len": 16.1391, "eval_loss": 19.66917610168457, "eval_rouge1": 0.0837, "eval_rouge2": 0.0139, "eval_rougeL": 0.0648, "eval_rougeLsum": 0.0647, "eval_runtime": 12.6734, "eval_samples_per_second": 9.074, "eval_steps_per_second": 1.578, "step": 327 }, { "epoch": 17.97, "eval_gen_len": 16.5391, "eval_loss": 19.5202579498291, "eval_rouge1": 0.0915, "eval_rouge2": 0.0168, "eval_rougeL": 0.0713, "eval_rougeLsum": 0.0709, "eval_runtime": 12.3571, "eval_samples_per_second": 9.306, "eval_steps_per_second": 1.619, "step": 346 }, { "epoch": 18.96, "eval_gen_len": 16.687, "eval_loss": 19.36480712890625, "eval_rouge1": 0.0934, "eval_rouge2": 0.0178, "eval_rougeL": 0.072, "eval_rougeLsum": 0.0719, "eval_runtime": 12.3428, "eval_samples_per_second": 9.317, "eval_steps_per_second": 1.62, "step": 365 }, { "epoch": 20.0, "eval_gen_len": 17.0522, "eval_loss": 19.185970306396484, "eval_rouge1": 0.0951, "eval_rouge2": 0.0172, "eval_rougeL": 0.0735, "eval_rougeLsum": 0.0735, "eval_runtime": 12.4924, "eval_samples_per_second": 9.206, "eval_steps_per_second": 1.601, "step": 385 }, { "epoch": 20.99, "eval_gen_len": 17.1913, "eval_loss": 18.998315811157227, "eval_rouge1": 0.0938, "eval_rouge2": 0.0192, "eval_rougeL": 0.0754, "eval_rougeLsum": 0.0755, "eval_runtime": 12.2427, "eval_samples_per_second": 9.393, "eval_steps_per_second": 1.634, "step": 404 }, { "epoch": 21.97, "eval_gen_len": 17.3304, "eval_loss": 18.782604217529297, "eval_rouge1": 0.0975, "eval_rouge2": 0.0223, "eval_rougeL": 0.0786, "eval_rougeLsum": 0.0788, "eval_runtime": 12.4341, "eval_samples_per_second": 9.249, "eval_steps_per_second": 1.608, "step": 423 }, { "epoch": 22.96, "eval_gen_len": 17.3304, "eval_loss": 18.529996871948242, "eval_rouge1": 0.0986, "eval_rouge2": 0.0229, "eval_rougeL": 0.0787, "eval_rougeLsum": 0.079, "eval_runtime": 12.138, "eval_samples_per_second": 9.474, "eval_steps_per_second": 1.648, "step": 442 }, { "epoch": 24.0, "eval_gen_len": 17.4696, "eval_loss": 18.21288299560547, "eval_rouge1": 0.0935, "eval_rouge2": 0.0195, "eval_rougeL": 0.0761, "eval_rougeLsum": 0.0763, "eval_runtime": 12.3892, "eval_samples_per_second": 9.282, "eval_steps_per_second": 1.614, "step": 462 }, { "epoch": 24.99, "eval_gen_len": 17.6087, "eval_loss": 17.844209671020508, "eval_rouge1": 0.0936, "eval_rouge2": 0.0225, "eval_rougeL": 0.0756, "eval_rougeLsum": 0.0758, "eval_runtime": 12.2522, "eval_samples_per_second": 9.386, "eval_steps_per_second": 1.632, "step": 481 }, { "epoch": 25.97, "grad_norm": 4.415005683898926, "learning_rate": 9.474736842105265e-06, "loss": 19.6383, "step": 500 }, { "epoch": 25.97, "eval_gen_len": 17.7478, "eval_loss": 17.39573097229004, "eval_rouge1": 0.0967, "eval_rouge2": 0.0221, "eval_rougeL": 0.0765, "eval_rougeLsum": 0.0764, "eval_runtime": 12.5376, "eval_samples_per_second": 9.172, "eval_steps_per_second": 1.595, "step": 500 }, { "epoch": 26.96, "eval_gen_len": 17.7478, "eval_loss": 16.90059471130371, "eval_rouge1": 0.0983, "eval_rouge2": 0.0198, "eval_rougeL": 0.0786, "eval_rougeLsum": 0.0785, "eval_runtime": 12.1506, "eval_samples_per_second": 9.465, "eval_steps_per_second": 1.646, "step": 519 }, { "epoch": 28.0, "eval_gen_len": 17.3304, "eval_loss": 16.37845802307129, "eval_rouge1": 0.0916, "eval_rouge2": 0.0162, "eval_rougeL": 0.0723, "eval_rougeLsum": 0.0724, "eval_runtime": 12.9559, "eval_samples_per_second": 8.876, "eval_steps_per_second": 1.544, "step": 539 }, { "epoch": 28.99, "eval_gen_len": 17.0522, "eval_loss": 15.880877494812012, "eval_rouge1": 0.0898, "eval_rouge2": 0.0212, "eval_rougeL": 0.0716, "eval_rougeLsum": 0.0711, "eval_runtime": 12.3917, "eval_samples_per_second": 9.28, "eval_steps_per_second": 1.614, "step": 558 }, { "epoch": 29.97, "eval_gen_len": 17.6087, "eval_loss": 15.38318920135498, "eval_rouge1": 0.09, "eval_rouge2": 0.0179, "eval_rougeL": 0.0717, "eval_rougeLsum": 0.0717, "eval_runtime": 12.4102, "eval_samples_per_second": 9.267, "eval_steps_per_second": 1.612, "step": 577 }, { "epoch": 30.96, "eval_gen_len": 17.6087, "eval_loss": 14.880407333374023, "eval_rouge1": 0.0921, "eval_rouge2": 0.02, "eval_rougeL": 0.0768, "eval_rougeLsum": 0.0765, "eval_runtime": 12.1352, "eval_samples_per_second": 9.477, "eval_steps_per_second": 1.648, "step": 596 }, { "epoch": 32.0, "eval_gen_len": 17.7478, "eval_loss": 14.299490928649902, "eval_rouge1": 0.0856, "eval_rouge2": 0.0197, "eval_rougeL": 0.0713, "eval_rougeLsum": 0.071, "eval_runtime": 12.7436, "eval_samples_per_second": 9.024, "eval_steps_per_second": 1.569, "step": 616 }, { "epoch": 32.99, "eval_gen_len": 17.0522, "eval_loss": 13.684185981750488, "eval_rouge1": 0.0796, "eval_rouge2": 0.0153, "eval_rougeL": 0.0627, "eval_rougeLsum": 0.0626, "eval_runtime": 12.5048, "eval_samples_per_second": 9.196, "eval_steps_per_second": 1.599, "step": 635 }, { "epoch": 33.97, "eval_gen_len": 17.3304, "eval_loss": 13.098108291625977, "eval_rouge1": 0.0772, "eval_rouge2": 0.0098, "eval_rougeL": 0.0646, "eval_rougeLsum": 0.0643, "eval_runtime": 12.1222, "eval_samples_per_second": 9.487, "eval_steps_per_second": 1.65, "step": 654 }, { "epoch": 34.96, "eval_gen_len": 17.4696, "eval_loss": 12.545892715454102, "eval_rouge1": 0.0751, "eval_rouge2": 0.0107, "eval_rougeL": 0.0622, "eval_rougeLsum": 0.0622, "eval_runtime": 12.1398, "eval_samples_per_second": 9.473, "eval_steps_per_second": 1.647, "step": 673 }, { "epoch": 36.0, "eval_gen_len": 17.3304, "eval_loss": 12.004250526428223, "eval_rouge1": 0.0704, "eval_rouge2": 0.0081, "eval_rougeL": 0.0588, "eval_rougeLsum": 0.0588, "eval_runtime": 12.5092, "eval_samples_per_second": 9.193, "eval_steps_per_second": 1.599, "step": 693 }, { "epoch": 36.99, "eval_gen_len": 17.887, "eval_loss": 11.514721870422363, "eval_rouge1": 0.0672, "eval_rouge2": 0.0079, "eval_rougeL": 0.0577, "eval_rougeLsum": 0.0577, "eval_runtime": 12.4809, "eval_samples_per_second": 9.214, "eval_steps_per_second": 1.602, "step": 712 }, { "epoch": 37.97, "eval_gen_len": 17.6087, "eval_loss": 11.038866996765137, "eval_rouge1": 0.0513, "eval_rouge2": 0.0045, "eval_rougeL": 0.046, "eval_rougeLsum": 0.0457, "eval_runtime": 12.6414, "eval_samples_per_second": 9.097, "eval_steps_per_second": 1.582, "step": 731 }, { "epoch": 38.96, "eval_gen_len": 17.1913, "eval_loss": 10.570833206176758, "eval_rouge1": 0.0468, "eval_rouge2": 0.0029, "eval_rougeL": 0.0404, "eval_rougeLsum": 0.0403, "eval_runtime": 12.7674, "eval_samples_per_second": 9.007, "eval_steps_per_second": 1.566, "step": 750 }, { "epoch": 40.0, "eval_gen_len": 17.3565, "eval_loss": 10.076028823852539, "eval_rouge1": 0.0352, "eval_rouge2": 0.0037, "eval_rougeL": 0.0305, "eval_rougeLsum": 0.0304, "eval_runtime": 12.8987, "eval_samples_per_second": 8.916, "eval_steps_per_second": 1.551, "step": 770 }, { "epoch": 40.99, "eval_gen_len": 17.4957, "eval_loss": 9.603371620178223, "eval_rouge1": 0.024, "eval_rouge2": 0.0024, "eval_rougeL": 0.0218, "eval_rougeLsum": 0.0218, "eval_runtime": 12.2083, "eval_samples_per_second": 9.42, "eval_steps_per_second": 1.638, "step": 789 }, { "epoch": 41.97, "eval_gen_len": 18.3043, "eval_loss": 9.131211280822754, "eval_rouge1": 0.0193, "eval_rouge2": 0.0022, "eval_rougeL": 0.0184, "eval_rougeLsum": 0.0185, "eval_runtime": 12.1389, "eval_samples_per_second": 9.474, "eval_steps_per_second": 1.648, "step": 808 }, { "epoch": 42.96, "eval_gen_len": 18.7217, "eval_loss": 8.668445587158203, "eval_rouge1": 0.0116, "eval_rouge2": 0.0013, "eval_rougeL": 0.0109, "eval_rougeLsum": 0.0108, "eval_runtime": 12.4712, "eval_samples_per_second": 9.221, "eval_steps_per_second": 1.604, "step": 827 }, { "epoch": 44.0, "eval_gen_len": 18.8609, "eval_loss": 8.1836576461792, "eval_rouge1": 0.0031, "eval_rouge2": 0.0004, "eval_rougeL": 0.0032, "eval_rougeLsum": 0.0032, "eval_runtime": 12.1395, "eval_samples_per_second": 9.473, "eval_steps_per_second": 1.648, "step": 847 }, { "epoch": 44.99, "eval_gen_len": 18.8609, "eval_loss": 7.736245632171631, "eval_rouge1": 0.0028, "eval_rouge2": 0.0002, "eval_rougeL": 0.0028, "eval_rougeLsum": 0.0028, "eval_runtime": 12.2043, "eval_samples_per_second": 9.423, "eval_steps_per_second": 1.639, "step": 866 }, { "epoch": 45.97, "eval_gen_len": 18.8609, "eval_loss": 7.298835277557373, "eval_rouge1": 0.0018, "eval_rouge2": 0.0004, "eval_rougeL": 0.0018, "eval_rougeLsum": 0.0018, "eval_runtime": 12.1301, "eval_samples_per_second": 9.481, "eval_steps_per_second": 1.649, "step": 885 }, { "epoch": 46.96, "eval_gen_len": 19.0, "eval_loss": 6.873920917510986, "eval_rouge1": 0.001, "eval_rouge2": 0.0002, "eval_rougeL": 0.001, "eval_rougeLsum": 0.0009, "eval_runtime": 12.1519, "eval_samples_per_second": 9.464, "eval_steps_per_second": 1.646, "step": 904 }, { "epoch": 48.0, "eval_gen_len": 19.0, "eval_loss": 6.440176486968994, "eval_rouge1": 0.0001, "eval_rouge2": 0.0, "eval_rougeL": 0.0001, "eval_rougeLsum": 0.0001, "eval_runtime": 12.3618, "eval_samples_per_second": 9.303, "eval_steps_per_second": 1.618, "step": 924 }, { "epoch": 48.99, "eval_gen_len": 19.0, "eval_loss": 6.049317359924316, "eval_rouge1": 0.0006, "eval_rouge2": 0.0, "eval_rougeL": 0.0006, "eval_rougeLsum": 0.0005, "eval_runtime": 12.167, "eval_samples_per_second": 9.452, "eval_steps_per_second": 1.644, "step": 943 }, { "epoch": 49.97, "eval_gen_len": 19.0, "eval_loss": 5.643195152282715, "eval_rouge1": 0.0002, "eval_rouge2": 0.0, "eval_rougeL": 0.0002, "eval_rougeLsum": 0.0002, "eval_runtime": 11.9138, "eval_samples_per_second": 9.653, "eval_steps_per_second": 1.679, "step": 962 }, { "epoch": 50.96, "eval_gen_len": 19.0, "eval_loss": 5.2581987380981445, "eval_rouge1": 0.0007, "eval_rouge2": 0.0, "eval_rougeL": 0.0008, "eval_rougeLsum": 0.0007, "eval_runtime": 12.5979, "eval_samples_per_second": 9.129, "eval_steps_per_second": 1.588, "step": 981 }, { "epoch": 51.95, "grad_norm": 5.70858097076416, "learning_rate": 8.949473684210527e-06, "loss": 11.5478, "step": 1000 }, { "epoch": 52.0, "eval_gen_len": 19.0, "eval_loss": 4.877782344818115, "eval_rouge1": 0.0006, "eval_rouge2": 0.0, "eval_rougeL": 0.0006, "eval_rougeLsum": 0.0006, "eval_runtime": 12.0728, "eval_samples_per_second": 9.526, "eval_steps_per_second": 1.657, "step": 1001 }, { "epoch": 52.99, "eval_gen_len": 19.0, "eval_loss": 4.53688383102417, "eval_rouge1": 0.0006, "eval_rouge2": 0.0, "eval_rougeL": 0.0006, "eval_rougeLsum": 0.0006, "eval_runtime": 12.1263, "eval_samples_per_second": 9.484, "eval_steps_per_second": 1.649, "step": 1020 }, { "epoch": 53.97, "eval_gen_len": 19.0, "eval_loss": 4.222665309906006, "eval_rouge1": 0.0002, "eval_rouge2": 0.0, "eval_rougeL": 0.0002, "eval_rougeLsum": 0.0002, "eval_runtime": 12.5806, "eval_samples_per_second": 9.141, "eval_steps_per_second": 1.59, "step": 1039 }, { "epoch": 54.96, "eval_gen_len": 19.0, "eval_loss": 3.9297854900360107, "eval_rouge1": 0.0008, "eval_rouge2": 0.0, "eval_rougeL": 0.0008, "eval_rougeLsum": 0.0007, "eval_runtime": 12.7467, "eval_samples_per_second": 9.022, "eval_steps_per_second": 1.569, "step": 1058 }, { "epoch": 56.0, "eval_gen_len": 19.0, "eval_loss": 3.6505942344665527, "eval_rouge1": 0.0005, "eval_rouge2": 0.0, "eval_rougeL": 0.0005, "eval_rougeLsum": 0.0005, "eval_runtime": 12.6962, "eval_samples_per_second": 9.058, "eval_steps_per_second": 1.575, "step": 1078 }, { "epoch": 56.99, "eval_gen_len": 19.0, "eval_loss": 3.4100279808044434, "eval_rouge1": 0.0002, "eval_rouge2": 0.0, "eval_rougeL": 0.0002, "eval_rougeLsum": 0.0002, "eval_runtime": 12.5475, "eval_samples_per_second": 9.165, "eval_steps_per_second": 1.594, "step": 1097 }, { "epoch": 57.97, "eval_gen_len": 19.0, "eval_loss": 3.197094202041626, "eval_rouge1": 0.0006, "eval_rouge2": 0.0, "eval_rougeL": 0.0006, "eval_rougeLsum": 0.0006, "eval_runtime": 12.1721, "eval_samples_per_second": 9.448, "eval_steps_per_second": 1.643, "step": 1116 }, { "epoch": 58.96, "eval_gen_len": 18.9913, "eval_loss": 3.0094308853149414, "eval_rouge1": 0.0006, "eval_rouge2": 0.0, "eval_rougeL": 0.0006, "eval_rougeLsum": 0.0006, "eval_runtime": 12.2313, "eval_samples_per_second": 9.402, "eval_steps_per_second": 1.635, "step": 1135 }, { "epoch": 60.0, "eval_gen_len": 18.9913, "eval_loss": 2.841013193130493, "eval_rouge1": 0.0008, "eval_rouge2": 0.0, "eval_rougeL": 0.0008, "eval_rougeLsum": 0.0008, "eval_runtime": 12.5511, "eval_samples_per_second": 9.163, "eval_steps_per_second": 1.593, "step": 1155 }, { "epoch": 60.99, "eval_gen_len": 18.9826, "eval_loss": 2.698159694671631, "eval_rouge1": 0.0007, "eval_rouge2": 0.0, "eval_rougeL": 0.0007, "eval_rougeLsum": 0.0007, "eval_runtime": 12.0968, "eval_samples_per_second": 9.507, "eval_steps_per_second": 1.653, "step": 1174 }, { "epoch": 61.97, "eval_gen_len": 18.6783, "eval_loss": 2.5645217895507812, "eval_rouge1": 0.0011, "eval_rouge2": 0.0, "eval_rougeL": 0.0011, "eval_rougeLsum": 0.0011, "eval_runtime": 12.1821, "eval_samples_per_second": 9.44, "eval_steps_per_second": 1.642, "step": 1193 }, { "epoch": 62.96, "eval_gen_len": 17.3565, "eval_loss": 2.445538282394409, "eval_rouge1": 0.0007, "eval_rouge2": 0.0, "eval_rougeL": 0.0007, "eval_rougeLsum": 0.0007, "eval_runtime": 12.1339, "eval_samples_per_second": 9.478, "eval_steps_per_second": 1.648, "step": 1212 }, { "epoch": 64.0, "eval_gen_len": 13.5043, "eval_loss": 2.338679075241089, "eval_rouge1": 0.0011, "eval_rouge2": 0.0, "eval_rougeL": 0.001, "eval_rougeLsum": 0.001, "eval_runtime": 12.4085, "eval_samples_per_second": 9.268, "eval_steps_per_second": 1.612, "step": 1232 }, { "epoch": 64.99, "eval_gen_len": 9.4348, "eval_loss": 2.2483484745025635, "eval_rouge1": 0.0002, "eval_rouge2": 0.0, "eval_rougeL": 0.0002, "eval_rougeLsum": 0.0002, "eval_runtime": 12.3896, "eval_samples_per_second": 9.282, "eval_steps_per_second": 1.614, "step": 1251 }, { "epoch": 65.97, "eval_gen_len": 6.9652, "eval_loss": 2.1728155612945557, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.4531, "eval_samples_per_second": 9.235, "eval_steps_per_second": 1.606, "step": 1270 }, { "epoch": 66.96, "eval_gen_len": 6.2957, "eval_loss": 2.1103546619415283, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.2925, "eval_samples_per_second": 9.355, "eval_steps_per_second": 1.627, "step": 1289 }, { "epoch": 68.0, "eval_gen_len": 6.0, "eval_loss": 2.0531256198883057, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.1269, "eval_samples_per_second": 9.483, "eval_steps_per_second": 1.649, "step": 1309 }, { "epoch": 68.99, "eval_gen_len": 5.7043, "eval_loss": 2.006763219833374, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.0803, "eval_samples_per_second": 9.52, "eval_steps_per_second": 1.656, "step": 1328 }, { "epoch": 69.97, "eval_gen_len": 5.6609, "eval_loss": 1.9675697088241577, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 13.197, "eval_samples_per_second": 8.714, "eval_steps_per_second": 1.515, "step": 1347 }, { "epoch": 70.96, "eval_gen_len": 5.6, "eval_loss": 1.9337714910507202, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.696, "eval_samples_per_second": 9.058, "eval_steps_per_second": 1.575, "step": 1366 }, { "epoch": 72.0, "eval_gen_len": 5.6174, "eval_loss": 1.9011404514312744, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.5629, "eval_samples_per_second": 9.154, "eval_steps_per_second": 1.592, "step": 1386 }, { "epoch": 72.99, "eval_gen_len": 5.6435, "eval_loss": 1.8734184503555298, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.9654, "eval_samples_per_second": 8.87, "eval_steps_per_second": 1.543, "step": 1405 }, { "epoch": 73.97, "eval_gen_len": 5.7739, "eval_loss": 1.84665846824646, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.8257, "eval_samples_per_second": 8.966, "eval_steps_per_second": 1.559, "step": 1424 }, { "epoch": 74.96, "eval_gen_len": 5.7478, "eval_loss": 1.8196372985839844, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.89, "eval_samples_per_second": 8.922, "eval_steps_per_second": 1.552, "step": 1443 }, { "epoch": 76.0, "eval_gen_len": 5.7217, "eval_loss": 1.797453761100769, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.4957, "eval_samples_per_second": 9.203, "eval_steps_per_second": 1.601, "step": 1463 }, { "epoch": 76.99, "eval_gen_len": 5.8174, "eval_loss": 1.7788159847259521, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.4326, "eval_samples_per_second": 9.25, "eval_steps_per_second": 1.609, "step": 1482 }, { "epoch": 77.92, "grad_norm": 2.0204899311065674, "learning_rate": 8.42421052631579e-06, "loss": 3.2357, "step": 1500 }, { "epoch": 77.97, "eval_gen_len": 5.8957, "eval_loss": 1.76212739944458, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.5086, "eval_samples_per_second": 9.194, "eval_steps_per_second": 1.599, "step": 1501 }, { "epoch": 78.96, "eval_gen_len": 5.8957, "eval_loss": 1.744727373123169, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.3492, "eval_samples_per_second": 9.312, "eval_steps_per_second": 1.62, "step": 1520 }, { "epoch": 80.0, "eval_gen_len": 5.9391, "eval_loss": 1.7277677059173584, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.126, "eval_samples_per_second": 9.484, "eval_steps_per_second": 1.649, "step": 1540 }, { "epoch": 80.99, "eval_gen_len": 5.8435, "eval_loss": 1.7146191596984863, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.5132, "eval_samples_per_second": 9.19, "eval_steps_per_second": 1.598, "step": 1559 }, { "epoch": 81.97, "eval_gen_len": 5.513, "eval_loss": 1.7026437520980835, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.7453, "eval_samples_per_second": 9.023, "eval_steps_per_second": 1.569, "step": 1578 }, { "epoch": 82.96, "eval_gen_len": 5.5652, "eval_loss": 1.68914794921875, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.98, "eval_samples_per_second": 8.86, "eval_steps_per_second": 1.541, "step": 1597 }, { "epoch": 84.0, "eval_gen_len": 5.3304, "eval_loss": 1.6754295825958252, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.127, "eval_samples_per_second": 9.483, "eval_steps_per_second": 1.649, "step": 1617 }, { "epoch": 84.99, "eval_gen_len": 5.6435, "eval_loss": 1.6632497310638428, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.2091, "eval_samples_per_second": 9.419, "eval_steps_per_second": 1.638, "step": 1636 }, { "epoch": 85.97, "eval_gen_len": 5.9652, "eval_loss": 1.652411699295044, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.3822, "eval_samples_per_second": 9.288, "eval_steps_per_second": 1.615, "step": 1655 }, { "epoch": 86.96, "eval_gen_len": 5.9478, "eval_loss": 1.642953872680664, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.0847, "eval_samples_per_second": 9.516, "eval_steps_per_second": 1.655, "step": 1674 }, { "epoch": 88.0, "eval_gen_len": 5.4696, "eval_loss": 1.6336156129837036, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.5368, "eval_samples_per_second": 9.173, "eval_steps_per_second": 1.595, "step": 1694 }, { "epoch": 88.99, "eval_gen_len": 5.4, "eval_loss": 1.6246790885925293, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.4452, "eval_samples_per_second": 9.241, "eval_steps_per_second": 1.607, "step": 1713 }, { "epoch": 89.97, "eval_gen_len": 5.7739, "eval_loss": 1.615963339805603, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.0491, "eval_samples_per_second": 9.544, "eval_steps_per_second": 1.66, "step": 1732 }, { "epoch": 90.96, "eval_gen_len": 6.2348, "eval_loss": 1.606810450553894, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.2296, "eval_samples_per_second": 9.403, "eval_steps_per_second": 1.635, "step": 1751 }, { "epoch": 92.0, "eval_gen_len": 6.1652, "eval_loss": 1.59696626663208, "eval_rouge1": 0.0002, "eval_rouge2": 0.0, "eval_rougeL": 0.0002, "eval_rougeLsum": 0.0002, "eval_runtime": 12.4411, "eval_samples_per_second": 9.244, "eval_steps_per_second": 1.608, "step": 1771 }, { "epoch": 92.99, "eval_gen_len": 6.3739, "eval_loss": 1.5894649028778076, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 11.8551, "eval_samples_per_second": 9.7, "eval_steps_per_second": 1.687, "step": 1790 }, { "epoch": 93.97, "eval_gen_len": 6.5043, "eval_loss": 1.5818349123001099, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.2434, "eval_samples_per_second": 9.393, "eval_steps_per_second": 1.634, "step": 1809 }, { "epoch": 94.96, "eval_gen_len": 6.3565, "eval_loss": 1.5746902227401733, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.3859, "eval_samples_per_second": 9.285, "eval_steps_per_second": 1.615, "step": 1828 }, { "epoch": 96.0, "eval_gen_len": 6.8087, "eval_loss": 1.567280650138855, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.1358, "eval_samples_per_second": 9.476, "eval_steps_per_second": 1.648, "step": 1848 }, { "epoch": 96.99, "eval_gen_len": 6.8, "eval_loss": 1.5616425275802612, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.3339, "eval_samples_per_second": 9.324, "eval_steps_per_second": 1.622, "step": 1867 }, { "epoch": 97.97, "eval_gen_len": 6.6522, "eval_loss": 1.5548292398452759, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.1749, "eval_samples_per_second": 9.446, "eval_steps_per_second": 1.643, "step": 1886 }, { "epoch": 98.96, "eval_gen_len": 6.5913, "eval_loss": 1.5485645532608032, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 11.995, "eval_samples_per_second": 9.587, "eval_steps_per_second": 1.667, "step": 1905 }, { "epoch": 100.0, "eval_gen_len": 6.4522, "eval_loss": 1.5418448448181152, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 11.7913, "eval_samples_per_second": 9.753, "eval_steps_per_second": 1.696, "step": 1925 }, { "epoch": 100.99, "eval_gen_len": 5.6957, "eval_loss": 1.5365816354751587, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.5711, "eval_samples_per_second": 9.148, "eval_steps_per_second": 1.591, "step": 1944 }, { "epoch": 101.97, "eval_gen_len": 5.5739, "eval_loss": 1.5312349796295166, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.7436, "eval_samples_per_second": 9.024, "eval_steps_per_second": 1.569, "step": 1963 }, { "epoch": 102.96, "eval_gen_len": 5.4174, "eval_loss": 1.5244060754776, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.3775, "eval_samples_per_second": 9.291, "eval_steps_per_second": 1.616, "step": 1982 }, { "epoch": 103.9, "grad_norm": 1.870866298675537, "learning_rate": 7.898947368421053e-06, "loss": 1.8779, "step": 2000 }, { "epoch": 104.0, "eval_gen_len": 5.3565, "eval_loss": 1.5186233520507812, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.0793, "eval_samples_per_second": 9.52, "eval_steps_per_second": 1.656, "step": 2002 }, { "epoch": 104.99, "eval_gen_len": 5.6174, "eval_loss": 1.5112248659133911, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 11.9289, "eval_samples_per_second": 9.64, "eval_steps_per_second": 1.677, "step": 2021 }, { "epoch": 105.97, "eval_gen_len": 5.9217, "eval_loss": 1.5045664310455322, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 11.9549, "eval_samples_per_second": 9.619, "eval_steps_per_second": 1.673, "step": 2040 }, { "epoch": 106.96, "eval_gen_len": 5.9913, "eval_loss": 1.4977103471755981, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.5176, "eval_samples_per_second": 9.187, "eval_steps_per_second": 1.598, "step": 2059 }, { "epoch": 108.0, "eval_gen_len": 5.4957, "eval_loss": 1.491757869720459, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.36, "eval_samples_per_second": 9.304, "eval_steps_per_second": 1.618, "step": 2079 }, { "epoch": 108.99, "eval_gen_len": 6.0348, "eval_loss": 1.486743688583374, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.5097, "eval_samples_per_second": 9.193, "eval_steps_per_second": 1.599, "step": 2098 }, { "epoch": 109.97, "eval_gen_len": 6.3304, "eval_loss": 1.480473279953003, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 11.9075, "eval_samples_per_second": 9.658, "eval_steps_per_second": 1.68, "step": 2117 }, { "epoch": 110.96, "eval_gen_len": 6.2, "eval_loss": 1.4745731353759766, "eval_rouge1": 0.0003, "eval_rouge2": 0.0, "eval_rougeL": 0.0003, "eval_rougeLsum": 0.0003, "eval_runtime": 12.2105, "eval_samples_per_second": 9.418, "eval_steps_per_second": 1.638, "step": 2136 }, { "epoch": 112.0, "eval_gen_len": 5.9826, "eval_loss": 1.468475341796875, "eval_rouge1": 0.0009, "eval_rouge2": 0.0, "eval_rougeL": 0.0008, "eval_rougeLsum": 0.0008, "eval_runtime": 12.6855, "eval_samples_per_second": 9.065, "eval_steps_per_second": 1.577, "step": 2156 }, { "epoch": 112.99, "eval_gen_len": 5.8261, "eval_loss": 1.4624364376068115, "eval_rouge1": 0.0009, "eval_rouge2": 0.0, "eval_rougeL": 0.0008, "eval_rougeLsum": 0.0008, "eval_runtime": 12.3065, "eval_samples_per_second": 9.345, "eval_steps_per_second": 1.625, "step": 2175 }, { "epoch": 113.97, "eval_gen_len": 5.487, "eval_loss": 1.4564381837844849, "eval_rouge1": 0.0009, "eval_rouge2": 0.0, "eval_rougeL": 0.0008, "eval_rougeLsum": 0.0008, "eval_runtime": 13.0224, "eval_samples_per_second": 8.831, "eval_steps_per_second": 1.536, "step": 2194 }, { "epoch": 114.96, "eval_gen_len": 5.1565, "eval_loss": 1.4514414072036743, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.4022, "eval_samples_per_second": 9.273, "eval_steps_per_second": 1.613, "step": 2213 }, { "epoch": 116.0, "eval_gen_len": 5.4957, "eval_loss": 1.442409873008728, "eval_rouge1": 0.001, "eval_rouge2": 0.0, "eval_rougeL": 0.0009, "eval_rougeLsum": 0.0009, "eval_runtime": 12.3722, "eval_samples_per_second": 9.295, "eval_steps_per_second": 1.617, "step": 2233 }, { "epoch": 116.99, "eval_gen_len": 5.7391, "eval_loss": 1.4344819784164429, "eval_rouge1": 0.0017, "eval_rouge2": 0.0, "eval_rougeL": 0.0017, "eval_rougeLsum": 0.0018, "eval_runtime": 12.5554, "eval_samples_per_second": 9.159, "eval_steps_per_second": 1.593, "step": 2252 }, { "epoch": 117.97, "eval_gen_len": 6.0435, "eval_loss": 1.4248623847961426, "eval_rouge1": 0.0021, "eval_rouge2": 0.0, "eval_rougeL": 0.002, "eval_rougeLsum": 0.0021, "eval_runtime": 12.1087, "eval_samples_per_second": 9.497, "eval_steps_per_second": 1.652, "step": 2271 }, { "epoch": 118.96, "eval_gen_len": 6.4783, "eval_loss": 1.4156382083892822, "eval_rouge1": 0.0033, "eval_rouge2": 0.0, "eval_rougeL": 0.0032, "eval_rougeLsum": 0.0033, "eval_runtime": 12.0374, "eval_samples_per_second": 9.554, "eval_steps_per_second": 1.661, "step": 2290 }, { "epoch": 120.0, "eval_gen_len": 6.3043, "eval_loss": 1.408909559249878, "eval_rouge1": 0.0038, "eval_rouge2": 0.0, "eval_rougeL": 0.0037, "eval_rougeLsum": 0.0038, "eval_runtime": 12.5996, "eval_samples_per_second": 9.127, "eval_steps_per_second": 1.587, "step": 2310 }, { "epoch": 120.99, "eval_gen_len": 6.1043, "eval_loss": 1.4028282165527344, "eval_rouge1": 0.0043, "eval_rouge2": 0.0, "eval_rougeL": 0.0042, "eval_rougeLsum": 0.0043, "eval_runtime": 12.384, "eval_samples_per_second": 9.286, "eval_steps_per_second": 1.615, "step": 2329 }, { "epoch": 121.97, "eval_gen_len": 5.9478, "eval_loss": 1.3989005088806152, "eval_rouge1": 0.0036, "eval_rouge2": 0.0, "eval_rougeL": 0.0036, "eval_rougeLsum": 0.0037, "eval_runtime": 12.3007, "eval_samples_per_second": 9.349, "eval_steps_per_second": 1.626, "step": 2348 }, { "epoch": 122.96, "eval_gen_len": 5.4348, "eval_loss": 1.3940106630325317, "eval_rouge1": 0.0029, "eval_rouge2": 0.0, "eval_rougeL": 0.0026, "eval_rougeLsum": 0.0026, "eval_runtime": 13.1431, "eval_samples_per_second": 8.75, "eval_steps_per_second": 1.522, "step": 2367 }, { "epoch": 124.0, "eval_gen_len": 5.3913, "eval_loss": 1.387468695640564, "eval_rouge1": 0.0036, "eval_rouge2": 0.0, "eval_rougeL": 0.0034, "eval_rougeLsum": 0.0035, "eval_runtime": 12.0602, "eval_samples_per_second": 9.535, "eval_steps_per_second": 1.658, "step": 2387 }, { "epoch": 124.99, "eval_gen_len": 5.4174, "eval_loss": 1.3833892345428467, "eval_rouge1": 0.0031, "eval_rouge2": 0.0, "eval_rougeL": 0.0031, "eval_rougeLsum": 0.0032, "eval_runtime": 12.5404, "eval_samples_per_second": 9.17, "eval_steps_per_second": 1.595, "step": 2406 }, { "epoch": 125.97, "eval_gen_len": 5.8, "eval_loss": 1.3742746114730835, "eval_rouge1": 0.0034, "eval_rouge2": 0.0, "eval_rougeL": 0.0034, "eval_rougeLsum": 0.0034, "eval_runtime": 12.5374, "eval_samples_per_second": 9.173, "eval_steps_per_second": 1.595, "step": 2425 }, { "epoch": 126.96, "eval_gen_len": 6.2348, "eval_loss": 1.3673855066299438, "eval_rouge1": 0.0054, "eval_rouge2": 0.0, "eval_rougeL": 0.0051, "eval_rougeLsum": 0.0052, "eval_runtime": 12.3787, "eval_samples_per_second": 9.29, "eval_steps_per_second": 1.616, "step": 2444 }, { "epoch": 128.0, "eval_gen_len": 6.3739, "eval_loss": 1.3610302209854126, "eval_rouge1": 0.0051, "eval_rouge2": 0.0, "eval_rougeL": 0.0051, "eval_rougeLsum": 0.0053, "eval_runtime": 12.0722, "eval_samples_per_second": 9.526, "eval_steps_per_second": 1.657, "step": 2464 }, { "epoch": 128.99, "eval_gen_len": 7.1565, "eval_loss": 1.351613163948059, "eval_rouge1": 0.0062, "eval_rouge2": 0.0002, "eval_rougeL": 0.0056, "eval_rougeLsum": 0.0057, "eval_runtime": 11.971, "eval_samples_per_second": 9.607, "eval_steps_per_second": 1.671, "step": 2483 }, { "epoch": 129.87, "grad_norm": 2.394576072692871, "learning_rate": 7.3726315789473694e-06, "loss": 1.6063, "step": 2500 }, { "epoch": 129.97, "eval_gen_len": 7.4522, "eval_loss": 1.3424580097198486, "eval_rouge1": 0.0055, "eval_rouge2": 0.0002, "eval_rougeL": 0.0053, "eval_rougeLsum": 0.0053, "eval_runtime": 12.0884, "eval_samples_per_second": 9.513, "eval_steps_per_second": 1.654, "step": 2502 }, { "epoch": 130.96, "eval_gen_len": 7.4609, "eval_loss": 1.334855556488037, "eval_rouge1": 0.0044, "eval_rouge2": 0.0002, "eval_rougeL": 0.0042, "eval_rougeLsum": 0.0041, "eval_runtime": 11.9536, "eval_samples_per_second": 9.621, "eval_steps_per_second": 1.673, "step": 2521 }, { "epoch": 132.0, "eval_gen_len": 7.4522, "eval_loss": 1.3267827033996582, "eval_rouge1": 0.0048, "eval_rouge2": 0.0, "eval_rougeL": 0.0046, "eval_rougeLsum": 0.0046, "eval_runtime": 12.6532, "eval_samples_per_second": 9.089, "eval_steps_per_second": 1.581, "step": 2541 }, { "epoch": 132.99, "eval_gen_len": 7.8522, "eval_loss": 1.3201897144317627, "eval_rouge1": 0.0096, "eval_rouge2": 0.0002, "eval_rougeL": 0.0088, "eval_rougeLsum": 0.0089, "eval_runtime": 12.2562, "eval_samples_per_second": 9.383, "eval_steps_per_second": 1.632, "step": 2560 }, { "epoch": 133.97, "eval_gen_len": 7.5304, "eval_loss": 1.3138891458511353, "eval_rouge1": 0.0074, "eval_rouge2": 0.0002, "eval_rougeL": 0.0075, "eval_rougeLsum": 0.0075, "eval_runtime": 11.9989, "eval_samples_per_second": 9.584, "eval_steps_per_second": 1.667, "step": 2579 }, { "epoch": 134.96, "eval_gen_len": 7.2348, "eval_loss": 1.3059097528457642, "eval_rouge1": 0.005, "eval_rouge2": 0.0, "eval_rougeL": 0.005, "eval_rougeLsum": 0.0051, "eval_runtime": 11.9785, "eval_samples_per_second": 9.6, "eval_steps_per_second": 1.67, "step": 2598 }, { "epoch": 136.0, "eval_gen_len": 7.1304, "eval_loss": 1.298433780670166, "eval_rouge1": 0.005, "eval_rouge2": 0.0, "eval_rougeL": 0.0046, "eval_rougeLsum": 0.0047, "eval_runtime": 12.1248, "eval_samples_per_second": 9.485, "eval_steps_per_second": 1.65, "step": 2618 }, { "epoch": 136.99, "eval_gen_len": 8.0261, "eval_loss": 1.29219651222229, "eval_rouge1": 0.0072, "eval_rouge2": 0.0, "eval_rougeL": 0.0069, "eval_rougeLsum": 0.0069, "eval_runtime": 12.5125, "eval_samples_per_second": 9.191, "eval_steps_per_second": 1.598, "step": 2637 }, { "epoch": 137.97, "eval_gen_len": 8.4087, "eval_loss": 1.2833046913146973, "eval_rouge1": 0.0108, "eval_rouge2": 0.0, "eval_rougeL": 0.0097, "eval_rougeLsum": 0.0099, "eval_runtime": 12.0943, "eval_samples_per_second": 9.509, "eval_steps_per_second": 1.654, "step": 2656 }, { "epoch": 138.96, "eval_gen_len": 8.3739, "eval_loss": 1.278290033340454, "eval_rouge1": 0.0111, "eval_rouge2": 0.0, "eval_rougeL": 0.0094, "eval_rougeLsum": 0.0095, "eval_runtime": 12.8059, "eval_samples_per_second": 8.98, "eval_steps_per_second": 1.562, "step": 2675 }, { "epoch": 140.0, "eval_gen_len": 8.5043, "eval_loss": 1.2764371633529663, "eval_rouge1": 0.0114, "eval_rouge2": 0.0, "eval_rougeL": 0.0104, "eval_rougeLsum": 0.0108, "eval_runtime": 12.1122, "eval_samples_per_second": 9.495, "eval_steps_per_second": 1.651, "step": 2695 }, { "epoch": 140.99, "eval_gen_len": 8.6261, "eval_loss": 1.2698535919189453, "eval_rouge1": 0.0139, "eval_rouge2": 0.0, "eval_rougeL": 0.0126, "eval_rougeLsum": 0.0128, "eval_runtime": 12.5216, "eval_samples_per_second": 9.184, "eval_steps_per_second": 1.597, "step": 2714 }, { "epoch": 141.97, "eval_gen_len": 8.2435, "eval_loss": 1.2616974115371704, "eval_rouge1": 0.0135, "eval_rouge2": 0.0, "eval_rougeL": 0.0119, "eval_rougeLsum": 0.012, "eval_runtime": 12.2763, "eval_samples_per_second": 9.368, "eval_steps_per_second": 1.629, "step": 2733 }, { "epoch": 142.96, "eval_gen_len": 8.713, "eval_loss": 1.2539962530136108, "eval_rouge1": 0.0144, "eval_rouge2": 0.0, "eval_rougeL": 0.0123, "eval_rougeLsum": 0.0125, "eval_runtime": 12.1565, "eval_samples_per_second": 9.46, "eval_steps_per_second": 1.645, "step": 2752 }, { "epoch": 144.0, "eval_gen_len": 8.9826, "eval_loss": 1.2482250928878784, "eval_rouge1": 0.0153, "eval_rouge2": 0.0002, "eval_rougeL": 0.0137, "eval_rougeLsum": 0.0137, "eval_runtime": 12.3974, "eval_samples_per_second": 9.276, "eval_steps_per_second": 1.613, "step": 2772 }, { "epoch": 144.99, "eval_gen_len": 8.9391, "eval_loss": 1.2442501783370972, "eval_rouge1": 0.0139, "eval_rouge2": 0.0006, "eval_rougeL": 0.013, "eval_rougeLsum": 0.0129, "eval_runtime": 12.5443, "eval_samples_per_second": 9.168, "eval_steps_per_second": 1.594, "step": 2791 }, { "epoch": 145.97, "eval_gen_len": 9.3565, "eval_loss": 1.2381587028503418, "eval_rouge1": 0.0187, "eval_rouge2": 0.0011, "eval_rougeL": 0.0151, "eval_rougeLsum": 0.0151, "eval_runtime": 12.381, "eval_samples_per_second": 9.288, "eval_steps_per_second": 1.615, "step": 2810 }, { "epoch": 146.96, "eval_gen_len": 9.513, "eval_loss": 1.2287580966949463, "eval_rouge1": 0.0202, "eval_rouge2": 0.0008, "eval_rougeL": 0.0172, "eval_rougeLsum": 0.0172, "eval_runtime": 12.6591, "eval_samples_per_second": 9.084, "eval_steps_per_second": 1.58, "step": 2829 }, { "epoch": 148.0, "eval_gen_len": 8.5565, "eval_loss": 1.2264941930770874, "eval_rouge1": 0.0147, "eval_rouge2": 0.0004, "eval_rougeL": 0.0126, "eval_rougeLsum": 0.0124, "eval_runtime": 11.9708, "eval_samples_per_second": 9.607, "eval_steps_per_second": 1.671, "step": 2849 }, { "epoch": 148.99, "eval_gen_len": 8.7652, "eval_loss": 1.222589135169983, "eval_rouge1": 0.0153, "eval_rouge2": 0.0005, "eval_rougeL": 0.0129, "eval_rougeLsum": 0.0129, "eval_runtime": 12.5442, "eval_samples_per_second": 9.168, "eval_steps_per_second": 1.594, "step": 2868 }, { "epoch": 149.97, "eval_gen_len": 8.8435, "eval_loss": 1.2170130014419556, "eval_rouge1": 0.0147, "eval_rouge2": 0.0008, "eval_rougeL": 0.0128, "eval_rougeLsum": 0.0127, "eval_runtime": 12.6121, "eval_samples_per_second": 9.118, "eval_steps_per_second": 1.586, "step": 2887 }, { "epoch": 150.96, "eval_gen_len": 9.4174, "eval_loss": 1.208147406578064, "eval_rouge1": 0.0181, "eval_rouge2": 0.001, "eval_rougeL": 0.0162, "eval_rougeLsum": 0.0162, "eval_runtime": 13.01, "eval_samples_per_second": 8.839, "eval_steps_per_second": 1.537, "step": 2906 }, { "epoch": 152.0, "eval_gen_len": 9.7739, "eval_loss": 1.2039202451705933, "eval_rouge1": 0.0216, "eval_rouge2": 0.0013, "eval_rougeL": 0.019, "eval_rougeLsum": 0.0191, "eval_runtime": 12.7191, "eval_samples_per_second": 9.042, "eval_steps_per_second": 1.572, "step": 2926 }, { "epoch": 152.99, "eval_gen_len": 9.5652, "eval_loss": 1.200941801071167, "eval_rouge1": 0.02, "eval_rouge2": 0.0011, "eval_rougeL": 0.0176, "eval_rougeLsum": 0.0178, "eval_runtime": 12.6301, "eval_samples_per_second": 9.105, "eval_steps_per_second": 1.584, "step": 2945 }, { "epoch": 153.97, "eval_gen_len": 9.4609, "eval_loss": 1.195379376411438, "eval_rouge1": 0.0156, "eval_rouge2": 0.0008, "eval_rougeL": 0.0131, "eval_rougeLsum": 0.0133, "eval_runtime": 12.3176, "eval_samples_per_second": 9.336, "eval_steps_per_second": 1.624, "step": 2964 }, { "epoch": 154.96, "eval_gen_len": 9.6522, "eval_loss": 1.1899113655090332, "eval_rouge1": 0.0181, "eval_rouge2": 0.001, "eval_rougeL": 0.0156, "eval_rougeLsum": 0.0157, "eval_runtime": 12.5731, "eval_samples_per_second": 9.147, "eval_steps_per_second": 1.591, "step": 2983 }, { "epoch": 155.84, "grad_norm": 2.1833202838897705, "learning_rate": 6.846315789473684e-06, "loss": 1.4271, "step": 3000 }, { "epoch": 156.0, "eval_gen_len": 9.4696, "eval_loss": 1.1842440366744995, "eval_rouge1": 0.0203, "eval_rouge2": 0.0008, "eval_rougeL": 0.0174, "eval_rougeLsum": 0.0174, "eval_runtime": 12.2741, "eval_samples_per_second": 9.369, "eval_steps_per_second": 1.629, "step": 3003 }, { "epoch": 156.99, "eval_gen_len": 9.8174, "eval_loss": 1.1782081127166748, "eval_rouge1": 0.0187, "eval_rouge2": 0.0007, "eval_rougeL": 0.0163, "eval_rougeLsum": 0.0165, "eval_runtime": 13.1906, "eval_samples_per_second": 8.718, "eval_steps_per_second": 1.516, "step": 3022 }, { "epoch": 157.97, "eval_gen_len": 9.9304, "eval_loss": 1.173979640007019, "eval_rouge1": 0.0206, "eval_rouge2": 0.0005, "eval_rougeL": 0.018, "eval_rougeLsum": 0.0183, "eval_runtime": 12.6288, "eval_samples_per_second": 9.106, "eval_steps_per_second": 1.584, "step": 3041 }, { "epoch": 158.96, "eval_gen_len": 10.0087, "eval_loss": 1.1698901653289795, "eval_rouge1": 0.0198, "eval_rouge2": 0.0005, "eval_rougeL": 0.0177, "eval_rougeLsum": 0.018, "eval_runtime": 12.5926, "eval_samples_per_second": 9.132, "eval_steps_per_second": 1.588, "step": 3060 }, { "epoch": 160.0, "eval_gen_len": 10.2174, "eval_loss": 1.1631128787994385, "eval_rouge1": 0.0214, "eval_rouge2": 0.0004, "eval_rougeL": 0.0189, "eval_rougeLsum": 0.0191, "eval_runtime": 12.2102, "eval_samples_per_second": 9.418, "eval_steps_per_second": 1.638, "step": 3080 }, { "epoch": 160.99, "eval_gen_len": 10.1304, "eval_loss": 1.1569976806640625, "eval_rouge1": 0.0221, "eval_rouge2": 0.0009, "eval_rougeL": 0.0185, "eval_rougeLsum": 0.0187, "eval_runtime": 12.421, "eval_samples_per_second": 9.259, "eval_steps_per_second": 1.61, "step": 3099 }, { "epoch": 161.97, "eval_gen_len": 10.0609, "eval_loss": 1.1523972749710083, "eval_rouge1": 0.0202, "eval_rouge2": 0.0009, "eval_rougeL": 0.0169, "eval_rougeLsum": 0.0171, "eval_runtime": 12.3412, "eval_samples_per_second": 9.318, "eval_steps_per_second": 1.621, "step": 3118 }, { "epoch": 162.96, "eval_gen_len": 9.8609, "eval_loss": 1.1472958326339722, "eval_rouge1": 0.0202, "eval_rouge2": 0.0006, "eval_rougeL": 0.0173, "eval_rougeLsum": 0.0174, "eval_runtime": 12.2339, "eval_samples_per_second": 9.4, "eval_steps_per_second": 1.635, "step": 3137 }, { "epoch": 164.0, "eval_gen_len": 10.3913, "eval_loss": 1.1415693759918213, "eval_rouge1": 0.0218, "eval_rouge2": 0.0011, "eval_rougeL": 0.0184, "eval_rougeLsum": 0.0183, "eval_runtime": 12.6611, "eval_samples_per_second": 9.083, "eval_steps_per_second": 1.58, "step": 3157 }, { "epoch": 164.99, "eval_gen_len": 9.713, "eval_loss": 1.135535478591919, "eval_rouge1": 0.0174, "eval_rouge2": 0.0005, "eval_rougeL": 0.0148, "eval_rougeLsum": 0.0146, "eval_runtime": 12.6693, "eval_samples_per_second": 9.077, "eval_steps_per_second": 1.579, "step": 3176 }, { "epoch": 165.97, "eval_gen_len": 10.113, "eval_loss": 1.1300948858261108, "eval_rouge1": 0.0185, "eval_rouge2": 0.0007, "eval_rougeL": 0.0151, "eval_rougeLsum": 0.0151, "eval_runtime": 12.211, "eval_samples_per_second": 9.418, "eval_steps_per_second": 1.638, "step": 3195 }, { "epoch": 166.96, "eval_gen_len": 10.1043, "eval_loss": 1.125083088874817, "eval_rouge1": 0.0205, "eval_rouge2": 0.0008, "eval_rougeL": 0.0165, "eval_rougeLsum": 0.0164, "eval_runtime": 12.1409, "eval_samples_per_second": 9.472, "eval_steps_per_second": 1.647, "step": 3214 }, { "epoch": 168.0, "eval_gen_len": 10.2348, "eval_loss": 1.1202179193496704, "eval_rouge1": 0.0195, "eval_rouge2": 0.0008, "eval_rougeL": 0.0159, "eval_rougeLsum": 0.0159, "eval_runtime": 12.1317, "eval_samples_per_second": 9.479, "eval_steps_per_second": 1.649, "step": 3234 }, { "epoch": 168.99, "eval_gen_len": 10.8957, "eval_loss": 1.114139199256897, "eval_rouge1": 0.0243, "eval_rouge2": 0.0018, "eval_rougeL": 0.0198, "eval_rougeLsum": 0.0198, "eval_runtime": 12.443, "eval_samples_per_second": 9.242, "eval_steps_per_second": 1.607, "step": 3253 }, { "epoch": 169.97, "eval_gen_len": 11.0174, "eval_loss": 1.1090463399887085, "eval_rouge1": 0.0202, "eval_rouge2": 0.0013, "eval_rougeL": 0.0161, "eval_rougeLsum": 0.0163, "eval_runtime": 12.144, "eval_samples_per_second": 9.47, "eval_steps_per_second": 1.647, "step": 3272 }, { "epoch": 170.96, "eval_gen_len": 11.313, "eval_loss": 1.1036903858184814, "eval_rouge1": 0.0223, "eval_rouge2": 0.0015, "eval_rougeL": 0.0186, "eval_rougeLsum": 0.0186, "eval_runtime": 12.3793, "eval_samples_per_second": 9.29, "eval_steps_per_second": 1.616, "step": 3291 }, { "epoch": 172.0, "eval_gen_len": 11.3739, "eval_loss": 1.0987364053726196, "eval_rouge1": 0.0212, "eval_rouge2": 0.0013, "eval_rougeL": 0.0178, "eval_rougeLsum": 0.0179, "eval_runtime": 12.3964, "eval_samples_per_second": 9.277, "eval_steps_per_second": 1.613, "step": 3311 }, { "epoch": 172.99, "eval_gen_len": 11.2522, "eval_loss": 1.0937457084655762, "eval_rouge1": 0.0219, "eval_rouge2": 0.0015, "eval_rougeL": 0.0182, "eval_rougeLsum": 0.018, "eval_runtime": 12.5831, "eval_samples_per_second": 9.139, "eval_steps_per_second": 1.589, "step": 3330 }, { "epoch": 173.97, "eval_gen_len": 11.2174, "eval_loss": 1.090100646018982, "eval_rouge1": 0.0199, "eval_rouge2": 0.0013, "eval_rougeL": 0.0162, "eval_rougeLsum": 0.0163, "eval_runtime": 12.7101, "eval_samples_per_second": 9.048, "eval_steps_per_second": 1.574, "step": 3349 }, { "epoch": 174.96, "eval_gen_len": 11.2174, "eval_loss": 1.0861694812774658, "eval_rouge1": 0.018, "eval_rouge2": 0.0011, "eval_rougeL": 0.0149, "eval_rougeLsum": 0.0149, "eval_runtime": 12.2255, "eval_samples_per_second": 9.407, "eval_steps_per_second": 1.636, "step": 3368 }, { "epoch": 176.0, "eval_gen_len": 11.3304, "eval_loss": 1.080249309539795, "eval_rouge1": 0.0181, "eval_rouge2": 0.0013, "eval_rougeL": 0.0154, "eval_rougeLsum": 0.0154, "eval_runtime": 12.2755, "eval_samples_per_second": 9.368, "eval_steps_per_second": 1.629, "step": 3388 }, { "epoch": 176.99, "eval_gen_len": 10.9739, "eval_loss": 1.0751179456710815, "eval_rouge1": 0.0147, "eval_rouge2": 0.0012, "eval_rougeL": 0.0124, "eval_rougeLsum": 0.0125, "eval_runtime": 12.3848, "eval_samples_per_second": 9.286, "eval_steps_per_second": 1.615, "step": 3407 }, { "epoch": 177.97, "eval_gen_len": 10.8087, "eval_loss": 1.069909930229187, "eval_rouge1": 0.0149, "eval_rouge2": 0.001, "eval_rougeL": 0.0123, "eval_rougeLsum": 0.0124, "eval_runtime": 12.6526, "eval_samples_per_second": 9.089, "eval_steps_per_second": 1.581, "step": 3426 }, { "epoch": 178.96, "eval_gen_len": 10.7217, "eval_loss": 1.0651546716690063, "eval_rouge1": 0.0134, "eval_rouge2": 0.0011, "eval_rougeL": 0.0122, "eval_rougeLsum": 0.0122, "eval_runtime": 13.0395, "eval_samples_per_second": 8.819, "eval_steps_per_second": 1.534, "step": 3445 }, { "epoch": 180.0, "eval_gen_len": 10.6174, "eval_loss": 1.060491681098938, "eval_rouge1": 0.0121, "eval_rouge2": 0.001, "eval_rougeL": 0.0101, "eval_rougeLsum": 0.0101, "eval_runtime": 12.3246, "eval_samples_per_second": 9.331, "eval_steps_per_second": 1.623, "step": 3465 }, { "epoch": 180.99, "eval_gen_len": 10.6435, "eval_loss": 1.0562814474105835, "eval_rouge1": 0.0131, "eval_rouge2": 0.0007, "eval_rougeL": 0.0113, "eval_rougeLsum": 0.0111, "eval_runtime": 13.1955, "eval_samples_per_second": 8.715, "eval_steps_per_second": 1.516, "step": 3484 }, { "epoch": 181.82, "grad_norm": 0.9931882619857788, "learning_rate": 6.3200000000000005e-06, "loss": 1.265, "step": 3500 }, { "epoch": 181.97, "eval_gen_len": 10.3913, "eval_loss": 1.0519821643829346, "eval_rouge1": 0.0147, "eval_rouge2": 0.0019, "eval_rougeL": 0.0129, "eval_rougeLsum": 0.0125, "eval_runtime": 12.3478, "eval_samples_per_second": 9.313, "eval_steps_per_second": 1.62, "step": 3503 }, { "epoch": 182.96, "eval_gen_len": 10.9826, "eval_loss": 1.047600507736206, "eval_rouge1": 0.0171, "eval_rouge2": 0.0018, "eval_rougeL": 0.0148, "eval_rougeLsum": 0.0148, "eval_runtime": 12.7548, "eval_samples_per_second": 9.016, "eval_steps_per_second": 1.568, "step": 3522 }, { "epoch": 184.0, "eval_gen_len": 10.9478, "eval_loss": 1.0428956747055054, "eval_rouge1": 0.019, "eval_rouge2": 0.0026, "eval_rougeL": 0.0173, "eval_rougeLsum": 0.0174, "eval_runtime": 12.668, "eval_samples_per_second": 9.078, "eval_steps_per_second": 1.579, "step": 3542 }, { "epoch": 184.99, "eval_gen_len": 10.6348, "eval_loss": 1.0391294956207275, "eval_rouge1": 0.0192, "eval_rouge2": 0.0019, "eval_rougeL": 0.016, "eval_rougeLsum": 0.0162, "eval_runtime": 12.2592, "eval_samples_per_second": 9.381, "eval_steps_per_second": 1.631, "step": 3561 }, { "epoch": 185.97, "eval_gen_len": 10.6, "eval_loss": 1.0354028940200806, "eval_rouge1": 0.0192, "eval_rouge2": 0.0021, "eval_rougeL": 0.0154, "eval_rougeLsum": 0.0155, "eval_runtime": 12.3279, "eval_samples_per_second": 9.328, "eval_steps_per_second": 1.622, "step": 3580 }, { "epoch": 186.96, "eval_gen_len": 10.2261, "eval_loss": 1.0318480730056763, "eval_rouge1": 0.0193, "eval_rouge2": 0.003, "eval_rougeL": 0.0162, "eval_rougeLsum": 0.0163, "eval_runtime": 12.7454, "eval_samples_per_second": 9.023, "eval_steps_per_second": 1.569, "step": 3599 }, { "epoch": 188.0, "eval_gen_len": 10.6261, "eval_loss": 1.0279144048690796, "eval_rouge1": 0.0245, "eval_rouge2": 0.0032, "eval_rougeL": 0.0201, "eval_rougeLsum": 0.02, "eval_runtime": 12.5706, "eval_samples_per_second": 9.148, "eval_steps_per_second": 1.591, "step": 3619 }, { "epoch": 188.99, "eval_gen_len": 10.5913, "eval_loss": 1.0238802433013916, "eval_rouge1": 0.025, "eval_rouge2": 0.0029, "eval_rougeL": 0.0206, "eval_rougeLsum": 0.0207, "eval_runtime": 12.4275, "eval_samples_per_second": 9.254, "eval_steps_per_second": 1.609, "step": 3638 }, { "epoch": 189.97, "eval_gen_len": 10.2261, "eval_loss": 1.0197361707687378, "eval_rouge1": 0.0249, "eval_rouge2": 0.0029, "eval_rougeL": 0.0198, "eval_rougeLsum": 0.0199, "eval_runtime": 12.6121, "eval_samples_per_second": 9.118, "eval_steps_per_second": 1.586, "step": 3657 }, { "epoch": 190.96, "eval_gen_len": 10.1391, "eval_loss": 1.0159963369369507, "eval_rouge1": 0.0245, "eval_rouge2": 0.003, "eval_rougeL": 0.019, "eval_rougeLsum": 0.0191, "eval_runtime": 12.81, "eval_samples_per_second": 8.977, "eval_steps_per_second": 1.561, "step": 3676 }, { "epoch": 192.0, "eval_gen_len": 10.2435, "eval_loss": 1.0119863748550415, "eval_rouge1": 0.0243, "eval_rouge2": 0.0027, "eval_rougeL": 0.019, "eval_rougeLsum": 0.019, "eval_runtime": 12.53, "eval_samples_per_second": 9.178, "eval_steps_per_second": 1.596, "step": 3696 }, { "epoch": 192.99, "eval_gen_len": 10.3826, "eval_loss": 1.008431315422058, "eval_rouge1": 0.0247, "eval_rouge2": 0.0029, "eval_rougeL": 0.0194, "eval_rougeLsum": 0.0193, "eval_runtime": 12.6196, "eval_samples_per_second": 9.113, "eval_steps_per_second": 1.585, "step": 3715 }, { "epoch": 193.97, "eval_gen_len": 10.6696, "eval_loss": 1.0049232244491577, "eval_rouge1": 0.0239, "eval_rouge2": 0.0027, "eval_rougeL": 0.0186, "eval_rougeLsum": 0.0185, "eval_runtime": 12.5612, "eval_samples_per_second": 9.155, "eval_steps_per_second": 1.592, "step": 3734 }, { "epoch": 194.96, "eval_gen_len": 11.1043, "eval_loss": 1.0015385150909424, "eval_rouge1": 0.0248, "eval_rouge2": 0.0029, "eval_rougeL": 0.0195, "eval_rougeLsum": 0.0195, "eval_runtime": 11.9572, "eval_samples_per_second": 9.618, "eval_steps_per_second": 1.673, "step": 3753 }, { "epoch": 196.0, "eval_gen_len": 10.8609, "eval_loss": 0.9973338842391968, "eval_rouge1": 0.0233, "eval_rouge2": 0.0026, "eval_rougeL": 0.0189, "eval_rougeLsum": 0.0189, "eval_runtime": 11.9829, "eval_samples_per_second": 9.597, "eval_steps_per_second": 1.669, "step": 3773 }, { "epoch": 196.99, "eval_gen_len": 10.6783, "eval_loss": 0.9933551549911499, "eval_rouge1": 0.0209, "eval_rouge2": 0.0028, "eval_rougeL": 0.0172, "eval_rougeLsum": 0.0172, "eval_runtime": 12.3195, "eval_samples_per_second": 9.335, "eval_steps_per_second": 1.623, "step": 3792 }, { "epoch": 197.97, "eval_gen_len": 10.9043, "eval_loss": 0.9898021817207336, "eval_rouge1": 0.0224, "eval_rouge2": 0.0028, "eval_rougeL": 0.0183, "eval_rougeLsum": 0.0182, "eval_runtime": 12.6246, "eval_samples_per_second": 9.109, "eval_steps_per_second": 1.584, "step": 3811 }, { "epoch": 198.96, "eval_gen_len": 11.2435, "eval_loss": 0.9868430495262146, "eval_rouge1": 0.0223, "eval_rouge2": 0.0034, "eval_rougeL": 0.0186, "eval_rougeLsum": 0.0186, "eval_runtime": 12.4605, "eval_samples_per_second": 9.229, "eval_steps_per_second": 1.605, "step": 3830 }, { "epoch": 200.0, "eval_gen_len": 11.1565, "eval_loss": 0.9835883975028992, "eval_rouge1": 0.0212, "eval_rouge2": 0.0033, "eval_rougeL": 0.0182, "eval_rougeLsum": 0.018, "eval_runtime": 12.0605, "eval_samples_per_second": 9.535, "eval_steps_per_second": 1.658, "step": 3850 }, { "epoch": 200.99, "eval_gen_len": 11.2087, "eval_loss": 0.9812867641448975, "eval_rouge1": 0.0202, "eval_rouge2": 0.003, "eval_rougeL": 0.0164, "eval_rougeLsum": 0.0164, "eval_runtime": 12.6958, "eval_samples_per_second": 9.058, "eval_steps_per_second": 1.575, "step": 3869 }, { "epoch": 201.97, "eval_gen_len": 11.2783, "eval_loss": 0.9780998229980469, "eval_rouge1": 0.0192, "eval_rouge2": 0.0032, "eval_rougeL": 0.0158, "eval_rougeLsum": 0.0158, "eval_runtime": 12.2767, "eval_samples_per_second": 9.367, "eval_steps_per_second": 1.629, "step": 3888 }, { "epoch": 202.96, "eval_gen_len": 11.113, "eval_loss": 0.9748227596282959, "eval_rouge1": 0.0174, "eval_rouge2": 0.0028, "eval_rougeL": 0.0144, "eval_rougeLsum": 0.0144, "eval_runtime": 12.2251, "eval_samples_per_second": 9.407, "eval_steps_per_second": 1.636, "step": 3907 }, { "epoch": 204.0, "eval_gen_len": 11.3304, "eval_loss": 0.9713881015777588, "eval_rouge1": 0.0187, "eval_rouge2": 0.0026, "eval_rougeL": 0.0157, "eval_rougeLsum": 0.0157, "eval_runtime": 12.1776, "eval_samples_per_second": 9.444, "eval_steps_per_second": 1.642, "step": 3927 }, { "epoch": 204.99, "eval_gen_len": 11.5043, "eval_loss": 0.968216598033905, "eval_rouge1": 0.0199, "eval_rouge2": 0.0026, "eval_rougeL": 0.0164, "eval_rougeLsum": 0.0166, "eval_runtime": 12.4259, "eval_samples_per_second": 9.255, "eval_steps_per_second": 1.61, "step": 3946 }, { "epoch": 205.97, "eval_gen_len": 11.4261, "eval_loss": 0.9647319912910461, "eval_rouge1": 0.0184, "eval_rouge2": 0.0025, "eval_rougeL": 0.0154, "eval_rougeLsum": 0.0154, "eval_runtime": 12.6085, "eval_samples_per_second": 9.121, "eval_steps_per_second": 1.586, "step": 3965 }, { "epoch": 206.96, "eval_gen_len": 11.6087, "eval_loss": 0.9613582491874695, "eval_rouge1": 0.0172, "eval_rouge2": 0.0018, "eval_rougeL": 0.0146, "eval_rougeLsum": 0.0145, "eval_runtime": 12.5542, "eval_samples_per_second": 9.16, "eval_steps_per_second": 1.593, "step": 3984 }, { "epoch": 207.79, "grad_norm": 0.6749991178512573, "learning_rate": 5.793684210526316e-06, "loss": 1.119, "step": 4000 }, { "epoch": 208.0, "eval_gen_len": 11.8087, "eval_loss": 0.9580429196357727, "eval_rouge1": 0.0206, "eval_rouge2": 0.0023, "eval_rougeL": 0.0168, "eval_rougeLsum": 0.0167, "eval_runtime": 12.4965, "eval_samples_per_second": 9.203, "eval_steps_per_second": 1.6, "step": 4004 }, { "epoch": 208.99, "eval_gen_len": 12.0957, "eval_loss": 0.9548400640487671, "eval_rouge1": 0.0233, "eval_rouge2": 0.0023, "eval_rougeL": 0.019, "eval_rougeLsum": 0.019, "eval_runtime": 13.061, "eval_samples_per_second": 8.805, "eval_steps_per_second": 1.531, "step": 4023 }, { "epoch": 209.97, "eval_gen_len": 11.9826, "eval_loss": 0.9517626166343689, "eval_rouge1": 0.0214, "eval_rouge2": 0.0021, "eval_rougeL": 0.0181, "eval_rougeLsum": 0.018, "eval_runtime": 12.6476, "eval_samples_per_second": 9.093, "eval_steps_per_second": 1.581, "step": 4042 }, { "epoch": 210.96, "eval_gen_len": 11.9304, "eval_loss": 0.9485481381416321, "eval_rouge1": 0.0208, "eval_rouge2": 0.0018, "eval_rougeL": 0.0171, "eval_rougeLsum": 0.0172, "eval_runtime": 12.4352, "eval_samples_per_second": 9.248, "eval_steps_per_second": 1.608, "step": 4061 }, { "epoch": 212.0, "eval_gen_len": 11.7826, "eval_loss": 0.9455087184906006, "eval_rouge1": 0.0184, "eval_rouge2": 0.0016, "eval_rougeL": 0.0152, "eval_rougeLsum": 0.015, "eval_runtime": 12.868, "eval_samples_per_second": 8.937, "eval_steps_per_second": 1.554, "step": 4081 }, { "epoch": 212.99, "eval_gen_len": 11.7565, "eval_loss": 0.9424554109573364, "eval_rouge1": 0.0186, "eval_rouge2": 0.0028, "eval_rougeL": 0.0153, "eval_rougeLsum": 0.0153, "eval_runtime": 12.3925, "eval_samples_per_second": 9.28, "eval_steps_per_second": 1.614, "step": 4100 }, { "epoch": 213.97, "eval_gen_len": 11.3913, "eval_loss": 0.939349889755249, "eval_rouge1": 0.0165, "eval_rouge2": 0.002, "eval_rougeL": 0.0131, "eval_rougeLsum": 0.0131, "eval_runtime": 12.3014, "eval_samples_per_second": 9.349, "eval_steps_per_second": 1.626, "step": 4119 }, { "epoch": 214.96, "eval_gen_len": 11.4522, "eval_loss": 0.9365057349205017, "eval_rouge1": 0.0177, "eval_rouge2": 0.0022, "eval_rougeL": 0.0143, "eval_rougeLsum": 0.0143, "eval_runtime": 12.8423, "eval_samples_per_second": 8.955, "eval_steps_per_second": 1.557, "step": 4138 }, { "epoch": 216.0, "eval_gen_len": 11.7391, "eval_loss": 0.9332289099693298, "eval_rouge1": 0.0213, "eval_rouge2": 0.0028, "eval_rougeL": 0.0177, "eval_rougeLsum": 0.0173, "eval_runtime": 12.4944, "eval_samples_per_second": 9.204, "eval_steps_per_second": 1.601, "step": 4158 }, { "epoch": 216.99, "eval_gen_len": 11.6522, "eval_loss": 0.9310381412506104, "eval_rouge1": 0.0197, "eval_rouge2": 0.0028, "eval_rougeL": 0.0159, "eval_rougeLsum": 0.0157, "eval_runtime": 12.5982, "eval_samples_per_second": 9.128, "eval_steps_per_second": 1.588, "step": 4177 }, { "epoch": 217.97, "eval_gen_len": 11.687, "eval_loss": 0.9279318451881409, "eval_rouge1": 0.0203, "eval_rouge2": 0.0029, "eval_rougeL": 0.0168, "eval_rougeLsum": 0.0165, "eval_runtime": 12.2551, "eval_samples_per_second": 9.384, "eval_steps_per_second": 1.632, "step": 4196 }, { "epoch": 218.96, "eval_gen_len": 11.7043, "eval_loss": 0.9249460697174072, "eval_rouge1": 0.0228, "eval_rouge2": 0.0032, "eval_rougeL": 0.0191, "eval_rougeLsum": 0.019, "eval_runtime": 12.661, "eval_samples_per_second": 9.083, "eval_steps_per_second": 1.58, "step": 4215 }, { "epoch": 220.0, "eval_gen_len": 11.2783, "eval_loss": 0.9218883514404297, "eval_rouge1": 0.0219, "eval_rouge2": 0.0032, "eval_rougeL": 0.0182, "eval_rougeLsum": 0.018, "eval_runtime": 12.5398, "eval_samples_per_second": 9.171, "eval_steps_per_second": 1.595, "step": 4235 }, { "epoch": 220.99, "eval_gen_len": 11.0087, "eval_loss": 0.9194144010543823, "eval_rouge1": 0.0203, "eval_rouge2": 0.0029, "eval_rougeL": 0.0171, "eval_rougeLsum": 0.0167, "eval_runtime": 12.4516, "eval_samples_per_second": 9.236, "eval_steps_per_second": 1.606, "step": 4254 }, { "epoch": 221.97, "eval_gen_len": 10.8174, "eval_loss": 0.9165053963661194, "eval_rouge1": 0.0197, "eval_rouge2": 0.0021, "eval_rougeL": 0.0164, "eval_rougeLsum": 0.0161, "eval_runtime": 12.4796, "eval_samples_per_second": 9.215, "eval_steps_per_second": 1.603, "step": 4273 }, { "epoch": 222.96, "eval_gen_len": 10.9652, "eval_loss": 0.9133741855621338, "eval_rouge1": 0.0226, "eval_rouge2": 0.0027, "eval_rougeL": 0.0185, "eval_rougeLsum": 0.0182, "eval_runtime": 12.2854, "eval_samples_per_second": 9.361, "eval_steps_per_second": 1.628, "step": 4292 }, { "epoch": 224.0, "eval_gen_len": 10.7565, "eval_loss": 0.9105393886566162, "eval_rouge1": 0.0245, "eval_rouge2": 0.0032, "eval_rougeL": 0.0199, "eval_rougeLsum": 0.0197, "eval_runtime": 12.0278, "eval_samples_per_second": 9.561, "eval_steps_per_second": 1.663, "step": 4312 }, { "epoch": 224.99, "eval_gen_len": 10.1391, "eval_loss": 0.907561719417572, "eval_rouge1": 0.0198, "eval_rouge2": 0.0025, "eval_rougeL": 0.0163, "eval_rougeLsum": 0.0161, "eval_runtime": 12.2338, "eval_samples_per_second": 9.4, "eval_steps_per_second": 1.635, "step": 4331 }, { "epoch": 225.97, "eval_gen_len": 9.8522, "eval_loss": 0.9046717286109924, "eval_rouge1": 0.0171, "eval_rouge2": 0.0029, "eval_rougeL": 0.0145, "eval_rougeLsum": 0.0141, "eval_runtime": 12.5334, "eval_samples_per_second": 9.175, "eval_steps_per_second": 1.596, "step": 4350 }, { "epoch": 226.96, "eval_gen_len": 9.6, "eval_loss": 0.9021281599998474, "eval_rouge1": 0.0167, "eval_rouge2": 0.0025, "eval_rougeL": 0.0145, "eval_rougeLsum": 0.0141, "eval_runtime": 12.1542, "eval_samples_per_second": 9.462, "eval_steps_per_second": 1.646, "step": 4369 }, { "epoch": 228.0, "eval_gen_len": 9.6261, "eval_loss": 0.8991298675537109, "eval_rouge1": 0.0181, "eval_rouge2": 0.0019, "eval_rougeL": 0.0153, "eval_rougeLsum": 0.0148, "eval_runtime": 12.1167, "eval_samples_per_second": 9.491, "eval_steps_per_second": 1.651, "step": 4389 }, { "epoch": 228.99, "eval_gen_len": 9.687, "eval_loss": 0.8962268829345703, "eval_rouge1": 0.0217, "eval_rouge2": 0.0027, "eval_rougeL": 0.0176, "eval_rougeLsum": 0.0172, "eval_runtime": 12.1668, "eval_samples_per_second": 9.452, "eval_steps_per_second": 1.644, "step": 4408 }, { "epoch": 229.97, "eval_gen_len": 9.2435, "eval_loss": 0.8939462304115295, "eval_rouge1": 0.0223, "eval_rouge2": 0.0029, "eval_rougeL": 0.0178, "eval_rougeLsum": 0.0175, "eval_runtime": 12.2423, "eval_samples_per_second": 9.394, "eval_steps_per_second": 1.634, "step": 4427 }, { "epoch": 230.96, "eval_gen_len": 9.1304, "eval_loss": 0.8907042145729065, "eval_rouge1": 0.0216, "eval_rouge2": 0.0029, "eval_rougeL": 0.0179, "eval_rougeLsum": 0.0175, "eval_runtime": 12.2251, "eval_samples_per_second": 9.407, "eval_steps_per_second": 1.636, "step": 4446 }, { "epoch": 232.0, "eval_gen_len": 8.9652, "eval_loss": 0.8877010345458984, "eval_rouge1": 0.0211, "eval_rouge2": 0.0025, "eval_rougeL": 0.0169, "eval_rougeLsum": 0.0166, "eval_runtime": 12.123, "eval_samples_per_second": 9.486, "eval_steps_per_second": 1.65, "step": 4466 }, { "epoch": 232.99, "eval_gen_len": 8.7739, "eval_loss": 0.8858217597007751, "eval_rouge1": 0.0209, "eval_rouge2": 0.0027, "eval_rougeL": 0.0168, "eval_rougeLsum": 0.0164, "eval_runtime": 12.3317, "eval_samples_per_second": 9.326, "eval_steps_per_second": 1.622, "step": 4485 }, { "epoch": 233.77, "grad_norm": 0.45308393239974976, "learning_rate": 5.267368421052632e-06, "loss": 1.0189, "step": 4500 }, { "epoch": 233.97, "eval_gen_len": 8.6087, "eval_loss": 0.8837451934814453, "eval_rouge1": 0.0221, "eval_rouge2": 0.0032, "eval_rougeL": 0.0177, "eval_rougeLsum": 0.0173, "eval_runtime": 13.1885, "eval_samples_per_second": 8.72, "eval_steps_per_second": 1.516, "step": 4504 }, { "epoch": 234.96, "eval_gen_len": 8.487, "eval_loss": 0.8812865614891052, "eval_rouge1": 0.0224, "eval_rouge2": 0.003, "eval_rougeL": 0.0175, "eval_rougeLsum": 0.0172, "eval_runtime": 12.2108, "eval_samples_per_second": 9.418, "eval_steps_per_second": 1.638, "step": 4523 }, { "epoch": 236.0, "eval_gen_len": 8.2957, "eval_loss": 0.8780920505523682, "eval_rouge1": 0.0225, "eval_rouge2": 0.0028, "eval_rougeL": 0.0171, "eval_rougeLsum": 0.0168, "eval_runtime": 12.2449, "eval_samples_per_second": 9.392, "eval_steps_per_second": 1.633, "step": 4543 }, { "epoch": 236.99, "eval_gen_len": 7.9304, "eval_loss": 0.8753093481063843, "eval_rouge1": 0.0215, "eval_rouge2": 0.0027, "eval_rougeL": 0.016, "eval_rougeLsum": 0.0158, "eval_runtime": 12.4484, "eval_samples_per_second": 9.238, "eval_steps_per_second": 1.607, "step": 4562 }, { "epoch": 237.97, "eval_gen_len": 7.8174, "eval_loss": 0.8730840086936951, "eval_rouge1": 0.0211, "eval_rouge2": 0.0027, "eval_rougeL": 0.016, "eval_rougeLsum": 0.0156, "eval_runtime": 12.5923, "eval_samples_per_second": 9.133, "eval_steps_per_second": 1.588, "step": 4581 }, { "epoch": 238.96, "eval_gen_len": 7.687, "eval_loss": 0.8703946471214294, "eval_rouge1": 0.0209, "eval_rouge2": 0.0027, "eval_rougeL": 0.0158, "eval_rougeLsum": 0.0154, "eval_runtime": 12.8465, "eval_samples_per_second": 8.952, "eval_steps_per_second": 1.557, "step": 4600 }, { "epoch": 240.0, "eval_gen_len": 7.3652, "eval_loss": 0.8674846887588501, "eval_rouge1": 0.0211, "eval_rouge2": 0.0027, "eval_rougeL": 0.0158, "eval_rougeLsum": 0.0154, "eval_runtime": 12.4238, "eval_samples_per_second": 9.256, "eval_steps_per_second": 1.61, "step": 4620 }, { "epoch": 240.99, "eval_gen_len": 7.2609, "eval_loss": 0.8647277355194092, "eval_rouge1": 0.0204, "eval_rouge2": 0.0022, "eval_rougeL": 0.0147, "eval_rougeLsum": 0.0143, "eval_runtime": 12.6703, "eval_samples_per_second": 9.076, "eval_steps_per_second": 1.578, "step": 4639 }, { "epoch": 241.97, "eval_gen_len": 7.0609, "eval_loss": 0.8625157475471497, "eval_rouge1": 0.0206, "eval_rouge2": 0.0023, "eval_rougeL": 0.0152, "eval_rougeLsum": 0.0149, "eval_runtime": 12.5819, "eval_samples_per_second": 9.14, "eval_steps_per_second": 1.59, "step": 4658 }, { "epoch": 242.96, "eval_gen_len": 6.5652, "eval_loss": 0.8605428338050842, "eval_rouge1": 0.0182, "eval_rouge2": 0.0017, "eval_rougeL": 0.0133, "eval_rougeLsum": 0.0131, "eval_runtime": 13.1768, "eval_samples_per_second": 8.727, "eval_steps_per_second": 1.518, "step": 4677 }, { "epoch": 244.0, "eval_gen_len": 6.4261, "eval_loss": 0.8578657507896423, "eval_rouge1": 0.0177, "eval_rouge2": 0.0021, "eval_rougeL": 0.0134, "eval_rougeLsum": 0.0131, "eval_runtime": 12.6947, "eval_samples_per_second": 9.059, "eval_steps_per_second": 1.575, "step": 4697 }, { "epoch": 244.99, "eval_gen_len": 6.2783, "eval_loss": 0.8557173609733582, "eval_rouge1": 0.0177, "eval_rouge2": 0.0021, "eval_rougeL": 0.0134, "eval_rougeLsum": 0.013, "eval_runtime": 12.5391, "eval_samples_per_second": 9.171, "eval_steps_per_second": 1.595, "step": 4716 }, { "epoch": 245.97, "eval_gen_len": 6.2435, "eval_loss": 0.8529919981956482, "eval_rouge1": 0.0169, "eval_rouge2": 0.0014, "eval_rougeL": 0.0131, "eval_rougeLsum": 0.0127, "eval_runtime": 12.7454, "eval_samples_per_second": 9.023, "eval_steps_per_second": 1.569, "step": 4735 }, { "epoch": 246.96, "eval_gen_len": 6.1565, "eval_loss": 0.850603461265564, "eval_rouge1": 0.0191, "eval_rouge2": 0.0019, "eval_rougeL": 0.0145, "eval_rougeLsum": 0.0141, "eval_runtime": 12.284, "eval_samples_per_second": 9.362, "eval_steps_per_second": 1.628, "step": 4754 }, { "epoch": 248.0, "eval_gen_len": 5.9478, "eval_loss": 0.8480112552642822, "eval_rouge1": 0.0186, "eval_rouge2": 0.0015, "eval_rougeL": 0.0146, "eval_rougeLsum": 0.0142, "eval_runtime": 12.3791, "eval_samples_per_second": 9.29, "eval_steps_per_second": 1.616, "step": 4774 }, { "epoch": 248.99, "eval_gen_len": 5.7043, "eval_loss": 0.8458153009414673, "eval_rouge1": 0.0173, "eval_rouge2": 0.0013, "eval_rougeL": 0.0137, "eval_rougeLsum": 0.0131, "eval_runtime": 12.4132, "eval_samples_per_second": 9.264, "eval_steps_per_second": 1.611, "step": 4793 }, { "epoch": 249.97, "eval_gen_len": 5.7478, "eval_loss": 0.8430487513542175, "eval_rouge1": 0.0169, "eval_rouge2": 0.0015, "eval_rougeL": 0.0136, "eval_rougeLsum": 0.0133, "eval_runtime": 12.8141, "eval_samples_per_second": 8.974, "eval_steps_per_second": 1.561, "step": 4812 }, { "epoch": 250.96, "eval_gen_len": 5.3739, "eval_loss": 0.841323971748352, "eval_rouge1": 0.0152, "eval_rouge2": 0.0016, "eval_rougeL": 0.0124, "eval_rougeLsum": 0.0121, "eval_runtime": 12.5346, "eval_samples_per_second": 9.175, "eval_steps_per_second": 1.596, "step": 4831 }, { "epoch": 252.0, "eval_gen_len": 5.1565, "eval_loss": 0.838948130607605, "eval_rouge1": 0.0149, "eval_rouge2": 0.0011, "eval_rougeL": 0.0124, "eval_rougeLsum": 0.012, "eval_runtime": 13.0143, "eval_samples_per_second": 8.836, "eval_steps_per_second": 1.537, "step": 4851 }, { "epoch": 252.99, "eval_gen_len": 4.9739, "eval_loss": 0.8368021249771118, "eval_rouge1": 0.0148, "eval_rouge2": 0.0011, "eval_rougeL": 0.0123, "eval_rougeLsum": 0.0119, "eval_runtime": 12.4217, "eval_samples_per_second": 9.258, "eval_steps_per_second": 1.61, "step": 4870 }, { "epoch": 253.97, "eval_gen_len": 4.9652, "eval_loss": 0.8342902660369873, "eval_rouge1": 0.0158, "eval_rouge2": 0.0011, "eval_rougeL": 0.013, "eval_rougeLsum": 0.0127, "eval_runtime": 12.2067, "eval_samples_per_second": 9.421, "eval_steps_per_second": 1.638, "step": 4889 }, { "epoch": 254.96, "eval_gen_len": 4.6522, "eval_loss": 0.8321281671524048, "eval_rouge1": 0.0145, "eval_rouge2": 0.0009, "eval_rougeL": 0.012, "eval_rougeLsum": 0.0117, "eval_runtime": 12.8126, "eval_samples_per_second": 8.976, "eval_steps_per_second": 1.561, "step": 4908 }, { "epoch": 256.0, "eval_gen_len": 4.2522, "eval_loss": 0.8296378254890442, "eval_rouge1": 0.0139, "eval_rouge2": 0.0009, "eval_rougeL": 0.0113, "eval_rougeLsum": 0.0112, "eval_runtime": 12.3844, "eval_samples_per_second": 9.286, "eval_steps_per_second": 1.615, "step": 4928 }, { "epoch": 256.99, "eval_gen_len": 4.1826, "eval_loss": 0.8276596069335938, "eval_rouge1": 0.0143, "eval_rouge2": 0.0009, "eval_rougeL": 0.0118, "eval_rougeLsum": 0.0117, "eval_runtime": 12.4227, "eval_samples_per_second": 9.257, "eval_steps_per_second": 1.61, "step": 4947 }, { "epoch": 257.97, "eval_gen_len": 3.6261, "eval_loss": 0.8265025019645691, "eval_rouge1": 0.0127, "eval_rouge2": 0.0007, "eval_rougeL": 0.0103, "eval_rougeLsum": 0.01, "eval_runtime": 12.2122, "eval_samples_per_second": 9.417, "eval_steps_per_second": 1.638, "step": 4966 }, { "epoch": 258.96, "eval_gen_len": 3.2609, "eval_loss": 0.8242406845092773, "eval_rouge1": 0.0122, "eval_rouge2": 0.0013, "eval_rougeL": 0.0102, "eval_rougeLsum": 0.0101, "eval_runtime": 12.2099, "eval_samples_per_second": 9.419, "eval_steps_per_second": 1.638, "step": 4985 }, { "epoch": 259.74, "grad_norm": 0.4785182774066925, "learning_rate": 4.741052631578948e-06, "loss": 0.9442, "step": 5000 }, { "epoch": 260.0, "eval_gen_len": 2.8, "eval_loss": 0.8225219249725342, "eval_rouge1": 0.0097, "eval_rouge2": 0.0011, "eval_rougeL": 0.0082, "eval_rougeLsum": 0.0082, "eval_runtime": 12.9652, "eval_samples_per_second": 8.87, "eval_steps_per_second": 1.543, "step": 5005 }, { "epoch": 260.99, "eval_gen_len": 2.5652, "eval_loss": 0.8206771612167358, "eval_rouge1": 0.0087, "eval_rouge2": 0.0011, "eval_rougeL": 0.0071, "eval_rougeLsum": 0.0069, "eval_runtime": 12.2186, "eval_samples_per_second": 9.412, "eval_steps_per_second": 1.637, "step": 5024 }, { "epoch": 261.97, "eval_gen_len": 2.2348, "eval_loss": 0.818169891834259, "eval_rouge1": 0.0072, "eval_rouge2": 0.0005, "eval_rougeL": 0.0059, "eval_rougeLsum": 0.0058, "eval_runtime": 12.5714, "eval_samples_per_second": 9.148, "eval_steps_per_second": 1.591, "step": 5043 }, { "epoch": 262.96, "eval_gen_len": 2.2, "eval_loss": 0.8162385821342468, "eval_rouge1": 0.0062, "eval_rouge2": 0.0002, "eval_rougeL": 0.0051, "eval_rougeLsum": 0.0051, "eval_runtime": 12.5747, "eval_samples_per_second": 9.145, "eval_steps_per_second": 1.59, "step": 5062 }, { "epoch": 264.0, "eval_gen_len": 2.2087, "eval_loss": 0.8145304918289185, "eval_rouge1": 0.0068, "eval_rouge2": 0.0005, "eval_rougeL": 0.0056, "eval_rougeLsum": 0.0057, "eval_runtime": 12.6293, "eval_samples_per_second": 9.106, "eval_steps_per_second": 1.584, "step": 5082 }, { "epoch": 264.99, "eval_gen_len": 2.3304, "eval_loss": 0.8127499222755432, "eval_rouge1": 0.0086, "eval_rouge2": 0.002, "eval_rougeL": 0.0076, "eval_rougeLsum": 0.0075, "eval_runtime": 13.2312, "eval_samples_per_second": 8.692, "eval_steps_per_second": 1.512, "step": 5101 }, { "epoch": 265.97, "eval_gen_len": 1.8957, "eval_loss": 0.811177670955658, "eval_rouge1": 0.0057, "eval_rouge2": 0.001, "eval_rougeL": 0.0052, "eval_rougeLsum": 0.0051, "eval_runtime": 12.5731, "eval_samples_per_second": 9.147, "eval_steps_per_second": 1.591, "step": 5120 }, { "epoch": 266.96, "eval_gen_len": 1.513, "eval_loss": 0.8090675473213196, "eval_rouge1": 0.0042, "eval_rouge2": 0.0007, "eval_rougeL": 0.004, "eval_rougeLsum": 0.004, "eval_runtime": 12.391, "eval_samples_per_second": 9.281, "eval_steps_per_second": 1.614, "step": 5139 }, { "epoch": 268.0, "eval_gen_len": 1.2435, "eval_loss": 0.8073368668556213, "eval_rouge1": 0.0031, "eval_rouge2": 0.0006, "eval_rougeL": 0.0029, "eval_rougeLsum": 0.0029, "eval_runtime": 12.3681, "eval_samples_per_second": 9.298, "eval_steps_per_second": 1.617, "step": 5159 }, { "epoch": 268.99, "eval_gen_len": 1.0348, "eval_loss": 0.8059150576591492, "eval_rouge1": 0.0031, "eval_rouge2": 0.0006, "eval_rougeL": 0.0029, "eval_rougeLsum": 0.0029, "eval_runtime": 12.3463, "eval_samples_per_second": 9.315, "eval_steps_per_second": 1.62, "step": 5178 }, { "epoch": 269.97, "eval_gen_len": 0.6348, "eval_loss": 0.8042454123497009, "eval_rouge1": 0.0007, "eval_rouge2": 0.0, "eval_rougeL": 0.0007, "eval_rougeLsum": 0.0007, "eval_runtime": 12.4194, "eval_samples_per_second": 9.26, "eval_steps_per_second": 1.61, "step": 5197 }, { "epoch": 270.96, "eval_gen_len": 0.7304, "eval_loss": 0.8023030161857605, "eval_rouge1": 0.0012, "eval_rouge2": 0.0003, "eval_rougeL": 0.0011, "eval_rougeLsum": 0.001, "eval_runtime": 12.5901, "eval_samples_per_second": 9.134, "eval_steps_per_second": 1.589, "step": 5216 }, { "epoch": 272.0, "eval_gen_len": 0.8, "eval_loss": 0.8001125454902649, "eval_rouge1": 0.0012, "eval_rouge2": 0.0003, "eval_rougeL": 0.0011, "eval_rougeLsum": 0.001, "eval_runtime": 12.5732, "eval_samples_per_second": 9.146, "eval_steps_per_second": 1.591, "step": 5236 }, { "epoch": 272.99, "eval_gen_len": 0.6348, "eval_loss": 0.7986020445823669, "eval_rouge1": 0.0012, "eval_rouge2": 0.0003, "eval_rougeL": 0.0011, "eval_rougeLsum": 0.001, "eval_runtime": 12.389, "eval_samples_per_second": 9.282, "eval_steps_per_second": 1.614, "step": 5255 }, { "epoch": 273.97, "eval_gen_len": 0.7478, "eval_loss": 0.7969604730606079, "eval_rouge1": 0.0012, "eval_rouge2": 0.0003, "eval_rougeL": 0.0011, "eval_rougeLsum": 0.001, "eval_runtime": 12.3419, "eval_samples_per_second": 9.318, "eval_steps_per_second": 1.62, "step": 5274 }, { "epoch": 274.96, "eval_gen_len": 0.5826, "eval_loss": 0.795600175857544, "eval_rouge1": 0.0004, "eval_rouge2": 0.0, "eval_rougeL": 0.0004, "eval_rougeLsum": 0.0004, "eval_runtime": 12.3627, "eval_samples_per_second": 9.302, "eval_steps_per_second": 1.618, "step": 5293 }, { "epoch": 276.0, "eval_gen_len": 0.4, "eval_loss": 0.7938172817230225, "eval_rouge1": 0.0004, "eval_rouge2": 0.0, "eval_rougeL": 0.0004, "eval_rougeLsum": 0.0004, "eval_runtime": 12.3706, "eval_samples_per_second": 9.296, "eval_steps_per_second": 1.617, "step": 5313 }, { "epoch": 276.99, "eval_gen_len": 0.2261, "eval_loss": 0.7923696041107178, "eval_rouge1": 0.0002, "eval_rouge2": 0.0, "eval_rougeL": 0.0002, "eval_rougeLsum": 0.0002, "eval_runtime": 12.4258, "eval_samples_per_second": 9.255, "eval_steps_per_second": 1.61, "step": 5332 }, { "epoch": 277.97, "eval_gen_len": 0.2261, "eval_loss": 0.7907570600509644, "eval_rouge1": 0.0002, "eval_rouge2": 0.0, "eval_rougeL": 0.0002, "eval_rougeLsum": 0.0002, "eval_runtime": 12.5909, "eval_samples_per_second": 9.134, "eval_steps_per_second": 1.588, "step": 5351 }, { "epoch": 278.96, "eval_gen_len": 0.2, "eval_loss": 0.7891109585762024, "eval_rouge1": 0.0002, "eval_rouge2": 0.0, "eval_rougeL": 0.0002, "eval_rougeLsum": 0.0002, "eval_runtime": 13.0018, "eval_samples_per_second": 8.845, "eval_steps_per_second": 1.538, "step": 5370 }, { "epoch": 280.0, "eval_gen_len": 0.1826, "eval_loss": 0.787673830986023, "eval_rouge1": 0.0002, "eval_rouge2": 0.0, "eval_rougeL": 0.0002, "eval_rougeLsum": 0.0002, "eval_runtime": 12.0698, "eval_samples_per_second": 9.528, "eval_steps_per_second": 1.657, "step": 5390 }, { "epoch": 280.99, "eval_gen_len": 0.1739, "eval_loss": 0.785959005355835, "eval_rouge1": 0.0002, "eval_rouge2": 0.0, "eval_rougeL": 0.0002, "eval_rougeLsum": 0.0002, "eval_runtime": 12.4254, "eval_samples_per_second": 9.255, "eval_steps_per_second": 1.61, "step": 5409 }, { "epoch": 281.97, "eval_gen_len": 0.1739, "eval_loss": 0.7843196988105774, "eval_rouge1": 0.0002, "eval_rouge2": 0.0, "eval_rougeL": 0.0002, "eval_rougeLsum": 0.0002, "eval_runtime": 12.6986, "eval_samples_per_second": 9.056, "eval_steps_per_second": 1.575, "step": 5428 }, { "epoch": 282.96, "eval_gen_len": 0.1739, "eval_loss": 0.7826663851737976, "eval_rouge1": 0.0002, "eval_rouge2": 0.0, "eval_rougeL": 0.0002, "eval_rougeLsum": 0.0002, "eval_runtime": 12.2827, "eval_samples_per_second": 9.363, "eval_steps_per_second": 1.628, "step": 5447 }, { "epoch": 284.0, "eval_gen_len": 0.1739, "eval_loss": 0.7811480760574341, "eval_rouge1": 0.0002, "eval_rouge2": 0.0, "eval_rougeL": 0.0002, "eval_rougeLsum": 0.0002, "eval_runtime": 12.6013, "eval_samples_per_second": 9.126, "eval_steps_per_second": 1.587, "step": 5467 }, { "epoch": 284.99, "eval_gen_len": 0.1652, "eval_loss": 0.7799001932144165, "eval_rouge1": 0.0002, "eval_rouge2": 0.0, "eval_rougeL": 0.0002, "eval_rougeLsum": 0.0002, "eval_runtime": 12.3727, "eval_samples_per_second": 9.295, "eval_steps_per_second": 1.616, "step": 5486 }, { "epoch": 285.71, "grad_norm": 0.36042362451553345, "learning_rate": 4.214736842105263e-06, "loss": 0.8855, "step": 5500 }, { "epoch": 285.97, "eval_gen_len": 0.1652, "eval_loss": 0.7784348726272583, "eval_rouge1": 0.0002, "eval_rouge2": 0.0, "eval_rougeL": 0.0002, "eval_rougeLsum": 0.0002, "eval_runtime": 12.4298, "eval_samples_per_second": 9.252, "eval_steps_per_second": 1.609, "step": 5505 }, { "epoch": 286.96, "eval_gen_len": 0.1652, "eval_loss": 0.7772350311279297, "eval_rouge1": 0.0002, "eval_rouge2": 0.0, "eval_rougeL": 0.0002, "eval_rougeLsum": 0.0002, "eval_runtime": 12.5935, "eval_samples_per_second": 9.132, "eval_steps_per_second": 1.588, "step": 5524 }, { "epoch": 288.0, "eval_gen_len": 0.1652, "eval_loss": 0.775896430015564, "eval_rouge1": 0.0002, "eval_rouge2": 0.0, "eval_rougeL": 0.0002, "eval_rougeLsum": 0.0002, "eval_runtime": 12.6907, "eval_samples_per_second": 9.062, "eval_steps_per_second": 1.576, "step": 5544 }, { "epoch": 288.99, "eval_gen_len": 0.1652, "eval_loss": 0.7743993401527405, "eval_rouge1": 0.0002, "eval_rouge2": 0.0, "eval_rougeL": 0.0002, "eval_rougeLsum": 0.0002, "eval_runtime": 12.7195, "eval_samples_per_second": 9.041, "eval_steps_per_second": 1.572, "step": 5563 }, { "epoch": 289.97, "eval_gen_len": 0.1652, "eval_loss": 0.7728458046913147, "eval_rouge1": 0.0002, "eval_rouge2": 0.0, "eval_rougeL": 0.0002, "eval_rougeLsum": 0.0002, "eval_runtime": 12.71, "eval_samples_per_second": 9.048, "eval_steps_per_second": 1.574, "step": 5582 }, { "epoch": 290.96, "eval_gen_len": 0.0, "eval_loss": 0.7715795636177063, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.6969, "eval_samples_per_second": 9.057, "eval_steps_per_second": 1.575, "step": 5601 }, { "epoch": 292.0, "eval_gen_len": 0.0, "eval_loss": 0.7701930999755859, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.902, "eval_samples_per_second": 8.913, "eval_steps_per_second": 1.55, "step": 5621 }, { "epoch": 292.99, "eval_gen_len": 0.0, "eval_loss": 0.7691376209259033, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.3513, "eval_samples_per_second": 9.311, "eval_steps_per_second": 1.619, "step": 5640 }, { "epoch": 293.97, "eval_gen_len": 0.0, "eval_loss": 0.7679579257965088, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.4173, "eval_samples_per_second": 9.261, "eval_steps_per_second": 1.611, "step": 5659 }, { "epoch": 294.96, "eval_gen_len": 0.0, "eval_loss": 0.7667289972305298, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.6438, "eval_samples_per_second": 9.095, "eval_steps_per_second": 1.582, "step": 5678 }, { "epoch": 296.0, "eval_gen_len": 0.0, "eval_loss": 0.7650233507156372, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.1233, "eval_samples_per_second": 9.486, "eval_steps_per_second": 1.65, "step": 5698 }, { "epoch": 296.99, "eval_gen_len": 0.0, "eval_loss": 0.7638988494873047, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.1018, "eval_samples_per_second": 9.503, "eval_steps_per_second": 1.653, "step": 5717 }, { "epoch": 297.97, "eval_gen_len": 0.0, "eval_loss": 0.7627271413803101, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.0736, "eval_samples_per_second": 9.525, "eval_steps_per_second": 1.657, "step": 5736 }, { "epoch": 298.96, "eval_gen_len": 0.0, "eval_loss": 0.761401891708374, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.4179, "eval_samples_per_second": 9.261, "eval_steps_per_second": 1.611, "step": 5755 }, { "epoch": 300.0, "eval_gen_len": 0.0, "eval_loss": 0.7603045105934143, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.1939, "eval_samples_per_second": 9.431, "eval_steps_per_second": 1.64, "step": 5775 }, { "epoch": 300.99, "eval_gen_len": 0.0, "eval_loss": 0.7593241333961487, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.4302, "eval_samples_per_second": 9.252, "eval_steps_per_second": 1.609, "step": 5794 }, { "epoch": 301.97, "eval_gen_len": 0.0, "eval_loss": 0.7581080198287964, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.7272, "eval_samples_per_second": 9.036, "eval_steps_per_second": 1.571, "step": 5813 }, { "epoch": 302.96, "eval_gen_len": 0.0, "eval_loss": 0.7565290927886963, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.7621, "eval_samples_per_second": 9.011, "eval_steps_per_second": 1.567, "step": 5832 }, { "epoch": 304.0, "eval_gen_len": 0.0, "eval_loss": 0.7556654810905457, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.4563, "eval_samples_per_second": 9.232, "eval_steps_per_second": 1.606, "step": 5852 }, { "epoch": 304.99, "eval_gen_len": 0.0, "eval_loss": 0.754369854927063, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.207, "eval_samples_per_second": 9.421, "eval_steps_per_second": 1.638, "step": 5871 }, { "epoch": 305.97, "eval_gen_len": 0.0, "eval_loss": 0.7534385919570923, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.6305, "eval_samples_per_second": 9.105, "eval_steps_per_second": 1.583, "step": 5890 }, { "epoch": 306.96, "eval_gen_len": 0.0, "eval_loss": 0.7526547908782959, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.4513, "eval_samples_per_second": 9.236, "eval_steps_per_second": 1.606, "step": 5909 }, { "epoch": 308.0, "eval_gen_len": 0.0, "eval_loss": 0.7513379454612732, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.349, "eval_samples_per_second": 9.312, "eval_steps_per_second": 1.62, "step": 5929 }, { "epoch": 308.99, "eval_gen_len": 0.0, "eval_loss": 0.7506363987922668, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.0976, "eval_samples_per_second": 9.506, "eval_steps_per_second": 1.653, "step": 5948 }, { "epoch": 309.97, "eval_gen_len": 0.0, "eval_loss": 0.7496155500411987, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.3015, "eval_samples_per_second": 9.348, "eval_steps_per_second": 1.626, "step": 5967 }, { "epoch": 310.96, "eval_gen_len": 0.0, "eval_loss": 0.7488384246826172, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.3367, "eval_samples_per_second": 9.322, "eval_steps_per_second": 1.621, "step": 5986 }, { "epoch": 311.69, "grad_norm": 0.3260189890861511, "learning_rate": 3.6884210526315794e-06, "loss": 0.8402, "step": 6000 }, { "epoch": 312.0, "eval_gen_len": 0.0, "eval_loss": 0.7474696040153503, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.3992, "eval_samples_per_second": 9.275, "eval_steps_per_second": 1.613, "step": 6006 }, { "epoch": 312.99, "eval_gen_len": 0.0, "eval_loss": 0.7464930415153503, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 11.9232, "eval_samples_per_second": 9.645, "eval_steps_per_second": 1.677, "step": 6025 }, { "epoch": 313.97, "eval_gen_len": 0.0, "eval_loss": 0.7456102967262268, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.6801, "eval_samples_per_second": 9.069, "eval_steps_per_second": 1.577, "step": 6044 }, { "epoch": 314.96, "eval_gen_len": 0.0, "eval_loss": 0.7446662783622742, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.3568, "eval_samples_per_second": 9.307, "eval_steps_per_second": 1.619, "step": 6063 }, { "epoch": 316.0, "eval_gen_len": 0.0, "eval_loss": 0.7433856725692749, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 11.987, "eval_samples_per_second": 9.594, "eval_steps_per_second": 1.668, "step": 6083 }, { "epoch": 316.99, "eval_gen_len": 0.0, "eval_loss": 0.7426111698150635, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.6966, "eval_samples_per_second": 9.058, "eval_steps_per_second": 1.575, "step": 6102 }, { "epoch": 317.97, "eval_gen_len": 0.0, "eval_loss": 0.7413787841796875, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.2025, "eval_samples_per_second": 9.424, "eval_steps_per_second": 1.639, "step": 6121 }, { "epoch": 318.96, "eval_gen_len": 0.0, "eval_loss": 0.7404425144195557, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.9943, "eval_samples_per_second": 8.85, "eval_steps_per_second": 1.539, "step": 6140 }, { "epoch": 320.0, "eval_gen_len": 0.0, "eval_loss": 0.7396877408027649, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.3803, "eval_samples_per_second": 9.289, "eval_steps_per_second": 1.615, "step": 6160 }, { "epoch": 320.99, "eval_gen_len": 0.0, "eval_loss": 0.739030122756958, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 13.3182, "eval_samples_per_second": 8.635, "eval_steps_per_second": 1.502, "step": 6179 }, { "epoch": 321.97, "eval_gen_len": 0.0, "eval_loss": 0.7381538152694702, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.7464, "eval_samples_per_second": 9.022, "eval_steps_per_second": 1.569, "step": 6198 }, { "epoch": 322.96, "eval_gen_len": 0.0, "eval_loss": 0.7372981309890747, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.1988, "eval_samples_per_second": 9.427, "eval_steps_per_second": 1.64, "step": 6217 }, { "epoch": 324.0, "eval_gen_len": 0.0, "eval_loss": 0.7361249327659607, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 13.3472, "eval_samples_per_second": 8.616, "eval_steps_per_second": 1.498, "step": 6237 }, { "epoch": 324.99, "eval_gen_len": 0.0, "eval_loss": 0.735177218914032, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.2208, "eval_samples_per_second": 9.41, "eval_steps_per_second": 1.637, "step": 6256 }, { "epoch": 325.97, "eval_gen_len": 0.0, "eval_loss": 0.7344561219215393, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.9594, "eval_samples_per_second": 8.874, "eval_steps_per_second": 1.543, "step": 6275 }, { "epoch": 326.96, "eval_gen_len": 0.0, "eval_loss": 0.7334731817245483, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 13.1094, "eval_samples_per_second": 8.772, "eval_steps_per_second": 1.526, "step": 6294 }, { "epoch": 328.0, "eval_gen_len": 0.0, "eval_loss": 0.7326551079750061, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.7729, "eval_samples_per_second": 9.003, "eval_steps_per_second": 1.566, "step": 6314 }, { "epoch": 328.99, "eval_gen_len": 0.0, "eval_loss": 0.7316291332244873, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.415, "eval_samples_per_second": 9.263, "eval_steps_per_second": 1.611, "step": 6333 }, { "epoch": 329.97, "eval_gen_len": 0.0, "eval_loss": 0.7311994433403015, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.9044, "eval_samples_per_second": 8.912, "eval_steps_per_second": 1.55, "step": 6352 }, { "epoch": 330.96, "eval_gen_len": 0.0, "eval_loss": 0.7306154370307922, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 13.0335, "eval_samples_per_second": 8.823, "eval_steps_per_second": 1.535, "step": 6371 }, { "epoch": 332.0, "eval_gen_len": 0.0, "eval_loss": 0.7298057675361633, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.5384, "eval_samples_per_second": 9.172, "eval_steps_per_second": 1.595, "step": 6391 }, { "epoch": 332.99, "eval_gen_len": 0.0, "eval_loss": 0.7290323972702026, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.7202, "eval_samples_per_second": 9.041, "eval_steps_per_second": 1.572, "step": 6410 }, { "epoch": 333.97, "eval_gen_len": 0.0, "eval_loss": 0.7283275127410889, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.5197, "eval_samples_per_second": 9.186, "eval_steps_per_second": 1.597, "step": 6429 }, { "epoch": 334.96, "eval_gen_len": 0.0, "eval_loss": 0.7273982763290405, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.6808, "eval_samples_per_second": 9.069, "eval_steps_per_second": 1.577, "step": 6448 }, { "epoch": 336.0, "eval_gen_len": 0.0, "eval_loss": 0.7265883684158325, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 13.0488, "eval_samples_per_second": 8.813, "eval_steps_per_second": 1.533, "step": 6468 }, { "epoch": 336.99, "eval_gen_len": 0.0, "eval_loss": 0.7261592745780945, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.6573, "eval_samples_per_second": 9.086, "eval_steps_per_second": 1.58, "step": 6487 }, { "epoch": 337.66, "grad_norm": 0.288989782333374, "learning_rate": 3.1621052631578953e-06, "loss": 0.8058, "step": 6500 }, { "epoch": 337.97, "eval_gen_len": 0.0, "eval_loss": 0.7252629995346069, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.4295, "eval_samples_per_second": 9.252, "eval_steps_per_second": 1.609, "step": 6506 }, { "epoch": 338.96, "eval_gen_len": 0.0, "eval_loss": 0.7245468497276306, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.7865, "eval_samples_per_second": 8.994, "eval_steps_per_second": 1.564, "step": 6525 }, { "epoch": 340.0, "eval_gen_len": 0.0, "eval_loss": 0.7239726185798645, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.9112, "eval_samples_per_second": 8.907, "eval_steps_per_second": 1.549, "step": 6545 }, { "epoch": 340.99, "eval_gen_len": 0.0, "eval_loss": 0.7231466770172119, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.5487, "eval_samples_per_second": 9.164, "eval_steps_per_second": 1.594, "step": 6564 }, { "epoch": 341.97, "eval_gen_len": 0.0, "eval_loss": 0.7223904728889465, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 13.3347, "eval_samples_per_second": 8.624, "eval_steps_per_second": 1.5, "step": 6583 }, { "epoch": 342.96, "eval_gen_len": 0.0, "eval_loss": 0.7218188643455505, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 13.0031, "eval_samples_per_second": 8.844, "eval_steps_per_second": 1.538, "step": 6602 }, { "epoch": 344.0, "eval_gen_len": 0.0, "eval_loss": 0.7209810614585876, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.4608, "eval_samples_per_second": 9.229, "eval_steps_per_second": 1.605, "step": 6622 }, { "epoch": 344.99, "eval_gen_len": 0.0, "eval_loss": 0.7203324437141418, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.5304, "eval_samples_per_second": 9.178, "eval_steps_per_second": 1.596, "step": 6641 }, { "epoch": 345.97, "eval_gen_len": 0.0, "eval_loss": 0.7196723818778992, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.6257, "eval_samples_per_second": 9.108, "eval_steps_per_second": 1.584, "step": 6660 }, { "epoch": 346.96, "eval_gen_len": 0.0, "eval_loss": 0.7190775275230408, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.4134, "eval_samples_per_second": 9.264, "eval_steps_per_second": 1.611, "step": 6679 }, { "epoch": 348.0, "eval_gen_len": 0.0, "eval_loss": 0.7185074090957642, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.166, "eval_samples_per_second": 9.453, "eval_steps_per_second": 1.644, "step": 6699 }, { "epoch": 348.99, "eval_gen_len": 0.0, "eval_loss": 0.7180371880531311, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.559, "eval_samples_per_second": 9.157, "eval_steps_per_second": 1.592, "step": 6718 }, { "epoch": 349.97, "eval_gen_len": 0.0, "eval_loss": 0.717097818851471, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.3, "eval_samples_per_second": 9.35, "eval_steps_per_second": 1.626, "step": 6737 }, { "epoch": 350.96, "eval_gen_len": 0.0, "eval_loss": 0.7164217829704285, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.3583, "eval_samples_per_second": 9.306, "eval_steps_per_second": 1.618, "step": 6756 }, { "epoch": 352.0, "eval_gen_len": 0.0, "eval_loss": 0.7158520817756653, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 13.0047, "eval_samples_per_second": 8.843, "eval_steps_per_second": 1.538, "step": 6776 }, { "epoch": 352.99, "eval_gen_len": 0.0, "eval_loss": 0.7151947021484375, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.6741, "eval_samples_per_second": 9.074, "eval_steps_per_second": 1.578, "step": 6795 }, { "epoch": 353.97, "eval_gen_len": 0.0, "eval_loss": 0.7145124077796936, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.6036, "eval_samples_per_second": 9.124, "eval_steps_per_second": 1.587, "step": 6814 }, { "epoch": 354.96, "eval_gen_len": 0.0, "eval_loss": 0.7140352725982666, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.2129, "eval_samples_per_second": 9.416, "eval_steps_per_second": 1.638, "step": 6833 }, { "epoch": 356.0, "eval_gen_len": 0.0, "eval_loss": 0.7135369777679443, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.4624, "eval_samples_per_second": 9.228, "eval_steps_per_second": 1.605, "step": 6853 }, { "epoch": 356.99, "eval_gen_len": 0.0, "eval_loss": 0.7128369808197021, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.422, "eval_samples_per_second": 9.258, "eval_steps_per_second": 1.61, "step": 6872 }, { "epoch": 357.97, "eval_gen_len": 0.0, "eval_loss": 0.7122591137886047, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.2268, "eval_samples_per_second": 9.406, "eval_steps_per_second": 1.636, "step": 6891 }, { "epoch": 358.96, "eval_gen_len": 0.0, "eval_loss": 0.7116859555244446, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.866, "eval_samples_per_second": 8.938, "eval_steps_per_second": 1.554, "step": 6910 }, { "epoch": 360.0, "eval_gen_len": 0.0, "eval_loss": 0.7112235426902771, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.4942, "eval_samples_per_second": 9.204, "eval_steps_per_second": 1.601, "step": 6930 }, { "epoch": 360.99, "eval_gen_len": 0.0, "eval_loss": 0.7106695771217346, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.4042, "eval_samples_per_second": 9.271, "eval_steps_per_second": 1.612, "step": 6949 }, { "epoch": 361.97, "eval_gen_len": 0.0, "eval_loss": 0.710101306438446, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.2879, "eval_samples_per_second": 9.359, "eval_steps_per_second": 1.628, "step": 6968 }, { "epoch": 362.96, "eval_gen_len": 0.0, "eval_loss": 0.7094107270240784, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.269, "eval_samples_per_second": 9.373, "eval_steps_per_second": 1.63, "step": 6987 }, { "epoch": 363.64, "grad_norm": 0.44062402844429016, "learning_rate": 2.635789473684211e-06, "loss": 0.7798, "step": 7000 }, { "epoch": 364.0, "eval_gen_len": 0.0, "eval_loss": 0.708891749382019, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.9085, "eval_samples_per_second": 8.909, "eval_steps_per_second": 1.549, "step": 7007 }, { "epoch": 364.99, "eval_gen_len": 0.0, "eval_loss": 0.7082711458206177, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.1554, "eval_samples_per_second": 9.461, "eval_steps_per_second": 1.645, "step": 7026 }, { "epoch": 365.97, "eval_gen_len": 0.0, "eval_loss": 0.7078844904899597, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.6546, "eval_samples_per_second": 9.088, "eval_steps_per_second": 1.58, "step": 7045 }, { "epoch": 366.96, "eval_gen_len": 0.0, "eval_loss": 0.7073128819465637, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.7172, "eval_samples_per_second": 9.043, "eval_steps_per_second": 1.573, "step": 7064 }, { "epoch": 368.0, "eval_gen_len": 0.0, "eval_loss": 0.7066096067428589, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.1125, "eval_samples_per_second": 9.494, "eval_steps_per_second": 1.651, "step": 7084 }, { "epoch": 368.99, "eval_gen_len": 0.0, "eval_loss": 0.7057228088378906, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.5605, "eval_samples_per_second": 9.156, "eval_steps_per_second": 1.592, "step": 7103 }, { "epoch": 369.97, "eval_gen_len": 0.0, "eval_loss": 0.7048721313476562, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.3178, "eval_samples_per_second": 9.336, "eval_steps_per_second": 1.624, "step": 7122 }, { "epoch": 370.96, "eval_gen_len": 0.0, "eval_loss": 0.7041941285133362, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.2739, "eval_samples_per_second": 9.369, "eval_steps_per_second": 1.629, "step": 7141 }, { "epoch": 372.0, "eval_gen_len": 0.0, "eval_loss": 0.7035704255104065, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.3207, "eval_samples_per_second": 9.334, "eval_steps_per_second": 1.623, "step": 7161 }, { "epoch": 372.99, "eval_gen_len": 0.0, "eval_loss": 0.7029441595077515, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.094, "eval_samples_per_second": 9.509, "eval_steps_per_second": 1.654, "step": 7180 }, { "epoch": 373.97, "eval_gen_len": 0.0, "eval_loss": 0.7022525668144226, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.7042, "eval_samples_per_second": 9.052, "eval_steps_per_second": 1.574, "step": 7199 }, { "epoch": 374.96, "eval_gen_len": 0.0, "eval_loss": 0.7016597986221313, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.3663, "eval_samples_per_second": 9.299, "eval_steps_per_second": 1.617, "step": 7218 }, { "epoch": 376.0, "eval_gen_len": 0.0, "eval_loss": 0.7011125683784485, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.2533, "eval_samples_per_second": 9.385, "eval_steps_per_second": 1.632, "step": 7238 }, { "epoch": 376.99, "eval_gen_len": 0.0, "eval_loss": 0.7006986737251282, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.4459, "eval_samples_per_second": 9.24, "eval_steps_per_second": 1.607, "step": 7257 }, { "epoch": 377.97, "eval_gen_len": 0.0, "eval_loss": 0.7000675201416016, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.5813, "eval_samples_per_second": 9.141, "eval_steps_per_second": 1.59, "step": 7276 }, { "epoch": 378.96, "eval_gen_len": 0.0, "eval_loss": 0.6994682550430298, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.7191, "eval_samples_per_second": 9.041, "eval_steps_per_second": 1.572, "step": 7295 }, { "epoch": 380.0, "eval_gen_len": 0.0, "eval_loss": 0.6987762451171875, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.7423, "eval_samples_per_second": 9.025, "eval_steps_per_second": 1.57, "step": 7315 }, { "epoch": 380.99, "eval_gen_len": 0.0, "eval_loss": 0.6981701254844666, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.7919, "eval_samples_per_second": 8.99, "eval_steps_per_second": 1.563, "step": 7334 }, { "epoch": 381.97, "eval_gen_len": 0.0, "eval_loss": 0.6976540088653564, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.6842, "eval_samples_per_second": 9.066, "eval_steps_per_second": 1.577, "step": 7353 }, { "epoch": 382.96, "eval_gen_len": 0.0, "eval_loss": 0.6971992254257202, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.7985, "eval_samples_per_second": 8.985, "eval_steps_per_second": 1.563, "step": 7372 }, { "epoch": 384.0, "eval_gen_len": 0.0, "eval_loss": 0.6967973113059998, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.5355, "eval_samples_per_second": 9.174, "eval_steps_per_second": 1.595, "step": 7392 }, { "epoch": 384.99, "eval_gen_len": 0.0, "eval_loss": 0.6962406039237976, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.7996, "eval_samples_per_second": 8.985, "eval_steps_per_second": 1.563, "step": 7411 }, { "epoch": 385.97, "eval_gen_len": 0.0, "eval_loss": 0.6957660913467407, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.4031, "eval_samples_per_second": 9.272, "eval_steps_per_second": 1.612, "step": 7430 }, { "epoch": 386.96, "eval_gen_len": 0.0, "eval_loss": 0.6953439116477966, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 13.2572, "eval_samples_per_second": 8.675, "eval_steps_per_second": 1.509, "step": 7449 }, { "epoch": 388.0, "eval_gen_len": 0.0, "eval_loss": 0.6948480606079102, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.5809, "eval_samples_per_second": 9.141, "eval_steps_per_second": 1.59, "step": 7469 }, { "epoch": 388.99, "eval_gen_len": 0.0, "eval_loss": 0.6944136023521423, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.3151, "eval_samples_per_second": 9.338, "eval_steps_per_second": 1.624, "step": 7488 }, { "epoch": 389.61, "grad_norm": 0.2672542333602905, "learning_rate": 2.1094736842105264e-06, "loss": 0.7599, "step": 7500 }, { "epoch": 389.97, "eval_gen_len": 0.0, "eval_loss": 0.6940454840660095, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.8439, "eval_samples_per_second": 8.954, "eval_steps_per_second": 1.557, "step": 7507 }, { "epoch": 390.96, "eval_gen_len": 0.0, "eval_loss": 0.693627655506134, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.9964, "eval_samples_per_second": 8.849, "eval_steps_per_second": 1.539, "step": 7526 }, { "epoch": 392.0, "eval_gen_len": 0.0, "eval_loss": 0.6933034062385559, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.8761, "eval_samples_per_second": 8.931, "eval_steps_per_second": 1.553, "step": 7546 }, { "epoch": 392.99, "eval_gen_len": 0.0, "eval_loss": 0.6929065585136414, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 13.196, "eval_samples_per_second": 8.715, "eval_steps_per_second": 1.516, "step": 7565 }, { "epoch": 393.97, "eval_gen_len": 0.0, "eval_loss": 0.6925193667411804, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.7742, "eval_samples_per_second": 9.002, "eval_steps_per_second": 1.566, "step": 7584 }, { "epoch": 394.96, "eval_gen_len": 0.0, "eval_loss": 0.6921875476837158, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.2942, "eval_samples_per_second": 9.354, "eval_steps_per_second": 1.627, "step": 7603 }, { "epoch": 396.0, "eval_gen_len": 0.0, "eval_loss": 0.6919534802436829, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.7028, "eval_samples_per_second": 9.053, "eval_steps_per_second": 1.574, "step": 7623 }, { "epoch": 396.99, "eval_gen_len": 0.0, "eval_loss": 0.6915541291236877, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.1374, "eval_samples_per_second": 9.475, "eval_steps_per_second": 1.648, "step": 7642 }, { "epoch": 397.97, "eval_gen_len": 0.0, "eval_loss": 0.6912309527397156, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.374, "eval_samples_per_second": 9.294, "eval_steps_per_second": 1.616, "step": 7661 }, { "epoch": 398.96, "eval_gen_len": 0.0, "eval_loss": 0.6908969879150391, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.8679, "eval_samples_per_second": 8.937, "eval_steps_per_second": 1.554, "step": 7680 }, { "epoch": 400.0, "eval_gen_len": 0.0, "eval_loss": 0.6906691193580627, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.8987, "eval_samples_per_second": 8.916, "eval_steps_per_second": 1.551, "step": 7700 }, { "epoch": 400.99, "eval_gen_len": 0.0, "eval_loss": 0.6903204321861267, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.7882, "eval_samples_per_second": 8.993, "eval_steps_per_second": 1.564, "step": 7719 }, { "epoch": 401.97, "eval_gen_len": 0.0, "eval_loss": 0.6900023221969604, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 13.052, "eval_samples_per_second": 8.811, "eval_steps_per_second": 1.532, "step": 7738 }, { "epoch": 402.96, "eval_gen_len": 0.0, "eval_loss": 0.6896329522132874, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.921, "eval_samples_per_second": 8.9, "eval_steps_per_second": 1.548, "step": 7757 }, { "epoch": 404.0, "eval_gen_len": 0.0, "eval_loss": 0.6893720030784607, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.755, "eval_samples_per_second": 9.016, "eval_steps_per_second": 1.568, "step": 7777 }, { "epoch": 404.99, "eval_gen_len": 0.0, "eval_loss": 0.6890887022018433, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.8106, "eval_samples_per_second": 8.977, "eval_steps_per_second": 1.561, "step": 7796 }, { "epoch": 405.97, "eval_gen_len": 0.0, "eval_loss": 0.6887722611427307, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.5889, "eval_samples_per_second": 9.135, "eval_steps_per_second": 1.589, "step": 7815 }, { "epoch": 406.96, "eval_gen_len": 0.0, "eval_loss": 0.6884374022483826, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.5505, "eval_samples_per_second": 9.163, "eval_steps_per_second": 1.594, "step": 7834 }, { "epoch": 408.0, "eval_gen_len": 0.0, "eval_loss": 0.6880633234977722, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.2243, "eval_samples_per_second": 9.407, "eval_steps_per_second": 1.636, "step": 7854 }, { "epoch": 408.99, "eval_gen_len": 0.0, "eval_loss": 0.6877562999725342, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.519, "eval_samples_per_second": 9.186, "eval_steps_per_second": 1.598, "step": 7873 }, { "epoch": 409.97, "eval_gen_len": 0.0, "eval_loss": 0.6874319314956665, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.2922, "eval_samples_per_second": 9.356, "eval_steps_per_second": 1.627, "step": 7892 }, { "epoch": 410.96, "eval_gen_len": 0.0, "eval_loss": 0.6871966123580933, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.4238, "eval_samples_per_second": 9.256, "eval_steps_per_second": 1.61, "step": 7911 }, { "epoch": 412.0, "eval_gen_len": 0.0, "eval_loss": 0.6869640350341797, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.3603, "eval_samples_per_second": 9.304, "eval_steps_per_second": 1.618, "step": 7931 }, { "epoch": 412.99, "eval_gen_len": 0.0, "eval_loss": 0.6867266297340393, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.7522, "eval_samples_per_second": 9.018, "eval_steps_per_second": 1.568, "step": 7950 }, { "epoch": 413.97, "eval_gen_len": 0.0, "eval_loss": 0.6865308880805969, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.7581, "eval_samples_per_second": 9.014, "eval_steps_per_second": 1.568, "step": 7969 }, { "epoch": 414.96, "eval_gen_len": 0.0, "eval_loss": 0.6863205432891846, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 13.0743, "eval_samples_per_second": 8.796, "eval_steps_per_second": 1.53, "step": 7988 }, { "epoch": 415.58, "grad_norm": 0.24980388581752777, "learning_rate": 1.5831578947368423e-06, "loss": 0.7446, "step": 8000 }, { "epoch": 416.0, "eval_gen_len": 0.0, "eval_loss": 0.6860491037368774, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.4592, "eval_samples_per_second": 9.23, "eval_steps_per_second": 1.605, "step": 8008 }, { "epoch": 416.99, "eval_gen_len": 0.0, "eval_loss": 0.6857825517654419, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.6928, "eval_samples_per_second": 9.06, "eval_steps_per_second": 1.576, "step": 8027 }, { "epoch": 417.97, "eval_gen_len": 0.0, "eval_loss": 0.6855095028877258, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.4988, "eval_samples_per_second": 9.201, "eval_steps_per_second": 1.6, "step": 8046 }, { "epoch": 418.96, "eval_gen_len": 0.0, "eval_loss": 0.6851878762245178, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.7595, "eval_samples_per_second": 9.013, "eval_steps_per_second": 1.567, "step": 8065 }, { "epoch": 420.0, "eval_gen_len": 0.0, "eval_loss": 0.6848768591880798, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.6421, "eval_samples_per_second": 9.097, "eval_steps_per_second": 1.582, "step": 8085 }, { "epoch": 420.99, "eval_gen_len": 0.0, "eval_loss": 0.684624969959259, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.5911, "eval_samples_per_second": 9.133, "eval_steps_per_second": 1.588, "step": 8104 }, { "epoch": 421.97, "eval_gen_len": 0.0, "eval_loss": 0.6844747066497803, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.4163, "eval_samples_per_second": 9.262, "eval_steps_per_second": 1.611, "step": 8123 }, { "epoch": 422.96, "eval_gen_len": 0.0, "eval_loss": 0.6842939257621765, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.2945, "eval_samples_per_second": 9.354, "eval_steps_per_second": 1.627, "step": 8142 }, { "epoch": 424.0, "eval_gen_len": 0.0, "eval_loss": 0.6840406060218811, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.9127, "eval_samples_per_second": 8.906, "eval_steps_per_second": 1.549, "step": 8162 }, { "epoch": 424.99, "eval_gen_len": 0.0, "eval_loss": 0.6838209629058838, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 13.1055, "eval_samples_per_second": 8.775, "eval_steps_per_second": 1.526, "step": 8181 }, { "epoch": 425.97, "eval_gen_len": 0.0, "eval_loss": 0.683562159538269, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.6087, "eval_samples_per_second": 9.121, "eval_steps_per_second": 1.586, "step": 8200 }, { "epoch": 426.96, "eval_gen_len": 0.0, "eval_loss": 0.6833438277244568, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.7323, "eval_samples_per_second": 9.032, "eval_steps_per_second": 1.571, "step": 8219 }, { "epoch": 428.0, "eval_gen_len": 0.0, "eval_loss": 0.6831278204917908, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.203, "eval_samples_per_second": 9.424, "eval_steps_per_second": 1.639, "step": 8239 }, { "epoch": 428.99, "eval_gen_len": 0.0, "eval_loss": 0.682895302772522, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.6316, "eval_samples_per_second": 9.104, "eval_steps_per_second": 1.583, "step": 8258 }, { "epoch": 429.97, "eval_gen_len": 0.0, "eval_loss": 0.6826810836791992, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.3534, "eval_samples_per_second": 9.309, "eval_steps_per_second": 1.619, "step": 8277 }, { "epoch": 430.96, "eval_gen_len": 0.0, "eval_loss": 0.6824563145637512, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.1952, "eval_samples_per_second": 9.43, "eval_steps_per_second": 1.64, "step": 8296 }, { "epoch": 432.0, "eval_gen_len": 0.0, "eval_loss": 0.682238757610321, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.3689, "eval_samples_per_second": 9.297, "eval_steps_per_second": 1.617, "step": 8316 }, { "epoch": 432.99, "eval_gen_len": 0.0, "eval_loss": 0.6820657253265381, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.7369, "eval_samples_per_second": 9.029, "eval_steps_per_second": 1.57, "step": 8335 }, { "epoch": 433.97, "eval_gen_len": 0.0, "eval_loss": 0.6819124817848206, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.3025, "eval_samples_per_second": 9.348, "eval_steps_per_second": 1.626, "step": 8354 }, { "epoch": 434.96, "eval_gen_len": 0.0, "eval_loss": 0.6817324161529541, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.3628, "eval_samples_per_second": 9.302, "eval_steps_per_second": 1.618, "step": 8373 }, { "epoch": 436.0, "eval_gen_len": 0.0, "eval_loss": 0.6815437078475952, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.1462, "eval_samples_per_second": 9.468, "eval_steps_per_second": 1.647, "step": 8393 }, { "epoch": 436.99, "eval_gen_len": 0.0, "eval_loss": 0.6813645958900452, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.5282, "eval_samples_per_second": 9.179, "eval_steps_per_second": 1.596, "step": 8412 }, { "epoch": 437.97, "eval_gen_len": 0.0, "eval_loss": 0.6811843514442444, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.2909, "eval_samples_per_second": 9.357, "eval_steps_per_second": 1.627, "step": 8431 }, { "epoch": 438.96, "eval_gen_len": 0.0, "eval_loss": 0.6810438632965088, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.1515, "eval_samples_per_second": 9.464, "eval_steps_per_second": 1.646, "step": 8450 }, { "epoch": 440.0, "eval_gen_len": 0.0, "eval_loss": 0.6809141635894775, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.3824, "eval_samples_per_second": 9.287, "eval_steps_per_second": 1.615, "step": 8470 }, { "epoch": 440.99, "eval_gen_len": 0.0, "eval_loss": 0.6807241439819336, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 12.4518, "eval_samples_per_second": 9.236, "eval_steps_per_second": 1.606, "step": 8489 }, { "epoch": 441.56, "grad_norm": 0.2838553488254547, "learning_rate": 1.0568421052631578e-06, "loss": 0.7356, "step": 8500 } ], "logging_steps": 500, "max_steps": 9500, "num_input_tokens_seen": 0, "num_train_epochs": 500, "save_steps": 500, "total_flos": 2.476698032312156e+17, "train_batch_size": 6, "trial_name": null, "trial_params": null }