diff --git "a/checkpoint-7500/trainer_state.json" "b/checkpoint-7500/trainer_state.json" new file mode 100644--- /dev/null +++ "b/checkpoint-7500/trainer_state.json" @@ -0,0 +1,5183 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 389.61038961038963, + "eval_steps": 500, + "global_step": 7500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.99, + "eval_gen_len": 14.3217, + "eval_loss": 21.81446075439453, + "eval_rouge1": 0.0662, + "eval_rouge2": 0.0082, + "eval_rougeL": 0.0523, + "eval_rougeLsum": 0.0526, + "eval_runtime": 12.9343, + "eval_samples_per_second": 8.891, + "eval_steps_per_second": 1.546, + "step": 19 + }, + { + "epoch": 1.97, + "eval_gen_len": 14.4, + "eval_loss": 21.760791778564453, + "eval_rouge1": 0.0661, + "eval_rouge2": 0.0082, + "eval_rougeL": 0.052, + "eval_rougeLsum": 0.0523, + "eval_runtime": 11.8223, + "eval_samples_per_second": 9.727, + "eval_steps_per_second": 1.692, + "step": 38 + }, + { + "epoch": 2.96, + "eval_gen_len": 14.4, + "eval_loss": 21.68472671508789, + "eval_rouge1": 0.0651, + "eval_rouge2": 0.0077, + "eval_rougeL": 0.051, + "eval_rougeLsum": 0.0513, + "eval_runtime": 12.1423, + "eval_samples_per_second": 9.471, + "eval_steps_per_second": 1.647, + "step": 57 + }, + { + "epoch": 4.0, + "eval_gen_len": 14.3304, + "eval_loss": 21.582918167114258, + "eval_rouge1": 0.0666, + "eval_rouge2": 0.0081, + "eval_rougeL": 0.0525, + "eval_rougeLsum": 0.0525, + "eval_runtime": 12.0406, + "eval_samples_per_second": 9.551, + "eval_steps_per_second": 1.661, + "step": 77 + }, + { + "epoch": 4.99, + "eval_gen_len": 14.2609, + "eval_loss": 21.46294403076172, + "eval_rouge1": 0.0668, + "eval_rouge2": 0.008, + "eval_rougeL": 0.0527, + "eval_rougeLsum": 0.0526, + "eval_runtime": 12.3725, + "eval_samples_per_second": 9.295, + "eval_steps_per_second": 1.616, + "step": 96 + }, + { + "epoch": 5.97, + "eval_gen_len": 14.2261, + "eval_loss": 21.31937599182129, + "eval_rouge1": 0.0671, + "eval_rouge2": 0.0077, + "eval_rougeL": 0.0525, + "eval_rougeLsum": 0.0525, + "eval_runtime": 12.5289, + "eval_samples_per_second": 9.179, + "eval_steps_per_second": 1.596, + "step": 115 + }, + { + "epoch": 6.96, + "eval_gen_len": 14.3043, + "eval_loss": 21.15254783630371, + "eval_rouge1": 0.0677, + "eval_rouge2": 0.0077, + "eval_rougeL": 0.0525, + "eval_rougeLsum": 0.0525, + "eval_runtime": 12.0936, + "eval_samples_per_second": 9.509, + "eval_steps_per_second": 1.654, + "step": 134 + }, + { + "epoch": 8.0, + "eval_gen_len": 14.2609, + "eval_loss": 20.956642150878906, + "eval_rouge1": 0.0679, + "eval_rouge2": 0.0076, + "eval_rougeL": 0.0522, + "eval_rougeLsum": 0.0521, + "eval_runtime": 12.6351, + "eval_samples_per_second": 9.102, + "eval_steps_per_second": 1.583, + "step": 154 + }, + { + "epoch": 8.99, + "eval_gen_len": 14.1217, + "eval_loss": 20.7694091796875, + "eval_rouge1": 0.0664, + "eval_rouge2": 0.0074, + "eval_rougeL": 0.0507, + "eval_rougeLsum": 0.0507, + "eval_runtime": 12.3661, + "eval_samples_per_second": 9.3, + "eval_steps_per_second": 1.617, + "step": 173 + }, + { + "epoch": 9.97, + "eval_gen_len": 14.0957, + "eval_loss": 20.60512351989746, + "eval_rouge1": 0.0685, + "eval_rouge2": 0.0078, + "eval_rougeL": 0.0519, + "eval_rougeLsum": 0.0518, + "eval_runtime": 12.6094, + "eval_samples_per_second": 9.12, + "eval_steps_per_second": 1.586, + "step": 192 + }, + { + "epoch": 10.96, + "eval_gen_len": 13.9913, + "eval_loss": 20.46657371520996, + "eval_rouge1": 0.0672, + "eval_rouge2": 0.0072, + "eval_rougeL": 0.0511, + "eval_rougeLsum": 0.0511, + "eval_runtime": 12.4343, + "eval_samples_per_second": 9.249, + "eval_steps_per_second": 1.608, + "step": 211 + }, + { + "epoch": 12.0, + "eval_gen_len": 14.2957, + "eval_loss": 20.335567474365234, + "eval_rouge1": 0.0658, + "eval_rouge2": 0.0079, + "eval_rougeL": 0.05, + "eval_rougeLsum": 0.05, + "eval_runtime": 12.4623, + "eval_samples_per_second": 9.228, + "eval_steps_per_second": 1.605, + "step": 231 + }, + { + "epoch": 12.99, + "eval_gen_len": 14.7826, + "eval_loss": 20.210546493530273, + "eval_rouge1": 0.0729, + "eval_rouge2": 0.0108, + "eval_rougeL": 0.0562, + "eval_rougeLsum": 0.056, + "eval_runtime": 12.6201, + "eval_samples_per_second": 9.112, + "eval_steps_per_second": 1.585, + "step": 250 + }, + { + "epoch": 13.97, + "eval_gen_len": 14.8174, + "eval_loss": 20.080350875854492, + "eval_rouge1": 0.0731, + "eval_rouge2": 0.0105, + "eval_rougeL": 0.057, + "eval_rougeLsum": 0.0566, + "eval_runtime": 12.5892, + "eval_samples_per_second": 9.135, + "eval_steps_per_second": 1.589, + "step": 269 + }, + { + "epoch": 14.96, + "eval_gen_len": 15.3826, + "eval_loss": 19.947362899780273, + "eval_rouge1": 0.0805, + "eval_rouge2": 0.0127, + "eval_rougeL": 0.0624, + "eval_rougeLsum": 0.0619, + "eval_runtime": 12.2446, + "eval_samples_per_second": 9.392, + "eval_steps_per_second": 1.633, + "step": 288 + }, + { + "epoch": 16.0, + "eval_gen_len": 15.8261, + "eval_loss": 19.80646324157715, + "eval_rouge1": 0.0818, + "eval_rouge2": 0.0129, + "eval_rougeL": 0.0636, + "eval_rougeLsum": 0.0633, + "eval_runtime": 12.037, + "eval_samples_per_second": 9.554, + "eval_steps_per_second": 1.662, + "step": 308 + }, + { + "epoch": 16.99, + "eval_gen_len": 16.1391, + "eval_loss": 19.66917610168457, + "eval_rouge1": 0.0837, + "eval_rouge2": 0.0139, + "eval_rougeL": 0.0648, + "eval_rougeLsum": 0.0647, + "eval_runtime": 12.6734, + "eval_samples_per_second": 9.074, + "eval_steps_per_second": 1.578, + "step": 327 + }, + { + "epoch": 17.97, + "eval_gen_len": 16.5391, + "eval_loss": 19.5202579498291, + "eval_rouge1": 0.0915, + "eval_rouge2": 0.0168, + "eval_rougeL": 0.0713, + "eval_rougeLsum": 0.0709, + "eval_runtime": 12.3571, + "eval_samples_per_second": 9.306, + "eval_steps_per_second": 1.619, + "step": 346 + }, + { + "epoch": 18.96, + "eval_gen_len": 16.687, + "eval_loss": 19.36480712890625, + "eval_rouge1": 0.0934, + "eval_rouge2": 0.0178, + "eval_rougeL": 0.072, + "eval_rougeLsum": 0.0719, + "eval_runtime": 12.3428, + "eval_samples_per_second": 9.317, + "eval_steps_per_second": 1.62, + "step": 365 + }, + { + "epoch": 20.0, + "eval_gen_len": 17.0522, + "eval_loss": 19.185970306396484, + "eval_rouge1": 0.0951, + "eval_rouge2": 0.0172, + "eval_rougeL": 0.0735, + "eval_rougeLsum": 0.0735, + "eval_runtime": 12.4924, + "eval_samples_per_second": 9.206, + "eval_steps_per_second": 1.601, + "step": 385 + }, + { + "epoch": 20.99, + "eval_gen_len": 17.1913, + "eval_loss": 18.998315811157227, + "eval_rouge1": 0.0938, + "eval_rouge2": 0.0192, + "eval_rougeL": 0.0754, + "eval_rougeLsum": 0.0755, + "eval_runtime": 12.2427, + "eval_samples_per_second": 9.393, + "eval_steps_per_second": 1.634, + "step": 404 + }, + { + "epoch": 21.97, + "eval_gen_len": 17.3304, + "eval_loss": 18.782604217529297, + "eval_rouge1": 0.0975, + "eval_rouge2": 0.0223, + "eval_rougeL": 0.0786, + "eval_rougeLsum": 0.0788, + "eval_runtime": 12.4341, + "eval_samples_per_second": 9.249, + "eval_steps_per_second": 1.608, + "step": 423 + }, + { + "epoch": 22.96, + "eval_gen_len": 17.3304, + "eval_loss": 18.529996871948242, + "eval_rouge1": 0.0986, + "eval_rouge2": 0.0229, + "eval_rougeL": 0.0787, + "eval_rougeLsum": 0.079, + "eval_runtime": 12.138, + "eval_samples_per_second": 9.474, + "eval_steps_per_second": 1.648, + "step": 442 + }, + { + "epoch": 24.0, + "eval_gen_len": 17.4696, + "eval_loss": 18.21288299560547, + "eval_rouge1": 0.0935, + "eval_rouge2": 0.0195, + "eval_rougeL": 0.0761, + "eval_rougeLsum": 0.0763, + "eval_runtime": 12.3892, + "eval_samples_per_second": 9.282, + "eval_steps_per_second": 1.614, + "step": 462 + }, + { + "epoch": 24.99, + "eval_gen_len": 17.6087, + "eval_loss": 17.844209671020508, + "eval_rouge1": 0.0936, + "eval_rouge2": 0.0225, + "eval_rougeL": 0.0756, + "eval_rougeLsum": 0.0758, + "eval_runtime": 12.2522, + "eval_samples_per_second": 9.386, + "eval_steps_per_second": 1.632, + "step": 481 + }, + { + "epoch": 25.97, + "grad_norm": 4.415005683898926, + "learning_rate": 9.474736842105265e-06, + "loss": 19.6383, + "step": 500 + }, + { + "epoch": 25.97, + "eval_gen_len": 17.7478, + "eval_loss": 17.39573097229004, + "eval_rouge1": 0.0967, + "eval_rouge2": 0.0221, + "eval_rougeL": 0.0765, + "eval_rougeLsum": 0.0764, + "eval_runtime": 12.5376, + "eval_samples_per_second": 9.172, + "eval_steps_per_second": 1.595, + "step": 500 + }, + { + "epoch": 26.96, + "eval_gen_len": 17.7478, + "eval_loss": 16.90059471130371, + "eval_rouge1": 0.0983, + "eval_rouge2": 0.0198, + "eval_rougeL": 0.0786, + "eval_rougeLsum": 0.0785, + "eval_runtime": 12.1506, + "eval_samples_per_second": 9.465, + "eval_steps_per_second": 1.646, + "step": 519 + }, + { + "epoch": 28.0, + "eval_gen_len": 17.3304, + "eval_loss": 16.37845802307129, + "eval_rouge1": 0.0916, + "eval_rouge2": 0.0162, + "eval_rougeL": 0.0723, + "eval_rougeLsum": 0.0724, + "eval_runtime": 12.9559, + "eval_samples_per_second": 8.876, + "eval_steps_per_second": 1.544, + "step": 539 + }, + { + "epoch": 28.99, + "eval_gen_len": 17.0522, + "eval_loss": 15.880877494812012, + "eval_rouge1": 0.0898, + "eval_rouge2": 0.0212, + "eval_rougeL": 0.0716, + "eval_rougeLsum": 0.0711, + "eval_runtime": 12.3917, + "eval_samples_per_second": 9.28, + "eval_steps_per_second": 1.614, + "step": 558 + }, + { + "epoch": 29.97, + "eval_gen_len": 17.6087, + "eval_loss": 15.38318920135498, + "eval_rouge1": 0.09, + "eval_rouge2": 0.0179, + "eval_rougeL": 0.0717, + "eval_rougeLsum": 0.0717, + "eval_runtime": 12.4102, + "eval_samples_per_second": 9.267, + "eval_steps_per_second": 1.612, + "step": 577 + }, + { + "epoch": 30.96, + "eval_gen_len": 17.6087, + "eval_loss": 14.880407333374023, + "eval_rouge1": 0.0921, + "eval_rouge2": 0.02, + "eval_rougeL": 0.0768, + "eval_rougeLsum": 0.0765, + "eval_runtime": 12.1352, + "eval_samples_per_second": 9.477, + "eval_steps_per_second": 1.648, + "step": 596 + }, + { + "epoch": 32.0, + "eval_gen_len": 17.7478, + "eval_loss": 14.299490928649902, + "eval_rouge1": 0.0856, + "eval_rouge2": 0.0197, + "eval_rougeL": 0.0713, + "eval_rougeLsum": 0.071, + "eval_runtime": 12.7436, + "eval_samples_per_second": 9.024, + "eval_steps_per_second": 1.569, + "step": 616 + }, + { + "epoch": 32.99, + "eval_gen_len": 17.0522, + "eval_loss": 13.684185981750488, + "eval_rouge1": 0.0796, + "eval_rouge2": 0.0153, + "eval_rougeL": 0.0627, + "eval_rougeLsum": 0.0626, + "eval_runtime": 12.5048, + "eval_samples_per_second": 9.196, + "eval_steps_per_second": 1.599, + "step": 635 + }, + { + "epoch": 33.97, + "eval_gen_len": 17.3304, + "eval_loss": 13.098108291625977, + "eval_rouge1": 0.0772, + "eval_rouge2": 0.0098, + "eval_rougeL": 0.0646, + "eval_rougeLsum": 0.0643, + "eval_runtime": 12.1222, + "eval_samples_per_second": 9.487, + "eval_steps_per_second": 1.65, + "step": 654 + }, + { + "epoch": 34.96, + "eval_gen_len": 17.4696, + "eval_loss": 12.545892715454102, + "eval_rouge1": 0.0751, + "eval_rouge2": 0.0107, + "eval_rougeL": 0.0622, + "eval_rougeLsum": 0.0622, + "eval_runtime": 12.1398, + "eval_samples_per_second": 9.473, + "eval_steps_per_second": 1.647, + "step": 673 + }, + { + "epoch": 36.0, + "eval_gen_len": 17.3304, + "eval_loss": 12.004250526428223, + "eval_rouge1": 0.0704, + "eval_rouge2": 0.0081, + "eval_rougeL": 0.0588, + "eval_rougeLsum": 0.0588, + "eval_runtime": 12.5092, + "eval_samples_per_second": 9.193, + "eval_steps_per_second": 1.599, + "step": 693 + }, + { + "epoch": 36.99, + "eval_gen_len": 17.887, + "eval_loss": 11.514721870422363, + "eval_rouge1": 0.0672, + "eval_rouge2": 0.0079, + "eval_rougeL": 0.0577, + "eval_rougeLsum": 0.0577, + "eval_runtime": 12.4809, + "eval_samples_per_second": 9.214, + "eval_steps_per_second": 1.602, + "step": 712 + }, + { + "epoch": 37.97, + "eval_gen_len": 17.6087, + "eval_loss": 11.038866996765137, + "eval_rouge1": 0.0513, + "eval_rouge2": 0.0045, + "eval_rougeL": 0.046, + "eval_rougeLsum": 0.0457, + "eval_runtime": 12.6414, + "eval_samples_per_second": 9.097, + "eval_steps_per_second": 1.582, + "step": 731 + }, + { + "epoch": 38.96, + "eval_gen_len": 17.1913, + "eval_loss": 10.570833206176758, + "eval_rouge1": 0.0468, + "eval_rouge2": 0.0029, + "eval_rougeL": 0.0404, + "eval_rougeLsum": 0.0403, + "eval_runtime": 12.7674, + "eval_samples_per_second": 9.007, + "eval_steps_per_second": 1.566, + "step": 750 + }, + { + "epoch": 40.0, + "eval_gen_len": 17.3565, + "eval_loss": 10.076028823852539, + "eval_rouge1": 0.0352, + "eval_rouge2": 0.0037, + "eval_rougeL": 0.0305, + "eval_rougeLsum": 0.0304, + "eval_runtime": 12.8987, + "eval_samples_per_second": 8.916, + "eval_steps_per_second": 1.551, + "step": 770 + }, + { + "epoch": 40.99, + "eval_gen_len": 17.4957, + "eval_loss": 9.603371620178223, + "eval_rouge1": 0.024, + "eval_rouge2": 0.0024, + "eval_rougeL": 0.0218, + "eval_rougeLsum": 0.0218, + "eval_runtime": 12.2083, + "eval_samples_per_second": 9.42, + "eval_steps_per_second": 1.638, + "step": 789 + }, + { + "epoch": 41.97, + "eval_gen_len": 18.3043, + "eval_loss": 9.131211280822754, + "eval_rouge1": 0.0193, + "eval_rouge2": 0.0022, + "eval_rougeL": 0.0184, + "eval_rougeLsum": 0.0185, + "eval_runtime": 12.1389, + "eval_samples_per_second": 9.474, + "eval_steps_per_second": 1.648, + "step": 808 + }, + { + "epoch": 42.96, + "eval_gen_len": 18.7217, + "eval_loss": 8.668445587158203, + "eval_rouge1": 0.0116, + "eval_rouge2": 0.0013, + "eval_rougeL": 0.0109, + "eval_rougeLsum": 0.0108, + "eval_runtime": 12.4712, + "eval_samples_per_second": 9.221, + "eval_steps_per_second": 1.604, + "step": 827 + }, + { + "epoch": 44.0, + "eval_gen_len": 18.8609, + "eval_loss": 8.1836576461792, + "eval_rouge1": 0.0031, + "eval_rouge2": 0.0004, + "eval_rougeL": 0.0032, + "eval_rougeLsum": 0.0032, + "eval_runtime": 12.1395, + "eval_samples_per_second": 9.473, + "eval_steps_per_second": 1.648, + "step": 847 + }, + { + "epoch": 44.99, + "eval_gen_len": 18.8609, + "eval_loss": 7.736245632171631, + "eval_rouge1": 0.0028, + "eval_rouge2": 0.0002, + "eval_rougeL": 0.0028, + "eval_rougeLsum": 0.0028, + "eval_runtime": 12.2043, + "eval_samples_per_second": 9.423, + "eval_steps_per_second": 1.639, + "step": 866 + }, + { + "epoch": 45.97, + "eval_gen_len": 18.8609, + "eval_loss": 7.298835277557373, + "eval_rouge1": 0.0018, + "eval_rouge2": 0.0004, + "eval_rougeL": 0.0018, + "eval_rougeLsum": 0.0018, + "eval_runtime": 12.1301, + "eval_samples_per_second": 9.481, + "eval_steps_per_second": 1.649, + "step": 885 + }, + { + "epoch": 46.96, + "eval_gen_len": 19.0, + "eval_loss": 6.873920917510986, + "eval_rouge1": 0.001, + "eval_rouge2": 0.0002, + "eval_rougeL": 0.001, + "eval_rougeLsum": 0.0009, + "eval_runtime": 12.1519, + "eval_samples_per_second": 9.464, + "eval_steps_per_second": 1.646, + "step": 904 + }, + { + "epoch": 48.0, + "eval_gen_len": 19.0, + "eval_loss": 6.440176486968994, + "eval_rouge1": 0.0001, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0001, + "eval_rougeLsum": 0.0001, + "eval_runtime": 12.3618, + "eval_samples_per_second": 9.303, + "eval_steps_per_second": 1.618, + "step": 924 + }, + { + "epoch": 48.99, + "eval_gen_len": 19.0, + "eval_loss": 6.049317359924316, + "eval_rouge1": 0.0006, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0006, + "eval_rougeLsum": 0.0005, + "eval_runtime": 12.167, + "eval_samples_per_second": 9.452, + "eval_steps_per_second": 1.644, + "step": 943 + }, + { + "epoch": 49.97, + "eval_gen_len": 19.0, + "eval_loss": 5.643195152282715, + "eval_rouge1": 0.0002, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0002, + "eval_rougeLsum": 0.0002, + "eval_runtime": 11.9138, + "eval_samples_per_second": 9.653, + "eval_steps_per_second": 1.679, + "step": 962 + }, + { + "epoch": 50.96, + "eval_gen_len": 19.0, + "eval_loss": 5.2581987380981445, + "eval_rouge1": 0.0007, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0008, + "eval_rougeLsum": 0.0007, + "eval_runtime": 12.5979, + "eval_samples_per_second": 9.129, + "eval_steps_per_second": 1.588, + "step": 981 + }, + { + "epoch": 51.95, + "grad_norm": 5.70858097076416, + "learning_rate": 8.949473684210527e-06, + "loss": 11.5478, + "step": 1000 + }, + { + "epoch": 52.0, + "eval_gen_len": 19.0, + "eval_loss": 4.877782344818115, + "eval_rouge1": 0.0006, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0006, + "eval_rougeLsum": 0.0006, + "eval_runtime": 12.0728, + "eval_samples_per_second": 9.526, + "eval_steps_per_second": 1.657, + "step": 1001 + }, + { + "epoch": 52.99, + "eval_gen_len": 19.0, + "eval_loss": 4.53688383102417, + "eval_rouge1": 0.0006, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0006, + "eval_rougeLsum": 0.0006, + "eval_runtime": 12.1263, + "eval_samples_per_second": 9.484, + "eval_steps_per_second": 1.649, + "step": 1020 + }, + { + "epoch": 53.97, + "eval_gen_len": 19.0, + "eval_loss": 4.222665309906006, + "eval_rouge1": 0.0002, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0002, + "eval_rougeLsum": 0.0002, + "eval_runtime": 12.5806, + "eval_samples_per_second": 9.141, + "eval_steps_per_second": 1.59, + "step": 1039 + }, + { + "epoch": 54.96, + "eval_gen_len": 19.0, + "eval_loss": 3.9297854900360107, + "eval_rouge1": 0.0008, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0008, + "eval_rougeLsum": 0.0007, + "eval_runtime": 12.7467, + "eval_samples_per_second": 9.022, + "eval_steps_per_second": 1.569, + "step": 1058 + }, + { + "epoch": 56.0, + "eval_gen_len": 19.0, + "eval_loss": 3.6505942344665527, + "eval_rouge1": 0.0005, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0005, + "eval_rougeLsum": 0.0005, + "eval_runtime": 12.6962, + "eval_samples_per_second": 9.058, + "eval_steps_per_second": 1.575, + "step": 1078 + }, + { + "epoch": 56.99, + "eval_gen_len": 19.0, + "eval_loss": 3.4100279808044434, + "eval_rouge1": 0.0002, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0002, + "eval_rougeLsum": 0.0002, + "eval_runtime": 12.5475, + "eval_samples_per_second": 9.165, + "eval_steps_per_second": 1.594, + "step": 1097 + }, + { + "epoch": 57.97, + "eval_gen_len": 19.0, + "eval_loss": 3.197094202041626, + "eval_rouge1": 0.0006, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0006, + "eval_rougeLsum": 0.0006, + "eval_runtime": 12.1721, + "eval_samples_per_second": 9.448, + "eval_steps_per_second": 1.643, + "step": 1116 + }, + { + "epoch": 58.96, + "eval_gen_len": 18.9913, + "eval_loss": 3.0094308853149414, + "eval_rouge1": 0.0006, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0006, + "eval_rougeLsum": 0.0006, + "eval_runtime": 12.2313, + "eval_samples_per_second": 9.402, + "eval_steps_per_second": 1.635, + "step": 1135 + }, + { + "epoch": 60.0, + "eval_gen_len": 18.9913, + "eval_loss": 2.841013193130493, + "eval_rouge1": 0.0008, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0008, + "eval_rougeLsum": 0.0008, + "eval_runtime": 12.5511, + "eval_samples_per_second": 9.163, + "eval_steps_per_second": 1.593, + "step": 1155 + }, + { + "epoch": 60.99, + "eval_gen_len": 18.9826, + "eval_loss": 2.698159694671631, + "eval_rouge1": 0.0007, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0007, + "eval_rougeLsum": 0.0007, + "eval_runtime": 12.0968, + "eval_samples_per_second": 9.507, + "eval_steps_per_second": 1.653, + "step": 1174 + }, + { + "epoch": 61.97, + "eval_gen_len": 18.6783, + "eval_loss": 2.5645217895507812, + "eval_rouge1": 0.0011, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0011, + "eval_rougeLsum": 0.0011, + "eval_runtime": 12.1821, + "eval_samples_per_second": 9.44, + "eval_steps_per_second": 1.642, + "step": 1193 + }, + { + "epoch": 62.96, + "eval_gen_len": 17.3565, + "eval_loss": 2.445538282394409, + "eval_rouge1": 0.0007, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0007, + "eval_rougeLsum": 0.0007, + "eval_runtime": 12.1339, + "eval_samples_per_second": 9.478, + "eval_steps_per_second": 1.648, + "step": 1212 + }, + { + "epoch": 64.0, + "eval_gen_len": 13.5043, + "eval_loss": 2.338679075241089, + "eval_rouge1": 0.0011, + "eval_rouge2": 0.0, + "eval_rougeL": 0.001, + "eval_rougeLsum": 0.001, + "eval_runtime": 12.4085, + "eval_samples_per_second": 9.268, + "eval_steps_per_second": 1.612, + "step": 1232 + }, + { + "epoch": 64.99, + "eval_gen_len": 9.4348, + "eval_loss": 2.2483484745025635, + "eval_rouge1": 0.0002, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0002, + "eval_rougeLsum": 0.0002, + "eval_runtime": 12.3896, + "eval_samples_per_second": 9.282, + "eval_steps_per_second": 1.614, + "step": 1251 + }, + { + "epoch": 65.97, + "eval_gen_len": 6.9652, + "eval_loss": 2.1728155612945557, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.4531, + "eval_samples_per_second": 9.235, + "eval_steps_per_second": 1.606, + "step": 1270 + }, + { + "epoch": 66.96, + "eval_gen_len": 6.2957, + "eval_loss": 2.1103546619415283, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.2925, + "eval_samples_per_second": 9.355, + "eval_steps_per_second": 1.627, + "step": 1289 + }, + { + "epoch": 68.0, + "eval_gen_len": 6.0, + "eval_loss": 2.0531256198883057, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.1269, + "eval_samples_per_second": 9.483, + "eval_steps_per_second": 1.649, + "step": 1309 + }, + { + "epoch": 68.99, + "eval_gen_len": 5.7043, + "eval_loss": 2.006763219833374, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.0803, + "eval_samples_per_second": 9.52, + "eval_steps_per_second": 1.656, + "step": 1328 + }, + { + "epoch": 69.97, + "eval_gen_len": 5.6609, + "eval_loss": 1.9675697088241577, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 13.197, + "eval_samples_per_second": 8.714, + "eval_steps_per_second": 1.515, + "step": 1347 + }, + { + "epoch": 70.96, + "eval_gen_len": 5.6, + "eval_loss": 1.9337714910507202, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.696, + "eval_samples_per_second": 9.058, + "eval_steps_per_second": 1.575, + "step": 1366 + }, + { + "epoch": 72.0, + "eval_gen_len": 5.6174, + "eval_loss": 1.9011404514312744, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.5629, + "eval_samples_per_second": 9.154, + "eval_steps_per_second": 1.592, + "step": 1386 + }, + { + "epoch": 72.99, + "eval_gen_len": 5.6435, + "eval_loss": 1.8734184503555298, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.9654, + "eval_samples_per_second": 8.87, + "eval_steps_per_second": 1.543, + "step": 1405 + }, + { + "epoch": 73.97, + "eval_gen_len": 5.7739, + "eval_loss": 1.84665846824646, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.8257, + "eval_samples_per_second": 8.966, + "eval_steps_per_second": 1.559, + "step": 1424 + }, + { + "epoch": 74.96, + "eval_gen_len": 5.7478, + "eval_loss": 1.8196372985839844, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.89, + "eval_samples_per_second": 8.922, + "eval_steps_per_second": 1.552, + "step": 1443 + }, + { + "epoch": 76.0, + "eval_gen_len": 5.7217, + "eval_loss": 1.797453761100769, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.4957, + "eval_samples_per_second": 9.203, + "eval_steps_per_second": 1.601, + "step": 1463 + }, + { + "epoch": 76.99, + "eval_gen_len": 5.8174, + "eval_loss": 1.7788159847259521, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.4326, + "eval_samples_per_second": 9.25, + "eval_steps_per_second": 1.609, + "step": 1482 + }, + { + "epoch": 77.92, + "grad_norm": 2.0204899311065674, + "learning_rate": 8.42421052631579e-06, + "loss": 3.2357, + "step": 1500 + }, + { + "epoch": 77.97, + "eval_gen_len": 5.8957, + "eval_loss": 1.76212739944458, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.5086, + "eval_samples_per_second": 9.194, + "eval_steps_per_second": 1.599, + "step": 1501 + }, + { + "epoch": 78.96, + "eval_gen_len": 5.8957, + "eval_loss": 1.744727373123169, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.3492, + "eval_samples_per_second": 9.312, + "eval_steps_per_second": 1.62, + "step": 1520 + }, + { + "epoch": 80.0, + "eval_gen_len": 5.9391, + "eval_loss": 1.7277677059173584, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.126, + "eval_samples_per_second": 9.484, + "eval_steps_per_second": 1.649, + "step": 1540 + }, + { + "epoch": 80.99, + "eval_gen_len": 5.8435, + "eval_loss": 1.7146191596984863, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.5132, + "eval_samples_per_second": 9.19, + "eval_steps_per_second": 1.598, + "step": 1559 + }, + { + "epoch": 81.97, + "eval_gen_len": 5.513, + "eval_loss": 1.7026437520980835, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.7453, + "eval_samples_per_second": 9.023, + "eval_steps_per_second": 1.569, + "step": 1578 + }, + { + "epoch": 82.96, + "eval_gen_len": 5.5652, + "eval_loss": 1.68914794921875, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.98, + "eval_samples_per_second": 8.86, + "eval_steps_per_second": 1.541, + "step": 1597 + }, + { + "epoch": 84.0, + "eval_gen_len": 5.3304, + "eval_loss": 1.6754295825958252, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.127, + "eval_samples_per_second": 9.483, + "eval_steps_per_second": 1.649, + "step": 1617 + }, + { + "epoch": 84.99, + "eval_gen_len": 5.6435, + "eval_loss": 1.6632497310638428, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.2091, + "eval_samples_per_second": 9.419, + "eval_steps_per_second": 1.638, + "step": 1636 + }, + { + "epoch": 85.97, + "eval_gen_len": 5.9652, + "eval_loss": 1.652411699295044, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.3822, + "eval_samples_per_second": 9.288, + "eval_steps_per_second": 1.615, + "step": 1655 + }, + { + "epoch": 86.96, + "eval_gen_len": 5.9478, + "eval_loss": 1.642953872680664, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.0847, + "eval_samples_per_second": 9.516, + "eval_steps_per_second": 1.655, + "step": 1674 + }, + { + "epoch": 88.0, + "eval_gen_len": 5.4696, + "eval_loss": 1.6336156129837036, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.5368, + "eval_samples_per_second": 9.173, + "eval_steps_per_second": 1.595, + "step": 1694 + }, + { + "epoch": 88.99, + "eval_gen_len": 5.4, + "eval_loss": 1.6246790885925293, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.4452, + "eval_samples_per_second": 9.241, + "eval_steps_per_second": 1.607, + "step": 1713 + }, + { + "epoch": 89.97, + "eval_gen_len": 5.7739, + "eval_loss": 1.615963339805603, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.0491, + "eval_samples_per_second": 9.544, + "eval_steps_per_second": 1.66, + "step": 1732 + }, + { + "epoch": 90.96, + "eval_gen_len": 6.2348, + "eval_loss": 1.606810450553894, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.2296, + "eval_samples_per_second": 9.403, + "eval_steps_per_second": 1.635, + "step": 1751 + }, + { + "epoch": 92.0, + "eval_gen_len": 6.1652, + "eval_loss": 1.59696626663208, + "eval_rouge1": 0.0002, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0002, + "eval_rougeLsum": 0.0002, + "eval_runtime": 12.4411, + "eval_samples_per_second": 9.244, + "eval_steps_per_second": 1.608, + "step": 1771 + }, + { + "epoch": 92.99, + "eval_gen_len": 6.3739, + "eval_loss": 1.5894649028778076, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 11.8551, + "eval_samples_per_second": 9.7, + "eval_steps_per_second": 1.687, + "step": 1790 + }, + { + "epoch": 93.97, + "eval_gen_len": 6.5043, + "eval_loss": 1.5818349123001099, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.2434, + "eval_samples_per_second": 9.393, + "eval_steps_per_second": 1.634, + "step": 1809 + }, + { + "epoch": 94.96, + "eval_gen_len": 6.3565, + "eval_loss": 1.5746902227401733, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.3859, + "eval_samples_per_second": 9.285, + "eval_steps_per_second": 1.615, + "step": 1828 + }, + { + "epoch": 96.0, + "eval_gen_len": 6.8087, + "eval_loss": 1.567280650138855, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.1358, + "eval_samples_per_second": 9.476, + "eval_steps_per_second": 1.648, + "step": 1848 + }, + { + "epoch": 96.99, + "eval_gen_len": 6.8, + "eval_loss": 1.5616425275802612, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.3339, + "eval_samples_per_second": 9.324, + "eval_steps_per_second": 1.622, + "step": 1867 + }, + { + "epoch": 97.97, + "eval_gen_len": 6.6522, + "eval_loss": 1.5548292398452759, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.1749, + "eval_samples_per_second": 9.446, + "eval_steps_per_second": 1.643, + "step": 1886 + }, + { + "epoch": 98.96, + "eval_gen_len": 6.5913, + "eval_loss": 1.5485645532608032, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 11.995, + "eval_samples_per_second": 9.587, + "eval_steps_per_second": 1.667, + "step": 1905 + }, + { + "epoch": 100.0, + "eval_gen_len": 6.4522, + "eval_loss": 1.5418448448181152, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 11.7913, + "eval_samples_per_second": 9.753, + "eval_steps_per_second": 1.696, + "step": 1925 + }, + { + "epoch": 100.99, + "eval_gen_len": 5.6957, + "eval_loss": 1.5365816354751587, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.5711, + "eval_samples_per_second": 9.148, + "eval_steps_per_second": 1.591, + "step": 1944 + }, + { + "epoch": 101.97, + "eval_gen_len": 5.5739, + "eval_loss": 1.5312349796295166, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.7436, + "eval_samples_per_second": 9.024, + "eval_steps_per_second": 1.569, + "step": 1963 + }, + { + "epoch": 102.96, + "eval_gen_len": 5.4174, + "eval_loss": 1.5244060754776, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.3775, + "eval_samples_per_second": 9.291, + "eval_steps_per_second": 1.616, + "step": 1982 + }, + { + "epoch": 103.9, + "grad_norm": 1.870866298675537, + "learning_rate": 7.898947368421053e-06, + "loss": 1.8779, + "step": 2000 + }, + { + "epoch": 104.0, + "eval_gen_len": 5.3565, + "eval_loss": 1.5186233520507812, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.0793, + "eval_samples_per_second": 9.52, + "eval_steps_per_second": 1.656, + "step": 2002 + }, + { + "epoch": 104.99, + "eval_gen_len": 5.6174, + "eval_loss": 1.5112248659133911, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 11.9289, + "eval_samples_per_second": 9.64, + "eval_steps_per_second": 1.677, + "step": 2021 + }, + { + "epoch": 105.97, + "eval_gen_len": 5.9217, + "eval_loss": 1.5045664310455322, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 11.9549, + "eval_samples_per_second": 9.619, + "eval_steps_per_second": 1.673, + "step": 2040 + }, + { + "epoch": 106.96, + "eval_gen_len": 5.9913, + "eval_loss": 1.4977103471755981, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.5176, + "eval_samples_per_second": 9.187, + "eval_steps_per_second": 1.598, + "step": 2059 + }, + { + "epoch": 108.0, + "eval_gen_len": 5.4957, + "eval_loss": 1.491757869720459, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.36, + "eval_samples_per_second": 9.304, + "eval_steps_per_second": 1.618, + "step": 2079 + }, + { + "epoch": 108.99, + "eval_gen_len": 6.0348, + "eval_loss": 1.486743688583374, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.5097, + "eval_samples_per_second": 9.193, + "eval_steps_per_second": 1.599, + "step": 2098 + }, + { + "epoch": 109.97, + "eval_gen_len": 6.3304, + "eval_loss": 1.480473279953003, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 11.9075, + "eval_samples_per_second": 9.658, + "eval_steps_per_second": 1.68, + "step": 2117 + }, + { + "epoch": 110.96, + "eval_gen_len": 6.2, + "eval_loss": 1.4745731353759766, + "eval_rouge1": 0.0003, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0003, + "eval_rougeLsum": 0.0003, + "eval_runtime": 12.2105, + "eval_samples_per_second": 9.418, + "eval_steps_per_second": 1.638, + "step": 2136 + }, + { + "epoch": 112.0, + "eval_gen_len": 5.9826, + "eval_loss": 1.468475341796875, + "eval_rouge1": 0.0009, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0008, + "eval_rougeLsum": 0.0008, + "eval_runtime": 12.6855, + "eval_samples_per_second": 9.065, + "eval_steps_per_second": 1.577, + "step": 2156 + }, + { + "epoch": 112.99, + "eval_gen_len": 5.8261, + "eval_loss": 1.4624364376068115, + "eval_rouge1": 0.0009, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0008, + "eval_rougeLsum": 0.0008, + "eval_runtime": 12.3065, + "eval_samples_per_second": 9.345, + "eval_steps_per_second": 1.625, + "step": 2175 + }, + { + "epoch": 113.97, + "eval_gen_len": 5.487, + "eval_loss": 1.4564381837844849, + "eval_rouge1": 0.0009, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0008, + "eval_rougeLsum": 0.0008, + "eval_runtime": 13.0224, + "eval_samples_per_second": 8.831, + "eval_steps_per_second": 1.536, + "step": 2194 + }, + { + "epoch": 114.96, + "eval_gen_len": 5.1565, + "eval_loss": 1.4514414072036743, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.4022, + "eval_samples_per_second": 9.273, + "eval_steps_per_second": 1.613, + "step": 2213 + }, + { + "epoch": 116.0, + "eval_gen_len": 5.4957, + "eval_loss": 1.442409873008728, + "eval_rouge1": 0.001, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0009, + "eval_rougeLsum": 0.0009, + "eval_runtime": 12.3722, + "eval_samples_per_second": 9.295, + "eval_steps_per_second": 1.617, + "step": 2233 + }, + { + "epoch": 116.99, + "eval_gen_len": 5.7391, + "eval_loss": 1.4344819784164429, + "eval_rouge1": 0.0017, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0017, + "eval_rougeLsum": 0.0018, + "eval_runtime": 12.5554, + "eval_samples_per_second": 9.159, + "eval_steps_per_second": 1.593, + "step": 2252 + }, + { + "epoch": 117.97, + "eval_gen_len": 6.0435, + "eval_loss": 1.4248623847961426, + "eval_rouge1": 0.0021, + "eval_rouge2": 0.0, + "eval_rougeL": 0.002, + "eval_rougeLsum": 0.0021, + "eval_runtime": 12.1087, + "eval_samples_per_second": 9.497, + "eval_steps_per_second": 1.652, + "step": 2271 + }, + { + "epoch": 118.96, + "eval_gen_len": 6.4783, + "eval_loss": 1.4156382083892822, + "eval_rouge1": 0.0033, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0032, + "eval_rougeLsum": 0.0033, + "eval_runtime": 12.0374, + "eval_samples_per_second": 9.554, + "eval_steps_per_second": 1.661, + "step": 2290 + }, + { + "epoch": 120.0, + "eval_gen_len": 6.3043, + "eval_loss": 1.408909559249878, + "eval_rouge1": 0.0038, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0037, + "eval_rougeLsum": 0.0038, + "eval_runtime": 12.5996, + "eval_samples_per_second": 9.127, + "eval_steps_per_second": 1.587, + "step": 2310 + }, + { + "epoch": 120.99, + "eval_gen_len": 6.1043, + "eval_loss": 1.4028282165527344, + "eval_rouge1": 0.0043, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0042, + "eval_rougeLsum": 0.0043, + "eval_runtime": 12.384, + "eval_samples_per_second": 9.286, + "eval_steps_per_second": 1.615, + "step": 2329 + }, + { + "epoch": 121.97, + "eval_gen_len": 5.9478, + "eval_loss": 1.3989005088806152, + "eval_rouge1": 0.0036, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0036, + "eval_rougeLsum": 0.0037, + "eval_runtime": 12.3007, + "eval_samples_per_second": 9.349, + "eval_steps_per_second": 1.626, + "step": 2348 + }, + { + "epoch": 122.96, + "eval_gen_len": 5.4348, + "eval_loss": 1.3940106630325317, + "eval_rouge1": 0.0029, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0026, + "eval_rougeLsum": 0.0026, + "eval_runtime": 13.1431, + "eval_samples_per_second": 8.75, + "eval_steps_per_second": 1.522, + "step": 2367 + }, + { + "epoch": 124.0, + "eval_gen_len": 5.3913, + "eval_loss": 1.387468695640564, + "eval_rouge1": 0.0036, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0034, + "eval_rougeLsum": 0.0035, + "eval_runtime": 12.0602, + "eval_samples_per_second": 9.535, + "eval_steps_per_second": 1.658, + "step": 2387 + }, + { + "epoch": 124.99, + "eval_gen_len": 5.4174, + "eval_loss": 1.3833892345428467, + "eval_rouge1": 0.0031, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0031, + "eval_rougeLsum": 0.0032, + "eval_runtime": 12.5404, + "eval_samples_per_second": 9.17, + "eval_steps_per_second": 1.595, + "step": 2406 + }, + { + "epoch": 125.97, + "eval_gen_len": 5.8, + "eval_loss": 1.3742746114730835, + "eval_rouge1": 0.0034, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0034, + "eval_rougeLsum": 0.0034, + "eval_runtime": 12.5374, + "eval_samples_per_second": 9.173, + "eval_steps_per_second": 1.595, + "step": 2425 + }, + { + "epoch": 126.96, + "eval_gen_len": 6.2348, + "eval_loss": 1.3673855066299438, + "eval_rouge1": 0.0054, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0051, + "eval_rougeLsum": 0.0052, + "eval_runtime": 12.3787, + "eval_samples_per_second": 9.29, + "eval_steps_per_second": 1.616, + "step": 2444 + }, + { + "epoch": 128.0, + "eval_gen_len": 6.3739, + "eval_loss": 1.3610302209854126, + "eval_rouge1": 0.0051, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0051, + "eval_rougeLsum": 0.0053, + "eval_runtime": 12.0722, + "eval_samples_per_second": 9.526, + "eval_steps_per_second": 1.657, + "step": 2464 + }, + { + "epoch": 128.99, + "eval_gen_len": 7.1565, + "eval_loss": 1.351613163948059, + "eval_rouge1": 0.0062, + "eval_rouge2": 0.0002, + "eval_rougeL": 0.0056, + "eval_rougeLsum": 0.0057, + "eval_runtime": 11.971, + "eval_samples_per_second": 9.607, + "eval_steps_per_second": 1.671, + "step": 2483 + }, + { + "epoch": 129.87, + "grad_norm": 2.394576072692871, + "learning_rate": 7.3726315789473694e-06, + "loss": 1.6063, + "step": 2500 + }, + { + "epoch": 129.97, + "eval_gen_len": 7.4522, + "eval_loss": 1.3424580097198486, + "eval_rouge1": 0.0055, + "eval_rouge2": 0.0002, + "eval_rougeL": 0.0053, + "eval_rougeLsum": 0.0053, + "eval_runtime": 12.0884, + "eval_samples_per_second": 9.513, + "eval_steps_per_second": 1.654, + "step": 2502 + }, + { + "epoch": 130.96, + "eval_gen_len": 7.4609, + "eval_loss": 1.334855556488037, + "eval_rouge1": 0.0044, + "eval_rouge2": 0.0002, + "eval_rougeL": 0.0042, + "eval_rougeLsum": 0.0041, + "eval_runtime": 11.9536, + "eval_samples_per_second": 9.621, + "eval_steps_per_second": 1.673, + "step": 2521 + }, + { + "epoch": 132.0, + "eval_gen_len": 7.4522, + "eval_loss": 1.3267827033996582, + "eval_rouge1": 0.0048, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0046, + "eval_rougeLsum": 0.0046, + "eval_runtime": 12.6532, + "eval_samples_per_second": 9.089, + "eval_steps_per_second": 1.581, + "step": 2541 + }, + { + "epoch": 132.99, + "eval_gen_len": 7.8522, + "eval_loss": 1.3201897144317627, + "eval_rouge1": 0.0096, + "eval_rouge2": 0.0002, + "eval_rougeL": 0.0088, + "eval_rougeLsum": 0.0089, + "eval_runtime": 12.2562, + "eval_samples_per_second": 9.383, + "eval_steps_per_second": 1.632, + "step": 2560 + }, + { + "epoch": 133.97, + "eval_gen_len": 7.5304, + "eval_loss": 1.3138891458511353, + "eval_rouge1": 0.0074, + "eval_rouge2": 0.0002, + "eval_rougeL": 0.0075, + "eval_rougeLsum": 0.0075, + "eval_runtime": 11.9989, + "eval_samples_per_second": 9.584, + "eval_steps_per_second": 1.667, + "step": 2579 + }, + { + "epoch": 134.96, + "eval_gen_len": 7.2348, + "eval_loss": 1.3059097528457642, + "eval_rouge1": 0.005, + "eval_rouge2": 0.0, + "eval_rougeL": 0.005, + "eval_rougeLsum": 0.0051, + "eval_runtime": 11.9785, + "eval_samples_per_second": 9.6, + "eval_steps_per_second": 1.67, + "step": 2598 + }, + { + "epoch": 136.0, + "eval_gen_len": 7.1304, + "eval_loss": 1.298433780670166, + "eval_rouge1": 0.005, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0046, + "eval_rougeLsum": 0.0047, + "eval_runtime": 12.1248, + "eval_samples_per_second": 9.485, + "eval_steps_per_second": 1.65, + "step": 2618 + }, + { + "epoch": 136.99, + "eval_gen_len": 8.0261, + "eval_loss": 1.29219651222229, + "eval_rouge1": 0.0072, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0069, + "eval_rougeLsum": 0.0069, + "eval_runtime": 12.5125, + "eval_samples_per_second": 9.191, + "eval_steps_per_second": 1.598, + "step": 2637 + }, + { + "epoch": 137.97, + "eval_gen_len": 8.4087, + "eval_loss": 1.2833046913146973, + "eval_rouge1": 0.0108, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0097, + "eval_rougeLsum": 0.0099, + "eval_runtime": 12.0943, + "eval_samples_per_second": 9.509, + "eval_steps_per_second": 1.654, + "step": 2656 + }, + { + "epoch": 138.96, + "eval_gen_len": 8.3739, + "eval_loss": 1.278290033340454, + "eval_rouge1": 0.0111, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0094, + "eval_rougeLsum": 0.0095, + "eval_runtime": 12.8059, + "eval_samples_per_second": 8.98, + "eval_steps_per_second": 1.562, + "step": 2675 + }, + { + "epoch": 140.0, + "eval_gen_len": 8.5043, + "eval_loss": 1.2764371633529663, + "eval_rouge1": 0.0114, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0104, + "eval_rougeLsum": 0.0108, + "eval_runtime": 12.1122, + "eval_samples_per_second": 9.495, + "eval_steps_per_second": 1.651, + "step": 2695 + }, + { + "epoch": 140.99, + "eval_gen_len": 8.6261, + "eval_loss": 1.2698535919189453, + "eval_rouge1": 0.0139, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0126, + "eval_rougeLsum": 0.0128, + "eval_runtime": 12.5216, + "eval_samples_per_second": 9.184, + "eval_steps_per_second": 1.597, + "step": 2714 + }, + { + "epoch": 141.97, + "eval_gen_len": 8.2435, + "eval_loss": 1.2616974115371704, + "eval_rouge1": 0.0135, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0119, + "eval_rougeLsum": 0.012, + "eval_runtime": 12.2763, + "eval_samples_per_second": 9.368, + "eval_steps_per_second": 1.629, + "step": 2733 + }, + { + "epoch": 142.96, + "eval_gen_len": 8.713, + "eval_loss": 1.2539962530136108, + "eval_rouge1": 0.0144, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0123, + "eval_rougeLsum": 0.0125, + "eval_runtime": 12.1565, + "eval_samples_per_second": 9.46, + "eval_steps_per_second": 1.645, + "step": 2752 + }, + { + "epoch": 144.0, + "eval_gen_len": 8.9826, + "eval_loss": 1.2482250928878784, + "eval_rouge1": 0.0153, + "eval_rouge2": 0.0002, + "eval_rougeL": 0.0137, + "eval_rougeLsum": 0.0137, + "eval_runtime": 12.3974, + "eval_samples_per_second": 9.276, + "eval_steps_per_second": 1.613, + "step": 2772 + }, + { + "epoch": 144.99, + "eval_gen_len": 8.9391, + "eval_loss": 1.2442501783370972, + "eval_rouge1": 0.0139, + "eval_rouge2": 0.0006, + "eval_rougeL": 0.013, + "eval_rougeLsum": 0.0129, + "eval_runtime": 12.5443, + "eval_samples_per_second": 9.168, + "eval_steps_per_second": 1.594, + "step": 2791 + }, + { + "epoch": 145.97, + "eval_gen_len": 9.3565, + "eval_loss": 1.2381587028503418, + "eval_rouge1": 0.0187, + "eval_rouge2": 0.0011, + "eval_rougeL": 0.0151, + "eval_rougeLsum": 0.0151, + "eval_runtime": 12.381, + "eval_samples_per_second": 9.288, + "eval_steps_per_second": 1.615, + "step": 2810 + }, + { + "epoch": 146.96, + "eval_gen_len": 9.513, + "eval_loss": 1.2287580966949463, + "eval_rouge1": 0.0202, + "eval_rouge2": 0.0008, + "eval_rougeL": 0.0172, + "eval_rougeLsum": 0.0172, + "eval_runtime": 12.6591, + "eval_samples_per_second": 9.084, + "eval_steps_per_second": 1.58, + "step": 2829 + }, + { + "epoch": 148.0, + "eval_gen_len": 8.5565, + "eval_loss": 1.2264941930770874, + "eval_rouge1": 0.0147, + "eval_rouge2": 0.0004, + "eval_rougeL": 0.0126, + "eval_rougeLsum": 0.0124, + "eval_runtime": 11.9708, + "eval_samples_per_second": 9.607, + "eval_steps_per_second": 1.671, + "step": 2849 + }, + { + "epoch": 148.99, + "eval_gen_len": 8.7652, + "eval_loss": 1.222589135169983, + "eval_rouge1": 0.0153, + "eval_rouge2": 0.0005, + "eval_rougeL": 0.0129, + "eval_rougeLsum": 0.0129, + "eval_runtime": 12.5442, + "eval_samples_per_second": 9.168, + "eval_steps_per_second": 1.594, + "step": 2868 + }, + { + "epoch": 149.97, + "eval_gen_len": 8.8435, + "eval_loss": 1.2170130014419556, + "eval_rouge1": 0.0147, + "eval_rouge2": 0.0008, + "eval_rougeL": 0.0128, + "eval_rougeLsum": 0.0127, + "eval_runtime": 12.6121, + "eval_samples_per_second": 9.118, + "eval_steps_per_second": 1.586, + "step": 2887 + }, + { + "epoch": 150.96, + "eval_gen_len": 9.4174, + "eval_loss": 1.208147406578064, + "eval_rouge1": 0.0181, + "eval_rouge2": 0.001, + "eval_rougeL": 0.0162, + "eval_rougeLsum": 0.0162, + "eval_runtime": 13.01, + "eval_samples_per_second": 8.839, + "eval_steps_per_second": 1.537, + "step": 2906 + }, + { + "epoch": 152.0, + "eval_gen_len": 9.7739, + "eval_loss": 1.2039202451705933, + "eval_rouge1": 0.0216, + "eval_rouge2": 0.0013, + "eval_rougeL": 0.019, + "eval_rougeLsum": 0.0191, + "eval_runtime": 12.7191, + "eval_samples_per_second": 9.042, + "eval_steps_per_second": 1.572, + "step": 2926 + }, + { + "epoch": 152.99, + "eval_gen_len": 9.5652, + "eval_loss": 1.200941801071167, + "eval_rouge1": 0.02, + "eval_rouge2": 0.0011, + "eval_rougeL": 0.0176, + "eval_rougeLsum": 0.0178, + "eval_runtime": 12.6301, + "eval_samples_per_second": 9.105, + "eval_steps_per_second": 1.584, + "step": 2945 + }, + { + "epoch": 153.97, + "eval_gen_len": 9.4609, + "eval_loss": 1.195379376411438, + "eval_rouge1": 0.0156, + "eval_rouge2": 0.0008, + "eval_rougeL": 0.0131, + "eval_rougeLsum": 0.0133, + "eval_runtime": 12.3176, + "eval_samples_per_second": 9.336, + "eval_steps_per_second": 1.624, + "step": 2964 + }, + { + "epoch": 154.96, + "eval_gen_len": 9.6522, + "eval_loss": 1.1899113655090332, + "eval_rouge1": 0.0181, + "eval_rouge2": 0.001, + "eval_rougeL": 0.0156, + "eval_rougeLsum": 0.0157, + "eval_runtime": 12.5731, + "eval_samples_per_second": 9.147, + "eval_steps_per_second": 1.591, + "step": 2983 + }, + { + "epoch": 155.84, + "grad_norm": 2.1833202838897705, + "learning_rate": 6.846315789473684e-06, + "loss": 1.4271, + "step": 3000 + }, + { + "epoch": 156.0, + "eval_gen_len": 9.4696, + "eval_loss": 1.1842440366744995, + "eval_rouge1": 0.0203, + "eval_rouge2": 0.0008, + "eval_rougeL": 0.0174, + "eval_rougeLsum": 0.0174, + "eval_runtime": 12.2741, + "eval_samples_per_second": 9.369, + "eval_steps_per_second": 1.629, + "step": 3003 + }, + { + "epoch": 156.99, + "eval_gen_len": 9.8174, + "eval_loss": 1.1782081127166748, + "eval_rouge1": 0.0187, + "eval_rouge2": 0.0007, + "eval_rougeL": 0.0163, + "eval_rougeLsum": 0.0165, + "eval_runtime": 13.1906, + "eval_samples_per_second": 8.718, + "eval_steps_per_second": 1.516, + "step": 3022 + }, + { + "epoch": 157.97, + "eval_gen_len": 9.9304, + "eval_loss": 1.173979640007019, + "eval_rouge1": 0.0206, + "eval_rouge2": 0.0005, + "eval_rougeL": 0.018, + "eval_rougeLsum": 0.0183, + "eval_runtime": 12.6288, + "eval_samples_per_second": 9.106, + "eval_steps_per_second": 1.584, + "step": 3041 + }, + { + "epoch": 158.96, + "eval_gen_len": 10.0087, + "eval_loss": 1.1698901653289795, + "eval_rouge1": 0.0198, + "eval_rouge2": 0.0005, + "eval_rougeL": 0.0177, + "eval_rougeLsum": 0.018, + "eval_runtime": 12.5926, + "eval_samples_per_second": 9.132, + "eval_steps_per_second": 1.588, + "step": 3060 + }, + { + "epoch": 160.0, + "eval_gen_len": 10.2174, + "eval_loss": 1.1631128787994385, + "eval_rouge1": 0.0214, + "eval_rouge2": 0.0004, + "eval_rougeL": 0.0189, + "eval_rougeLsum": 0.0191, + "eval_runtime": 12.2102, + "eval_samples_per_second": 9.418, + "eval_steps_per_second": 1.638, + "step": 3080 + }, + { + "epoch": 160.99, + "eval_gen_len": 10.1304, + "eval_loss": 1.1569976806640625, + "eval_rouge1": 0.0221, + "eval_rouge2": 0.0009, + "eval_rougeL": 0.0185, + "eval_rougeLsum": 0.0187, + "eval_runtime": 12.421, + "eval_samples_per_second": 9.259, + "eval_steps_per_second": 1.61, + "step": 3099 + }, + { + "epoch": 161.97, + "eval_gen_len": 10.0609, + "eval_loss": 1.1523972749710083, + "eval_rouge1": 0.0202, + "eval_rouge2": 0.0009, + "eval_rougeL": 0.0169, + "eval_rougeLsum": 0.0171, + "eval_runtime": 12.3412, + "eval_samples_per_second": 9.318, + "eval_steps_per_second": 1.621, + "step": 3118 + }, + { + "epoch": 162.96, + "eval_gen_len": 9.8609, + "eval_loss": 1.1472958326339722, + "eval_rouge1": 0.0202, + "eval_rouge2": 0.0006, + "eval_rougeL": 0.0173, + "eval_rougeLsum": 0.0174, + "eval_runtime": 12.2339, + "eval_samples_per_second": 9.4, + "eval_steps_per_second": 1.635, + "step": 3137 + }, + { + "epoch": 164.0, + "eval_gen_len": 10.3913, + "eval_loss": 1.1415693759918213, + "eval_rouge1": 0.0218, + "eval_rouge2": 0.0011, + "eval_rougeL": 0.0184, + "eval_rougeLsum": 0.0183, + "eval_runtime": 12.6611, + "eval_samples_per_second": 9.083, + "eval_steps_per_second": 1.58, + "step": 3157 + }, + { + "epoch": 164.99, + "eval_gen_len": 9.713, + "eval_loss": 1.135535478591919, + "eval_rouge1": 0.0174, + "eval_rouge2": 0.0005, + "eval_rougeL": 0.0148, + "eval_rougeLsum": 0.0146, + "eval_runtime": 12.6693, + "eval_samples_per_second": 9.077, + "eval_steps_per_second": 1.579, + "step": 3176 + }, + { + "epoch": 165.97, + "eval_gen_len": 10.113, + "eval_loss": 1.1300948858261108, + "eval_rouge1": 0.0185, + "eval_rouge2": 0.0007, + "eval_rougeL": 0.0151, + "eval_rougeLsum": 0.0151, + "eval_runtime": 12.211, + "eval_samples_per_second": 9.418, + "eval_steps_per_second": 1.638, + "step": 3195 + }, + { + "epoch": 166.96, + "eval_gen_len": 10.1043, + "eval_loss": 1.125083088874817, + "eval_rouge1": 0.0205, + "eval_rouge2": 0.0008, + "eval_rougeL": 0.0165, + "eval_rougeLsum": 0.0164, + "eval_runtime": 12.1409, + "eval_samples_per_second": 9.472, + "eval_steps_per_second": 1.647, + "step": 3214 + }, + { + "epoch": 168.0, + "eval_gen_len": 10.2348, + "eval_loss": 1.1202179193496704, + "eval_rouge1": 0.0195, + "eval_rouge2": 0.0008, + "eval_rougeL": 0.0159, + "eval_rougeLsum": 0.0159, + "eval_runtime": 12.1317, + "eval_samples_per_second": 9.479, + "eval_steps_per_second": 1.649, + "step": 3234 + }, + { + "epoch": 168.99, + "eval_gen_len": 10.8957, + "eval_loss": 1.114139199256897, + "eval_rouge1": 0.0243, + "eval_rouge2": 0.0018, + "eval_rougeL": 0.0198, + "eval_rougeLsum": 0.0198, + "eval_runtime": 12.443, + "eval_samples_per_second": 9.242, + "eval_steps_per_second": 1.607, + "step": 3253 + }, + { + "epoch": 169.97, + "eval_gen_len": 11.0174, + "eval_loss": 1.1090463399887085, + "eval_rouge1": 0.0202, + "eval_rouge2": 0.0013, + "eval_rougeL": 0.0161, + "eval_rougeLsum": 0.0163, + "eval_runtime": 12.144, + "eval_samples_per_second": 9.47, + "eval_steps_per_second": 1.647, + "step": 3272 + }, + { + "epoch": 170.96, + "eval_gen_len": 11.313, + "eval_loss": 1.1036903858184814, + "eval_rouge1": 0.0223, + "eval_rouge2": 0.0015, + "eval_rougeL": 0.0186, + "eval_rougeLsum": 0.0186, + "eval_runtime": 12.3793, + "eval_samples_per_second": 9.29, + "eval_steps_per_second": 1.616, + "step": 3291 + }, + { + "epoch": 172.0, + "eval_gen_len": 11.3739, + "eval_loss": 1.0987364053726196, + "eval_rouge1": 0.0212, + "eval_rouge2": 0.0013, + "eval_rougeL": 0.0178, + "eval_rougeLsum": 0.0179, + "eval_runtime": 12.3964, + "eval_samples_per_second": 9.277, + "eval_steps_per_second": 1.613, + "step": 3311 + }, + { + "epoch": 172.99, + "eval_gen_len": 11.2522, + "eval_loss": 1.0937457084655762, + "eval_rouge1": 0.0219, + "eval_rouge2": 0.0015, + "eval_rougeL": 0.0182, + "eval_rougeLsum": 0.018, + "eval_runtime": 12.5831, + "eval_samples_per_second": 9.139, + "eval_steps_per_second": 1.589, + "step": 3330 + }, + { + "epoch": 173.97, + "eval_gen_len": 11.2174, + "eval_loss": 1.090100646018982, + "eval_rouge1": 0.0199, + "eval_rouge2": 0.0013, + "eval_rougeL": 0.0162, + "eval_rougeLsum": 0.0163, + "eval_runtime": 12.7101, + "eval_samples_per_second": 9.048, + "eval_steps_per_second": 1.574, + "step": 3349 + }, + { + "epoch": 174.96, + "eval_gen_len": 11.2174, + "eval_loss": 1.0861694812774658, + "eval_rouge1": 0.018, + "eval_rouge2": 0.0011, + "eval_rougeL": 0.0149, + "eval_rougeLsum": 0.0149, + "eval_runtime": 12.2255, + "eval_samples_per_second": 9.407, + "eval_steps_per_second": 1.636, + "step": 3368 + }, + { + "epoch": 176.0, + "eval_gen_len": 11.3304, + "eval_loss": 1.080249309539795, + "eval_rouge1": 0.0181, + "eval_rouge2": 0.0013, + "eval_rougeL": 0.0154, + "eval_rougeLsum": 0.0154, + "eval_runtime": 12.2755, + "eval_samples_per_second": 9.368, + "eval_steps_per_second": 1.629, + "step": 3388 + }, + { + "epoch": 176.99, + "eval_gen_len": 10.9739, + "eval_loss": 1.0751179456710815, + "eval_rouge1": 0.0147, + "eval_rouge2": 0.0012, + "eval_rougeL": 0.0124, + "eval_rougeLsum": 0.0125, + "eval_runtime": 12.3848, + "eval_samples_per_second": 9.286, + "eval_steps_per_second": 1.615, + "step": 3407 + }, + { + "epoch": 177.97, + "eval_gen_len": 10.8087, + "eval_loss": 1.069909930229187, + "eval_rouge1": 0.0149, + "eval_rouge2": 0.001, + "eval_rougeL": 0.0123, + "eval_rougeLsum": 0.0124, + "eval_runtime": 12.6526, + "eval_samples_per_second": 9.089, + "eval_steps_per_second": 1.581, + "step": 3426 + }, + { + "epoch": 178.96, + "eval_gen_len": 10.7217, + "eval_loss": 1.0651546716690063, + "eval_rouge1": 0.0134, + "eval_rouge2": 0.0011, + "eval_rougeL": 0.0122, + "eval_rougeLsum": 0.0122, + "eval_runtime": 13.0395, + "eval_samples_per_second": 8.819, + "eval_steps_per_second": 1.534, + "step": 3445 + }, + { + "epoch": 180.0, + "eval_gen_len": 10.6174, + "eval_loss": 1.060491681098938, + "eval_rouge1": 0.0121, + "eval_rouge2": 0.001, + "eval_rougeL": 0.0101, + "eval_rougeLsum": 0.0101, + "eval_runtime": 12.3246, + "eval_samples_per_second": 9.331, + "eval_steps_per_second": 1.623, + "step": 3465 + }, + { + "epoch": 180.99, + "eval_gen_len": 10.6435, + "eval_loss": 1.0562814474105835, + "eval_rouge1": 0.0131, + "eval_rouge2": 0.0007, + "eval_rougeL": 0.0113, + "eval_rougeLsum": 0.0111, + "eval_runtime": 13.1955, + "eval_samples_per_second": 8.715, + "eval_steps_per_second": 1.516, + "step": 3484 + }, + { + "epoch": 181.82, + "grad_norm": 0.9931882619857788, + "learning_rate": 6.3200000000000005e-06, + "loss": 1.265, + "step": 3500 + }, + { + "epoch": 181.97, + "eval_gen_len": 10.3913, + "eval_loss": 1.0519821643829346, + "eval_rouge1": 0.0147, + "eval_rouge2": 0.0019, + "eval_rougeL": 0.0129, + "eval_rougeLsum": 0.0125, + "eval_runtime": 12.3478, + "eval_samples_per_second": 9.313, + "eval_steps_per_second": 1.62, + "step": 3503 + }, + { + "epoch": 182.96, + "eval_gen_len": 10.9826, + "eval_loss": 1.047600507736206, + "eval_rouge1": 0.0171, + "eval_rouge2": 0.0018, + "eval_rougeL": 0.0148, + "eval_rougeLsum": 0.0148, + "eval_runtime": 12.7548, + "eval_samples_per_second": 9.016, + "eval_steps_per_second": 1.568, + "step": 3522 + }, + { + "epoch": 184.0, + "eval_gen_len": 10.9478, + "eval_loss": 1.0428956747055054, + "eval_rouge1": 0.019, + "eval_rouge2": 0.0026, + "eval_rougeL": 0.0173, + "eval_rougeLsum": 0.0174, + "eval_runtime": 12.668, + "eval_samples_per_second": 9.078, + "eval_steps_per_second": 1.579, + "step": 3542 + }, + { + "epoch": 184.99, + "eval_gen_len": 10.6348, + "eval_loss": 1.0391294956207275, + "eval_rouge1": 0.0192, + "eval_rouge2": 0.0019, + "eval_rougeL": 0.016, + "eval_rougeLsum": 0.0162, + "eval_runtime": 12.2592, + "eval_samples_per_second": 9.381, + "eval_steps_per_second": 1.631, + "step": 3561 + }, + { + "epoch": 185.97, + "eval_gen_len": 10.6, + "eval_loss": 1.0354028940200806, + "eval_rouge1": 0.0192, + "eval_rouge2": 0.0021, + "eval_rougeL": 0.0154, + "eval_rougeLsum": 0.0155, + "eval_runtime": 12.3279, + "eval_samples_per_second": 9.328, + "eval_steps_per_second": 1.622, + "step": 3580 + }, + { + "epoch": 186.96, + "eval_gen_len": 10.2261, + "eval_loss": 1.0318480730056763, + "eval_rouge1": 0.0193, + "eval_rouge2": 0.003, + "eval_rougeL": 0.0162, + "eval_rougeLsum": 0.0163, + "eval_runtime": 12.7454, + "eval_samples_per_second": 9.023, + "eval_steps_per_second": 1.569, + "step": 3599 + }, + { + "epoch": 188.0, + "eval_gen_len": 10.6261, + "eval_loss": 1.0279144048690796, + "eval_rouge1": 0.0245, + "eval_rouge2": 0.0032, + "eval_rougeL": 0.0201, + "eval_rougeLsum": 0.02, + "eval_runtime": 12.5706, + "eval_samples_per_second": 9.148, + "eval_steps_per_second": 1.591, + "step": 3619 + }, + { + "epoch": 188.99, + "eval_gen_len": 10.5913, + "eval_loss": 1.0238802433013916, + "eval_rouge1": 0.025, + "eval_rouge2": 0.0029, + "eval_rougeL": 0.0206, + "eval_rougeLsum": 0.0207, + "eval_runtime": 12.4275, + "eval_samples_per_second": 9.254, + "eval_steps_per_second": 1.609, + "step": 3638 + }, + { + "epoch": 189.97, + "eval_gen_len": 10.2261, + "eval_loss": 1.0197361707687378, + "eval_rouge1": 0.0249, + "eval_rouge2": 0.0029, + "eval_rougeL": 0.0198, + "eval_rougeLsum": 0.0199, + "eval_runtime": 12.6121, + "eval_samples_per_second": 9.118, + "eval_steps_per_second": 1.586, + "step": 3657 + }, + { + "epoch": 190.96, + "eval_gen_len": 10.1391, + "eval_loss": 1.0159963369369507, + "eval_rouge1": 0.0245, + "eval_rouge2": 0.003, + "eval_rougeL": 0.019, + "eval_rougeLsum": 0.0191, + "eval_runtime": 12.81, + "eval_samples_per_second": 8.977, + "eval_steps_per_second": 1.561, + "step": 3676 + }, + { + "epoch": 192.0, + "eval_gen_len": 10.2435, + "eval_loss": 1.0119863748550415, + "eval_rouge1": 0.0243, + "eval_rouge2": 0.0027, + "eval_rougeL": 0.019, + "eval_rougeLsum": 0.019, + "eval_runtime": 12.53, + "eval_samples_per_second": 9.178, + "eval_steps_per_second": 1.596, + "step": 3696 + }, + { + "epoch": 192.99, + "eval_gen_len": 10.3826, + "eval_loss": 1.008431315422058, + "eval_rouge1": 0.0247, + "eval_rouge2": 0.0029, + "eval_rougeL": 0.0194, + "eval_rougeLsum": 0.0193, + "eval_runtime": 12.6196, + "eval_samples_per_second": 9.113, + "eval_steps_per_second": 1.585, + "step": 3715 + }, + { + "epoch": 193.97, + "eval_gen_len": 10.6696, + "eval_loss": 1.0049232244491577, + "eval_rouge1": 0.0239, + "eval_rouge2": 0.0027, + "eval_rougeL": 0.0186, + "eval_rougeLsum": 0.0185, + "eval_runtime": 12.5612, + "eval_samples_per_second": 9.155, + "eval_steps_per_second": 1.592, + "step": 3734 + }, + { + "epoch": 194.96, + "eval_gen_len": 11.1043, + "eval_loss": 1.0015385150909424, + "eval_rouge1": 0.0248, + "eval_rouge2": 0.0029, + "eval_rougeL": 0.0195, + "eval_rougeLsum": 0.0195, + "eval_runtime": 11.9572, + "eval_samples_per_second": 9.618, + "eval_steps_per_second": 1.673, + "step": 3753 + }, + { + "epoch": 196.0, + "eval_gen_len": 10.8609, + "eval_loss": 0.9973338842391968, + "eval_rouge1": 0.0233, + "eval_rouge2": 0.0026, + "eval_rougeL": 0.0189, + "eval_rougeLsum": 0.0189, + "eval_runtime": 11.9829, + "eval_samples_per_second": 9.597, + "eval_steps_per_second": 1.669, + "step": 3773 + }, + { + "epoch": 196.99, + "eval_gen_len": 10.6783, + "eval_loss": 0.9933551549911499, + "eval_rouge1": 0.0209, + "eval_rouge2": 0.0028, + "eval_rougeL": 0.0172, + "eval_rougeLsum": 0.0172, + "eval_runtime": 12.3195, + "eval_samples_per_second": 9.335, + "eval_steps_per_second": 1.623, + "step": 3792 + }, + { + "epoch": 197.97, + "eval_gen_len": 10.9043, + "eval_loss": 0.9898021817207336, + "eval_rouge1": 0.0224, + "eval_rouge2": 0.0028, + "eval_rougeL": 0.0183, + "eval_rougeLsum": 0.0182, + "eval_runtime": 12.6246, + "eval_samples_per_second": 9.109, + "eval_steps_per_second": 1.584, + "step": 3811 + }, + { + "epoch": 198.96, + "eval_gen_len": 11.2435, + "eval_loss": 0.9868430495262146, + "eval_rouge1": 0.0223, + "eval_rouge2": 0.0034, + "eval_rougeL": 0.0186, + "eval_rougeLsum": 0.0186, + "eval_runtime": 12.4605, + "eval_samples_per_second": 9.229, + "eval_steps_per_second": 1.605, + "step": 3830 + }, + { + "epoch": 200.0, + "eval_gen_len": 11.1565, + "eval_loss": 0.9835883975028992, + "eval_rouge1": 0.0212, + "eval_rouge2": 0.0033, + "eval_rougeL": 0.0182, + "eval_rougeLsum": 0.018, + "eval_runtime": 12.0605, + "eval_samples_per_second": 9.535, + "eval_steps_per_second": 1.658, + "step": 3850 + }, + { + "epoch": 200.99, + "eval_gen_len": 11.2087, + "eval_loss": 0.9812867641448975, + "eval_rouge1": 0.0202, + "eval_rouge2": 0.003, + "eval_rougeL": 0.0164, + "eval_rougeLsum": 0.0164, + "eval_runtime": 12.6958, + "eval_samples_per_second": 9.058, + "eval_steps_per_second": 1.575, + "step": 3869 + }, + { + "epoch": 201.97, + "eval_gen_len": 11.2783, + "eval_loss": 0.9780998229980469, + "eval_rouge1": 0.0192, + "eval_rouge2": 0.0032, + "eval_rougeL": 0.0158, + "eval_rougeLsum": 0.0158, + "eval_runtime": 12.2767, + "eval_samples_per_second": 9.367, + "eval_steps_per_second": 1.629, + "step": 3888 + }, + { + "epoch": 202.96, + "eval_gen_len": 11.113, + "eval_loss": 0.9748227596282959, + "eval_rouge1": 0.0174, + "eval_rouge2": 0.0028, + "eval_rougeL": 0.0144, + "eval_rougeLsum": 0.0144, + "eval_runtime": 12.2251, + "eval_samples_per_second": 9.407, + "eval_steps_per_second": 1.636, + "step": 3907 + }, + { + "epoch": 204.0, + "eval_gen_len": 11.3304, + "eval_loss": 0.9713881015777588, + "eval_rouge1": 0.0187, + "eval_rouge2": 0.0026, + "eval_rougeL": 0.0157, + "eval_rougeLsum": 0.0157, + "eval_runtime": 12.1776, + "eval_samples_per_second": 9.444, + "eval_steps_per_second": 1.642, + "step": 3927 + }, + { + "epoch": 204.99, + "eval_gen_len": 11.5043, + "eval_loss": 0.968216598033905, + "eval_rouge1": 0.0199, + "eval_rouge2": 0.0026, + "eval_rougeL": 0.0164, + "eval_rougeLsum": 0.0166, + "eval_runtime": 12.4259, + "eval_samples_per_second": 9.255, + "eval_steps_per_second": 1.61, + "step": 3946 + }, + { + "epoch": 205.97, + "eval_gen_len": 11.4261, + "eval_loss": 0.9647319912910461, + "eval_rouge1": 0.0184, + "eval_rouge2": 0.0025, + "eval_rougeL": 0.0154, + "eval_rougeLsum": 0.0154, + "eval_runtime": 12.6085, + "eval_samples_per_second": 9.121, + "eval_steps_per_second": 1.586, + "step": 3965 + }, + { + "epoch": 206.96, + "eval_gen_len": 11.6087, + "eval_loss": 0.9613582491874695, + "eval_rouge1": 0.0172, + "eval_rouge2": 0.0018, + "eval_rougeL": 0.0146, + "eval_rougeLsum": 0.0145, + "eval_runtime": 12.5542, + "eval_samples_per_second": 9.16, + "eval_steps_per_second": 1.593, + "step": 3984 + }, + { + "epoch": 207.79, + "grad_norm": 0.6749991178512573, + "learning_rate": 5.793684210526316e-06, + "loss": 1.119, + "step": 4000 + }, + { + "epoch": 208.0, + "eval_gen_len": 11.8087, + "eval_loss": 0.9580429196357727, + "eval_rouge1": 0.0206, + "eval_rouge2": 0.0023, + "eval_rougeL": 0.0168, + "eval_rougeLsum": 0.0167, + "eval_runtime": 12.4965, + "eval_samples_per_second": 9.203, + "eval_steps_per_second": 1.6, + "step": 4004 + }, + { + "epoch": 208.99, + "eval_gen_len": 12.0957, + "eval_loss": 0.9548400640487671, + "eval_rouge1": 0.0233, + "eval_rouge2": 0.0023, + "eval_rougeL": 0.019, + "eval_rougeLsum": 0.019, + "eval_runtime": 13.061, + "eval_samples_per_second": 8.805, + "eval_steps_per_second": 1.531, + "step": 4023 + }, + { + "epoch": 209.97, + "eval_gen_len": 11.9826, + "eval_loss": 0.9517626166343689, + "eval_rouge1": 0.0214, + "eval_rouge2": 0.0021, + "eval_rougeL": 0.0181, + "eval_rougeLsum": 0.018, + "eval_runtime": 12.6476, + "eval_samples_per_second": 9.093, + "eval_steps_per_second": 1.581, + "step": 4042 + }, + { + "epoch": 210.96, + "eval_gen_len": 11.9304, + "eval_loss": 0.9485481381416321, + "eval_rouge1": 0.0208, + "eval_rouge2": 0.0018, + "eval_rougeL": 0.0171, + "eval_rougeLsum": 0.0172, + "eval_runtime": 12.4352, + "eval_samples_per_second": 9.248, + "eval_steps_per_second": 1.608, + "step": 4061 + }, + { + "epoch": 212.0, + "eval_gen_len": 11.7826, + "eval_loss": 0.9455087184906006, + "eval_rouge1": 0.0184, + "eval_rouge2": 0.0016, + "eval_rougeL": 0.0152, + "eval_rougeLsum": 0.015, + "eval_runtime": 12.868, + "eval_samples_per_second": 8.937, + "eval_steps_per_second": 1.554, + "step": 4081 + }, + { + "epoch": 212.99, + "eval_gen_len": 11.7565, + "eval_loss": 0.9424554109573364, + "eval_rouge1": 0.0186, + "eval_rouge2": 0.0028, + "eval_rougeL": 0.0153, + "eval_rougeLsum": 0.0153, + "eval_runtime": 12.3925, + "eval_samples_per_second": 9.28, + "eval_steps_per_second": 1.614, + "step": 4100 + }, + { + "epoch": 213.97, + "eval_gen_len": 11.3913, + "eval_loss": 0.939349889755249, + "eval_rouge1": 0.0165, + "eval_rouge2": 0.002, + "eval_rougeL": 0.0131, + "eval_rougeLsum": 0.0131, + "eval_runtime": 12.3014, + "eval_samples_per_second": 9.349, + "eval_steps_per_second": 1.626, + "step": 4119 + }, + { + "epoch": 214.96, + "eval_gen_len": 11.4522, + "eval_loss": 0.9365057349205017, + "eval_rouge1": 0.0177, + "eval_rouge2": 0.0022, + "eval_rougeL": 0.0143, + "eval_rougeLsum": 0.0143, + "eval_runtime": 12.8423, + "eval_samples_per_second": 8.955, + "eval_steps_per_second": 1.557, + "step": 4138 + }, + { + "epoch": 216.0, + "eval_gen_len": 11.7391, + "eval_loss": 0.9332289099693298, + "eval_rouge1": 0.0213, + "eval_rouge2": 0.0028, + "eval_rougeL": 0.0177, + "eval_rougeLsum": 0.0173, + "eval_runtime": 12.4944, + "eval_samples_per_second": 9.204, + "eval_steps_per_second": 1.601, + "step": 4158 + }, + { + "epoch": 216.99, + "eval_gen_len": 11.6522, + "eval_loss": 0.9310381412506104, + "eval_rouge1": 0.0197, + "eval_rouge2": 0.0028, + "eval_rougeL": 0.0159, + "eval_rougeLsum": 0.0157, + "eval_runtime": 12.5982, + "eval_samples_per_second": 9.128, + "eval_steps_per_second": 1.588, + "step": 4177 + }, + { + "epoch": 217.97, + "eval_gen_len": 11.687, + "eval_loss": 0.9279318451881409, + "eval_rouge1": 0.0203, + "eval_rouge2": 0.0029, + "eval_rougeL": 0.0168, + "eval_rougeLsum": 0.0165, + "eval_runtime": 12.2551, + "eval_samples_per_second": 9.384, + "eval_steps_per_second": 1.632, + "step": 4196 + }, + { + "epoch": 218.96, + "eval_gen_len": 11.7043, + "eval_loss": 0.9249460697174072, + "eval_rouge1": 0.0228, + "eval_rouge2": 0.0032, + "eval_rougeL": 0.0191, + "eval_rougeLsum": 0.019, + "eval_runtime": 12.661, + "eval_samples_per_second": 9.083, + "eval_steps_per_second": 1.58, + "step": 4215 + }, + { + "epoch": 220.0, + "eval_gen_len": 11.2783, + "eval_loss": 0.9218883514404297, + "eval_rouge1": 0.0219, + "eval_rouge2": 0.0032, + "eval_rougeL": 0.0182, + "eval_rougeLsum": 0.018, + "eval_runtime": 12.5398, + "eval_samples_per_second": 9.171, + "eval_steps_per_second": 1.595, + "step": 4235 + }, + { + "epoch": 220.99, + "eval_gen_len": 11.0087, + "eval_loss": 0.9194144010543823, + "eval_rouge1": 0.0203, + "eval_rouge2": 0.0029, + "eval_rougeL": 0.0171, + "eval_rougeLsum": 0.0167, + "eval_runtime": 12.4516, + "eval_samples_per_second": 9.236, + "eval_steps_per_second": 1.606, + "step": 4254 + }, + { + "epoch": 221.97, + "eval_gen_len": 10.8174, + "eval_loss": 0.9165053963661194, + "eval_rouge1": 0.0197, + "eval_rouge2": 0.0021, + "eval_rougeL": 0.0164, + "eval_rougeLsum": 0.0161, + "eval_runtime": 12.4796, + "eval_samples_per_second": 9.215, + "eval_steps_per_second": 1.603, + "step": 4273 + }, + { + "epoch": 222.96, + "eval_gen_len": 10.9652, + "eval_loss": 0.9133741855621338, + "eval_rouge1": 0.0226, + "eval_rouge2": 0.0027, + "eval_rougeL": 0.0185, + "eval_rougeLsum": 0.0182, + "eval_runtime": 12.2854, + "eval_samples_per_second": 9.361, + "eval_steps_per_second": 1.628, + "step": 4292 + }, + { + "epoch": 224.0, + "eval_gen_len": 10.7565, + "eval_loss": 0.9105393886566162, + "eval_rouge1": 0.0245, + "eval_rouge2": 0.0032, + "eval_rougeL": 0.0199, + "eval_rougeLsum": 0.0197, + "eval_runtime": 12.0278, + "eval_samples_per_second": 9.561, + "eval_steps_per_second": 1.663, + "step": 4312 + }, + { + "epoch": 224.99, + "eval_gen_len": 10.1391, + "eval_loss": 0.907561719417572, + "eval_rouge1": 0.0198, + "eval_rouge2": 0.0025, + "eval_rougeL": 0.0163, + "eval_rougeLsum": 0.0161, + "eval_runtime": 12.2338, + "eval_samples_per_second": 9.4, + "eval_steps_per_second": 1.635, + "step": 4331 + }, + { + "epoch": 225.97, + "eval_gen_len": 9.8522, + "eval_loss": 0.9046717286109924, + "eval_rouge1": 0.0171, + "eval_rouge2": 0.0029, + "eval_rougeL": 0.0145, + "eval_rougeLsum": 0.0141, + "eval_runtime": 12.5334, + "eval_samples_per_second": 9.175, + "eval_steps_per_second": 1.596, + "step": 4350 + }, + { + "epoch": 226.96, + "eval_gen_len": 9.6, + "eval_loss": 0.9021281599998474, + "eval_rouge1": 0.0167, + "eval_rouge2": 0.0025, + "eval_rougeL": 0.0145, + "eval_rougeLsum": 0.0141, + "eval_runtime": 12.1542, + "eval_samples_per_second": 9.462, + "eval_steps_per_second": 1.646, + "step": 4369 + }, + { + "epoch": 228.0, + "eval_gen_len": 9.6261, + "eval_loss": 0.8991298675537109, + "eval_rouge1": 0.0181, + "eval_rouge2": 0.0019, + "eval_rougeL": 0.0153, + "eval_rougeLsum": 0.0148, + "eval_runtime": 12.1167, + "eval_samples_per_second": 9.491, + "eval_steps_per_second": 1.651, + "step": 4389 + }, + { + "epoch": 228.99, + "eval_gen_len": 9.687, + "eval_loss": 0.8962268829345703, + "eval_rouge1": 0.0217, + "eval_rouge2": 0.0027, + "eval_rougeL": 0.0176, + "eval_rougeLsum": 0.0172, + "eval_runtime": 12.1668, + "eval_samples_per_second": 9.452, + "eval_steps_per_second": 1.644, + "step": 4408 + }, + { + "epoch": 229.97, + "eval_gen_len": 9.2435, + "eval_loss": 0.8939462304115295, + "eval_rouge1": 0.0223, + "eval_rouge2": 0.0029, + "eval_rougeL": 0.0178, + "eval_rougeLsum": 0.0175, + "eval_runtime": 12.2423, + "eval_samples_per_second": 9.394, + "eval_steps_per_second": 1.634, + "step": 4427 + }, + { + "epoch": 230.96, + "eval_gen_len": 9.1304, + "eval_loss": 0.8907042145729065, + "eval_rouge1": 0.0216, + "eval_rouge2": 0.0029, + "eval_rougeL": 0.0179, + "eval_rougeLsum": 0.0175, + "eval_runtime": 12.2251, + "eval_samples_per_second": 9.407, + "eval_steps_per_second": 1.636, + "step": 4446 + }, + { + "epoch": 232.0, + "eval_gen_len": 8.9652, + "eval_loss": 0.8877010345458984, + "eval_rouge1": 0.0211, + "eval_rouge2": 0.0025, + "eval_rougeL": 0.0169, + "eval_rougeLsum": 0.0166, + "eval_runtime": 12.123, + "eval_samples_per_second": 9.486, + "eval_steps_per_second": 1.65, + "step": 4466 + }, + { + "epoch": 232.99, + "eval_gen_len": 8.7739, + "eval_loss": 0.8858217597007751, + "eval_rouge1": 0.0209, + "eval_rouge2": 0.0027, + "eval_rougeL": 0.0168, + "eval_rougeLsum": 0.0164, + "eval_runtime": 12.3317, + "eval_samples_per_second": 9.326, + "eval_steps_per_second": 1.622, + "step": 4485 + }, + { + "epoch": 233.77, + "grad_norm": 0.45308393239974976, + "learning_rate": 5.267368421052632e-06, + "loss": 1.0189, + "step": 4500 + }, + { + "epoch": 233.97, + "eval_gen_len": 8.6087, + "eval_loss": 0.8837451934814453, + "eval_rouge1": 0.0221, + "eval_rouge2": 0.0032, + "eval_rougeL": 0.0177, + "eval_rougeLsum": 0.0173, + "eval_runtime": 13.1885, + "eval_samples_per_second": 8.72, + "eval_steps_per_second": 1.516, + "step": 4504 + }, + { + "epoch": 234.96, + "eval_gen_len": 8.487, + "eval_loss": 0.8812865614891052, + "eval_rouge1": 0.0224, + "eval_rouge2": 0.003, + "eval_rougeL": 0.0175, + "eval_rougeLsum": 0.0172, + "eval_runtime": 12.2108, + "eval_samples_per_second": 9.418, + "eval_steps_per_second": 1.638, + "step": 4523 + }, + { + "epoch": 236.0, + "eval_gen_len": 8.2957, + "eval_loss": 0.8780920505523682, + "eval_rouge1": 0.0225, + "eval_rouge2": 0.0028, + "eval_rougeL": 0.0171, + "eval_rougeLsum": 0.0168, + "eval_runtime": 12.2449, + "eval_samples_per_second": 9.392, + "eval_steps_per_second": 1.633, + "step": 4543 + }, + { + "epoch": 236.99, + "eval_gen_len": 7.9304, + "eval_loss": 0.8753093481063843, + "eval_rouge1": 0.0215, + "eval_rouge2": 0.0027, + "eval_rougeL": 0.016, + "eval_rougeLsum": 0.0158, + "eval_runtime": 12.4484, + "eval_samples_per_second": 9.238, + "eval_steps_per_second": 1.607, + "step": 4562 + }, + { + "epoch": 237.97, + "eval_gen_len": 7.8174, + "eval_loss": 0.8730840086936951, + "eval_rouge1": 0.0211, + "eval_rouge2": 0.0027, + "eval_rougeL": 0.016, + "eval_rougeLsum": 0.0156, + "eval_runtime": 12.5923, + "eval_samples_per_second": 9.133, + "eval_steps_per_second": 1.588, + "step": 4581 + }, + { + "epoch": 238.96, + "eval_gen_len": 7.687, + "eval_loss": 0.8703946471214294, + "eval_rouge1": 0.0209, + "eval_rouge2": 0.0027, + "eval_rougeL": 0.0158, + "eval_rougeLsum": 0.0154, + "eval_runtime": 12.8465, + "eval_samples_per_second": 8.952, + "eval_steps_per_second": 1.557, + "step": 4600 + }, + { + "epoch": 240.0, + "eval_gen_len": 7.3652, + "eval_loss": 0.8674846887588501, + "eval_rouge1": 0.0211, + "eval_rouge2": 0.0027, + "eval_rougeL": 0.0158, + "eval_rougeLsum": 0.0154, + "eval_runtime": 12.4238, + "eval_samples_per_second": 9.256, + "eval_steps_per_second": 1.61, + "step": 4620 + }, + { + "epoch": 240.99, + "eval_gen_len": 7.2609, + "eval_loss": 0.8647277355194092, + "eval_rouge1": 0.0204, + "eval_rouge2": 0.0022, + "eval_rougeL": 0.0147, + "eval_rougeLsum": 0.0143, + "eval_runtime": 12.6703, + "eval_samples_per_second": 9.076, + "eval_steps_per_second": 1.578, + "step": 4639 + }, + { + "epoch": 241.97, + "eval_gen_len": 7.0609, + "eval_loss": 0.8625157475471497, + "eval_rouge1": 0.0206, + "eval_rouge2": 0.0023, + "eval_rougeL": 0.0152, + "eval_rougeLsum": 0.0149, + "eval_runtime": 12.5819, + "eval_samples_per_second": 9.14, + "eval_steps_per_second": 1.59, + "step": 4658 + }, + { + "epoch": 242.96, + "eval_gen_len": 6.5652, + "eval_loss": 0.8605428338050842, + "eval_rouge1": 0.0182, + "eval_rouge2": 0.0017, + "eval_rougeL": 0.0133, + "eval_rougeLsum": 0.0131, + "eval_runtime": 13.1768, + "eval_samples_per_second": 8.727, + "eval_steps_per_second": 1.518, + "step": 4677 + }, + { + "epoch": 244.0, + "eval_gen_len": 6.4261, + "eval_loss": 0.8578657507896423, + "eval_rouge1": 0.0177, + "eval_rouge2": 0.0021, + "eval_rougeL": 0.0134, + "eval_rougeLsum": 0.0131, + "eval_runtime": 12.6947, + "eval_samples_per_second": 9.059, + "eval_steps_per_second": 1.575, + "step": 4697 + }, + { + "epoch": 244.99, + "eval_gen_len": 6.2783, + "eval_loss": 0.8557173609733582, + "eval_rouge1": 0.0177, + "eval_rouge2": 0.0021, + "eval_rougeL": 0.0134, + "eval_rougeLsum": 0.013, + "eval_runtime": 12.5391, + "eval_samples_per_second": 9.171, + "eval_steps_per_second": 1.595, + "step": 4716 + }, + { + "epoch": 245.97, + "eval_gen_len": 6.2435, + "eval_loss": 0.8529919981956482, + "eval_rouge1": 0.0169, + "eval_rouge2": 0.0014, + "eval_rougeL": 0.0131, + "eval_rougeLsum": 0.0127, + "eval_runtime": 12.7454, + "eval_samples_per_second": 9.023, + "eval_steps_per_second": 1.569, + "step": 4735 + }, + { + "epoch": 246.96, + "eval_gen_len": 6.1565, + "eval_loss": 0.850603461265564, + "eval_rouge1": 0.0191, + "eval_rouge2": 0.0019, + "eval_rougeL": 0.0145, + "eval_rougeLsum": 0.0141, + "eval_runtime": 12.284, + "eval_samples_per_second": 9.362, + "eval_steps_per_second": 1.628, + "step": 4754 + }, + { + "epoch": 248.0, + "eval_gen_len": 5.9478, + "eval_loss": 0.8480112552642822, + "eval_rouge1": 0.0186, + "eval_rouge2": 0.0015, + "eval_rougeL": 0.0146, + "eval_rougeLsum": 0.0142, + "eval_runtime": 12.3791, + "eval_samples_per_second": 9.29, + "eval_steps_per_second": 1.616, + "step": 4774 + }, + { + "epoch": 248.99, + "eval_gen_len": 5.7043, + "eval_loss": 0.8458153009414673, + "eval_rouge1": 0.0173, + "eval_rouge2": 0.0013, + "eval_rougeL": 0.0137, + "eval_rougeLsum": 0.0131, + "eval_runtime": 12.4132, + "eval_samples_per_second": 9.264, + "eval_steps_per_second": 1.611, + "step": 4793 + }, + { + "epoch": 249.97, + "eval_gen_len": 5.7478, + "eval_loss": 0.8430487513542175, + "eval_rouge1": 0.0169, + "eval_rouge2": 0.0015, + "eval_rougeL": 0.0136, + "eval_rougeLsum": 0.0133, + "eval_runtime": 12.8141, + "eval_samples_per_second": 8.974, + "eval_steps_per_second": 1.561, + "step": 4812 + }, + { + "epoch": 250.96, + "eval_gen_len": 5.3739, + "eval_loss": 0.841323971748352, + "eval_rouge1": 0.0152, + "eval_rouge2": 0.0016, + "eval_rougeL": 0.0124, + "eval_rougeLsum": 0.0121, + "eval_runtime": 12.5346, + "eval_samples_per_second": 9.175, + "eval_steps_per_second": 1.596, + "step": 4831 + }, + { + "epoch": 252.0, + "eval_gen_len": 5.1565, + "eval_loss": 0.838948130607605, + "eval_rouge1": 0.0149, + "eval_rouge2": 0.0011, + "eval_rougeL": 0.0124, + "eval_rougeLsum": 0.012, + "eval_runtime": 13.0143, + "eval_samples_per_second": 8.836, + "eval_steps_per_second": 1.537, + "step": 4851 + }, + { + "epoch": 252.99, + "eval_gen_len": 4.9739, + "eval_loss": 0.8368021249771118, + "eval_rouge1": 0.0148, + "eval_rouge2": 0.0011, + "eval_rougeL": 0.0123, + "eval_rougeLsum": 0.0119, + "eval_runtime": 12.4217, + "eval_samples_per_second": 9.258, + "eval_steps_per_second": 1.61, + "step": 4870 + }, + { + "epoch": 253.97, + "eval_gen_len": 4.9652, + "eval_loss": 0.8342902660369873, + "eval_rouge1": 0.0158, + "eval_rouge2": 0.0011, + "eval_rougeL": 0.013, + "eval_rougeLsum": 0.0127, + "eval_runtime": 12.2067, + "eval_samples_per_second": 9.421, + "eval_steps_per_second": 1.638, + "step": 4889 + }, + { + "epoch": 254.96, + "eval_gen_len": 4.6522, + "eval_loss": 0.8321281671524048, + "eval_rouge1": 0.0145, + "eval_rouge2": 0.0009, + "eval_rougeL": 0.012, + "eval_rougeLsum": 0.0117, + "eval_runtime": 12.8126, + "eval_samples_per_second": 8.976, + "eval_steps_per_second": 1.561, + "step": 4908 + }, + { + "epoch": 256.0, + "eval_gen_len": 4.2522, + "eval_loss": 0.8296378254890442, + "eval_rouge1": 0.0139, + "eval_rouge2": 0.0009, + "eval_rougeL": 0.0113, + "eval_rougeLsum": 0.0112, + "eval_runtime": 12.3844, + "eval_samples_per_second": 9.286, + "eval_steps_per_second": 1.615, + "step": 4928 + }, + { + "epoch": 256.99, + "eval_gen_len": 4.1826, + "eval_loss": 0.8276596069335938, + "eval_rouge1": 0.0143, + "eval_rouge2": 0.0009, + "eval_rougeL": 0.0118, + "eval_rougeLsum": 0.0117, + "eval_runtime": 12.4227, + "eval_samples_per_second": 9.257, + "eval_steps_per_second": 1.61, + "step": 4947 + }, + { + "epoch": 257.97, + "eval_gen_len": 3.6261, + "eval_loss": 0.8265025019645691, + "eval_rouge1": 0.0127, + "eval_rouge2": 0.0007, + "eval_rougeL": 0.0103, + "eval_rougeLsum": 0.01, + "eval_runtime": 12.2122, + "eval_samples_per_second": 9.417, + "eval_steps_per_second": 1.638, + "step": 4966 + }, + { + "epoch": 258.96, + "eval_gen_len": 3.2609, + "eval_loss": 0.8242406845092773, + "eval_rouge1": 0.0122, + "eval_rouge2": 0.0013, + "eval_rougeL": 0.0102, + "eval_rougeLsum": 0.0101, + "eval_runtime": 12.2099, + "eval_samples_per_second": 9.419, + "eval_steps_per_second": 1.638, + "step": 4985 + }, + { + "epoch": 259.74, + "grad_norm": 0.4785182774066925, + "learning_rate": 4.741052631578948e-06, + "loss": 0.9442, + "step": 5000 + }, + { + "epoch": 260.0, + "eval_gen_len": 2.8, + "eval_loss": 0.8225219249725342, + "eval_rouge1": 0.0097, + "eval_rouge2": 0.0011, + "eval_rougeL": 0.0082, + "eval_rougeLsum": 0.0082, + "eval_runtime": 12.9652, + "eval_samples_per_second": 8.87, + "eval_steps_per_second": 1.543, + "step": 5005 + }, + { + "epoch": 260.99, + "eval_gen_len": 2.5652, + "eval_loss": 0.8206771612167358, + "eval_rouge1": 0.0087, + "eval_rouge2": 0.0011, + "eval_rougeL": 0.0071, + "eval_rougeLsum": 0.0069, + "eval_runtime": 12.2186, + "eval_samples_per_second": 9.412, + "eval_steps_per_second": 1.637, + "step": 5024 + }, + { + "epoch": 261.97, + "eval_gen_len": 2.2348, + "eval_loss": 0.818169891834259, + "eval_rouge1": 0.0072, + "eval_rouge2": 0.0005, + "eval_rougeL": 0.0059, + "eval_rougeLsum": 0.0058, + "eval_runtime": 12.5714, + "eval_samples_per_second": 9.148, + "eval_steps_per_second": 1.591, + "step": 5043 + }, + { + "epoch": 262.96, + "eval_gen_len": 2.2, + "eval_loss": 0.8162385821342468, + "eval_rouge1": 0.0062, + "eval_rouge2": 0.0002, + "eval_rougeL": 0.0051, + "eval_rougeLsum": 0.0051, + "eval_runtime": 12.5747, + "eval_samples_per_second": 9.145, + "eval_steps_per_second": 1.59, + "step": 5062 + }, + { + "epoch": 264.0, + "eval_gen_len": 2.2087, + "eval_loss": 0.8145304918289185, + "eval_rouge1": 0.0068, + "eval_rouge2": 0.0005, + "eval_rougeL": 0.0056, + "eval_rougeLsum": 0.0057, + "eval_runtime": 12.6293, + "eval_samples_per_second": 9.106, + "eval_steps_per_second": 1.584, + "step": 5082 + }, + { + "epoch": 264.99, + "eval_gen_len": 2.3304, + "eval_loss": 0.8127499222755432, + "eval_rouge1": 0.0086, + "eval_rouge2": 0.002, + "eval_rougeL": 0.0076, + "eval_rougeLsum": 0.0075, + "eval_runtime": 13.2312, + "eval_samples_per_second": 8.692, + "eval_steps_per_second": 1.512, + "step": 5101 + }, + { + "epoch": 265.97, + "eval_gen_len": 1.8957, + "eval_loss": 0.811177670955658, + "eval_rouge1": 0.0057, + "eval_rouge2": 0.001, + "eval_rougeL": 0.0052, + "eval_rougeLsum": 0.0051, + "eval_runtime": 12.5731, + "eval_samples_per_second": 9.147, + "eval_steps_per_second": 1.591, + "step": 5120 + }, + { + "epoch": 266.96, + "eval_gen_len": 1.513, + "eval_loss": 0.8090675473213196, + "eval_rouge1": 0.0042, + "eval_rouge2": 0.0007, + "eval_rougeL": 0.004, + "eval_rougeLsum": 0.004, + "eval_runtime": 12.391, + "eval_samples_per_second": 9.281, + "eval_steps_per_second": 1.614, + "step": 5139 + }, + { + "epoch": 268.0, + "eval_gen_len": 1.2435, + "eval_loss": 0.8073368668556213, + "eval_rouge1": 0.0031, + "eval_rouge2": 0.0006, + "eval_rougeL": 0.0029, + "eval_rougeLsum": 0.0029, + "eval_runtime": 12.3681, + "eval_samples_per_second": 9.298, + "eval_steps_per_second": 1.617, + "step": 5159 + }, + { + "epoch": 268.99, + "eval_gen_len": 1.0348, + "eval_loss": 0.8059150576591492, + "eval_rouge1": 0.0031, + "eval_rouge2": 0.0006, + "eval_rougeL": 0.0029, + "eval_rougeLsum": 0.0029, + "eval_runtime": 12.3463, + "eval_samples_per_second": 9.315, + "eval_steps_per_second": 1.62, + "step": 5178 + }, + { + "epoch": 269.97, + "eval_gen_len": 0.6348, + "eval_loss": 0.8042454123497009, + "eval_rouge1": 0.0007, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0007, + "eval_rougeLsum": 0.0007, + "eval_runtime": 12.4194, + "eval_samples_per_second": 9.26, + "eval_steps_per_second": 1.61, + "step": 5197 + }, + { + "epoch": 270.96, + "eval_gen_len": 0.7304, + "eval_loss": 0.8023030161857605, + "eval_rouge1": 0.0012, + "eval_rouge2": 0.0003, + "eval_rougeL": 0.0011, + "eval_rougeLsum": 0.001, + "eval_runtime": 12.5901, + "eval_samples_per_second": 9.134, + "eval_steps_per_second": 1.589, + "step": 5216 + }, + { + "epoch": 272.0, + "eval_gen_len": 0.8, + "eval_loss": 0.8001125454902649, + "eval_rouge1": 0.0012, + "eval_rouge2": 0.0003, + "eval_rougeL": 0.0011, + "eval_rougeLsum": 0.001, + "eval_runtime": 12.5732, + "eval_samples_per_second": 9.146, + "eval_steps_per_second": 1.591, + "step": 5236 + }, + { + "epoch": 272.99, + "eval_gen_len": 0.6348, + "eval_loss": 0.7986020445823669, + "eval_rouge1": 0.0012, + "eval_rouge2": 0.0003, + "eval_rougeL": 0.0011, + "eval_rougeLsum": 0.001, + "eval_runtime": 12.389, + "eval_samples_per_second": 9.282, + "eval_steps_per_second": 1.614, + "step": 5255 + }, + { + "epoch": 273.97, + "eval_gen_len": 0.7478, + "eval_loss": 0.7969604730606079, + "eval_rouge1": 0.0012, + "eval_rouge2": 0.0003, + "eval_rougeL": 0.0011, + "eval_rougeLsum": 0.001, + "eval_runtime": 12.3419, + "eval_samples_per_second": 9.318, + "eval_steps_per_second": 1.62, + "step": 5274 + }, + { + "epoch": 274.96, + "eval_gen_len": 0.5826, + "eval_loss": 0.795600175857544, + "eval_rouge1": 0.0004, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0004, + "eval_rougeLsum": 0.0004, + "eval_runtime": 12.3627, + "eval_samples_per_second": 9.302, + "eval_steps_per_second": 1.618, + "step": 5293 + }, + { + "epoch": 276.0, + "eval_gen_len": 0.4, + "eval_loss": 0.7938172817230225, + "eval_rouge1": 0.0004, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0004, + "eval_rougeLsum": 0.0004, + "eval_runtime": 12.3706, + "eval_samples_per_second": 9.296, + "eval_steps_per_second": 1.617, + "step": 5313 + }, + { + "epoch": 276.99, + "eval_gen_len": 0.2261, + "eval_loss": 0.7923696041107178, + "eval_rouge1": 0.0002, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0002, + "eval_rougeLsum": 0.0002, + "eval_runtime": 12.4258, + "eval_samples_per_second": 9.255, + "eval_steps_per_second": 1.61, + "step": 5332 + }, + { + "epoch": 277.97, + "eval_gen_len": 0.2261, + "eval_loss": 0.7907570600509644, + "eval_rouge1": 0.0002, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0002, + "eval_rougeLsum": 0.0002, + "eval_runtime": 12.5909, + "eval_samples_per_second": 9.134, + "eval_steps_per_second": 1.588, + "step": 5351 + }, + { + "epoch": 278.96, + "eval_gen_len": 0.2, + "eval_loss": 0.7891109585762024, + "eval_rouge1": 0.0002, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0002, + "eval_rougeLsum": 0.0002, + "eval_runtime": 13.0018, + "eval_samples_per_second": 8.845, + "eval_steps_per_second": 1.538, + "step": 5370 + }, + { + "epoch": 280.0, + "eval_gen_len": 0.1826, + "eval_loss": 0.787673830986023, + "eval_rouge1": 0.0002, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0002, + "eval_rougeLsum": 0.0002, + "eval_runtime": 12.0698, + "eval_samples_per_second": 9.528, + "eval_steps_per_second": 1.657, + "step": 5390 + }, + { + "epoch": 280.99, + "eval_gen_len": 0.1739, + "eval_loss": 0.785959005355835, + "eval_rouge1": 0.0002, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0002, + "eval_rougeLsum": 0.0002, + "eval_runtime": 12.4254, + "eval_samples_per_second": 9.255, + "eval_steps_per_second": 1.61, + "step": 5409 + }, + { + "epoch": 281.97, + "eval_gen_len": 0.1739, + "eval_loss": 0.7843196988105774, + "eval_rouge1": 0.0002, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0002, + "eval_rougeLsum": 0.0002, + "eval_runtime": 12.6986, + "eval_samples_per_second": 9.056, + "eval_steps_per_second": 1.575, + "step": 5428 + }, + { + "epoch": 282.96, + "eval_gen_len": 0.1739, + "eval_loss": 0.7826663851737976, + "eval_rouge1": 0.0002, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0002, + "eval_rougeLsum": 0.0002, + "eval_runtime": 12.2827, + "eval_samples_per_second": 9.363, + "eval_steps_per_second": 1.628, + "step": 5447 + }, + { + "epoch": 284.0, + "eval_gen_len": 0.1739, + "eval_loss": 0.7811480760574341, + "eval_rouge1": 0.0002, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0002, + "eval_rougeLsum": 0.0002, + "eval_runtime": 12.6013, + "eval_samples_per_second": 9.126, + "eval_steps_per_second": 1.587, + "step": 5467 + }, + { + "epoch": 284.99, + "eval_gen_len": 0.1652, + "eval_loss": 0.7799001932144165, + "eval_rouge1": 0.0002, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0002, + "eval_rougeLsum": 0.0002, + "eval_runtime": 12.3727, + "eval_samples_per_second": 9.295, + "eval_steps_per_second": 1.616, + "step": 5486 + }, + { + "epoch": 285.71, + "grad_norm": 0.36042362451553345, + "learning_rate": 4.214736842105263e-06, + "loss": 0.8855, + "step": 5500 + }, + { + "epoch": 285.97, + "eval_gen_len": 0.1652, + "eval_loss": 0.7784348726272583, + "eval_rouge1": 0.0002, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0002, + "eval_rougeLsum": 0.0002, + "eval_runtime": 12.4298, + "eval_samples_per_second": 9.252, + "eval_steps_per_second": 1.609, + "step": 5505 + }, + { + "epoch": 286.96, + "eval_gen_len": 0.1652, + "eval_loss": 0.7772350311279297, + "eval_rouge1": 0.0002, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0002, + "eval_rougeLsum": 0.0002, + "eval_runtime": 12.5935, + "eval_samples_per_second": 9.132, + "eval_steps_per_second": 1.588, + "step": 5524 + }, + { + "epoch": 288.0, + "eval_gen_len": 0.1652, + "eval_loss": 0.775896430015564, + "eval_rouge1": 0.0002, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0002, + "eval_rougeLsum": 0.0002, + "eval_runtime": 12.6907, + "eval_samples_per_second": 9.062, + "eval_steps_per_second": 1.576, + "step": 5544 + }, + { + "epoch": 288.99, + "eval_gen_len": 0.1652, + "eval_loss": 0.7743993401527405, + "eval_rouge1": 0.0002, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0002, + "eval_rougeLsum": 0.0002, + "eval_runtime": 12.7195, + "eval_samples_per_second": 9.041, + "eval_steps_per_second": 1.572, + "step": 5563 + }, + { + "epoch": 289.97, + "eval_gen_len": 0.1652, + "eval_loss": 0.7728458046913147, + "eval_rouge1": 0.0002, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0002, + "eval_rougeLsum": 0.0002, + "eval_runtime": 12.71, + "eval_samples_per_second": 9.048, + "eval_steps_per_second": 1.574, + "step": 5582 + }, + { + "epoch": 290.96, + "eval_gen_len": 0.0, + "eval_loss": 0.7715795636177063, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.6969, + "eval_samples_per_second": 9.057, + "eval_steps_per_second": 1.575, + "step": 5601 + }, + { + "epoch": 292.0, + "eval_gen_len": 0.0, + "eval_loss": 0.7701930999755859, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.902, + "eval_samples_per_second": 8.913, + "eval_steps_per_second": 1.55, + "step": 5621 + }, + { + "epoch": 292.99, + "eval_gen_len": 0.0, + "eval_loss": 0.7691376209259033, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.3513, + "eval_samples_per_second": 9.311, + "eval_steps_per_second": 1.619, + "step": 5640 + }, + { + "epoch": 293.97, + "eval_gen_len": 0.0, + "eval_loss": 0.7679579257965088, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.4173, + "eval_samples_per_second": 9.261, + "eval_steps_per_second": 1.611, + "step": 5659 + }, + { + "epoch": 294.96, + "eval_gen_len": 0.0, + "eval_loss": 0.7667289972305298, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.6438, + "eval_samples_per_second": 9.095, + "eval_steps_per_second": 1.582, + "step": 5678 + }, + { + "epoch": 296.0, + "eval_gen_len": 0.0, + "eval_loss": 0.7650233507156372, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.1233, + "eval_samples_per_second": 9.486, + "eval_steps_per_second": 1.65, + "step": 5698 + }, + { + "epoch": 296.99, + "eval_gen_len": 0.0, + "eval_loss": 0.7638988494873047, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.1018, + "eval_samples_per_second": 9.503, + "eval_steps_per_second": 1.653, + "step": 5717 + }, + { + "epoch": 297.97, + "eval_gen_len": 0.0, + "eval_loss": 0.7627271413803101, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.0736, + "eval_samples_per_second": 9.525, + "eval_steps_per_second": 1.657, + "step": 5736 + }, + { + "epoch": 298.96, + "eval_gen_len": 0.0, + "eval_loss": 0.761401891708374, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.4179, + "eval_samples_per_second": 9.261, + "eval_steps_per_second": 1.611, + "step": 5755 + }, + { + "epoch": 300.0, + "eval_gen_len": 0.0, + "eval_loss": 0.7603045105934143, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.1939, + "eval_samples_per_second": 9.431, + "eval_steps_per_second": 1.64, + "step": 5775 + }, + { + "epoch": 300.99, + "eval_gen_len": 0.0, + "eval_loss": 0.7593241333961487, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.4302, + "eval_samples_per_second": 9.252, + "eval_steps_per_second": 1.609, + "step": 5794 + }, + { + "epoch": 301.97, + "eval_gen_len": 0.0, + "eval_loss": 0.7581080198287964, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.7272, + "eval_samples_per_second": 9.036, + "eval_steps_per_second": 1.571, + "step": 5813 + }, + { + "epoch": 302.96, + "eval_gen_len": 0.0, + "eval_loss": 0.7565290927886963, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.7621, + "eval_samples_per_second": 9.011, + "eval_steps_per_second": 1.567, + "step": 5832 + }, + { + "epoch": 304.0, + "eval_gen_len": 0.0, + "eval_loss": 0.7556654810905457, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.4563, + "eval_samples_per_second": 9.232, + "eval_steps_per_second": 1.606, + "step": 5852 + }, + { + "epoch": 304.99, + "eval_gen_len": 0.0, + "eval_loss": 0.754369854927063, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.207, + "eval_samples_per_second": 9.421, + "eval_steps_per_second": 1.638, + "step": 5871 + }, + { + "epoch": 305.97, + "eval_gen_len": 0.0, + "eval_loss": 0.7534385919570923, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.6305, + "eval_samples_per_second": 9.105, + "eval_steps_per_second": 1.583, + "step": 5890 + }, + { + "epoch": 306.96, + "eval_gen_len": 0.0, + "eval_loss": 0.7526547908782959, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.4513, + "eval_samples_per_second": 9.236, + "eval_steps_per_second": 1.606, + "step": 5909 + }, + { + "epoch": 308.0, + "eval_gen_len": 0.0, + "eval_loss": 0.7513379454612732, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.349, + "eval_samples_per_second": 9.312, + "eval_steps_per_second": 1.62, + "step": 5929 + }, + { + "epoch": 308.99, + "eval_gen_len": 0.0, + "eval_loss": 0.7506363987922668, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.0976, + "eval_samples_per_second": 9.506, + "eval_steps_per_second": 1.653, + "step": 5948 + }, + { + "epoch": 309.97, + "eval_gen_len": 0.0, + "eval_loss": 0.7496155500411987, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.3015, + "eval_samples_per_second": 9.348, + "eval_steps_per_second": 1.626, + "step": 5967 + }, + { + "epoch": 310.96, + "eval_gen_len": 0.0, + "eval_loss": 0.7488384246826172, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.3367, + "eval_samples_per_second": 9.322, + "eval_steps_per_second": 1.621, + "step": 5986 + }, + { + "epoch": 311.69, + "grad_norm": 0.3260189890861511, + "learning_rate": 3.6884210526315794e-06, + "loss": 0.8402, + "step": 6000 + }, + { + "epoch": 312.0, + "eval_gen_len": 0.0, + "eval_loss": 0.7474696040153503, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.3992, + "eval_samples_per_second": 9.275, + "eval_steps_per_second": 1.613, + "step": 6006 + }, + { + "epoch": 312.99, + "eval_gen_len": 0.0, + "eval_loss": 0.7464930415153503, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 11.9232, + "eval_samples_per_second": 9.645, + "eval_steps_per_second": 1.677, + "step": 6025 + }, + { + "epoch": 313.97, + "eval_gen_len": 0.0, + "eval_loss": 0.7456102967262268, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.6801, + "eval_samples_per_second": 9.069, + "eval_steps_per_second": 1.577, + "step": 6044 + }, + { + "epoch": 314.96, + "eval_gen_len": 0.0, + "eval_loss": 0.7446662783622742, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.3568, + "eval_samples_per_second": 9.307, + "eval_steps_per_second": 1.619, + "step": 6063 + }, + { + "epoch": 316.0, + "eval_gen_len": 0.0, + "eval_loss": 0.7433856725692749, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 11.987, + "eval_samples_per_second": 9.594, + "eval_steps_per_second": 1.668, + "step": 6083 + }, + { + "epoch": 316.99, + "eval_gen_len": 0.0, + "eval_loss": 0.7426111698150635, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.6966, + "eval_samples_per_second": 9.058, + "eval_steps_per_second": 1.575, + "step": 6102 + }, + { + "epoch": 317.97, + "eval_gen_len": 0.0, + "eval_loss": 0.7413787841796875, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.2025, + "eval_samples_per_second": 9.424, + "eval_steps_per_second": 1.639, + "step": 6121 + }, + { + "epoch": 318.96, + "eval_gen_len": 0.0, + "eval_loss": 0.7404425144195557, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.9943, + "eval_samples_per_second": 8.85, + "eval_steps_per_second": 1.539, + "step": 6140 + }, + { + "epoch": 320.0, + "eval_gen_len": 0.0, + "eval_loss": 0.7396877408027649, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.3803, + "eval_samples_per_second": 9.289, + "eval_steps_per_second": 1.615, + "step": 6160 + }, + { + "epoch": 320.99, + "eval_gen_len": 0.0, + "eval_loss": 0.739030122756958, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 13.3182, + "eval_samples_per_second": 8.635, + "eval_steps_per_second": 1.502, + "step": 6179 + }, + { + "epoch": 321.97, + "eval_gen_len": 0.0, + "eval_loss": 0.7381538152694702, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.7464, + "eval_samples_per_second": 9.022, + "eval_steps_per_second": 1.569, + "step": 6198 + }, + { + "epoch": 322.96, + "eval_gen_len": 0.0, + "eval_loss": 0.7372981309890747, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.1988, + "eval_samples_per_second": 9.427, + "eval_steps_per_second": 1.64, + "step": 6217 + }, + { + "epoch": 324.0, + "eval_gen_len": 0.0, + "eval_loss": 0.7361249327659607, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 13.3472, + "eval_samples_per_second": 8.616, + "eval_steps_per_second": 1.498, + "step": 6237 + }, + { + "epoch": 324.99, + "eval_gen_len": 0.0, + "eval_loss": 0.735177218914032, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.2208, + "eval_samples_per_second": 9.41, + "eval_steps_per_second": 1.637, + "step": 6256 + }, + { + "epoch": 325.97, + "eval_gen_len": 0.0, + "eval_loss": 0.7344561219215393, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.9594, + "eval_samples_per_second": 8.874, + "eval_steps_per_second": 1.543, + "step": 6275 + }, + { + "epoch": 326.96, + "eval_gen_len": 0.0, + "eval_loss": 0.7334731817245483, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 13.1094, + "eval_samples_per_second": 8.772, + "eval_steps_per_second": 1.526, + "step": 6294 + }, + { + "epoch": 328.0, + "eval_gen_len": 0.0, + "eval_loss": 0.7326551079750061, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.7729, + "eval_samples_per_second": 9.003, + "eval_steps_per_second": 1.566, + "step": 6314 + }, + { + "epoch": 328.99, + "eval_gen_len": 0.0, + "eval_loss": 0.7316291332244873, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.415, + "eval_samples_per_second": 9.263, + "eval_steps_per_second": 1.611, + "step": 6333 + }, + { + "epoch": 329.97, + "eval_gen_len": 0.0, + "eval_loss": 0.7311994433403015, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.9044, + "eval_samples_per_second": 8.912, + "eval_steps_per_second": 1.55, + "step": 6352 + }, + { + "epoch": 330.96, + "eval_gen_len": 0.0, + "eval_loss": 0.7306154370307922, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 13.0335, + "eval_samples_per_second": 8.823, + "eval_steps_per_second": 1.535, + "step": 6371 + }, + { + "epoch": 332.0, + "eval_gen_len": 0.0, + "eval_loss": 0.7298057675361633, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.5384, + "eval_samples_per_second": 9.172, + "eval_steps_per_second": 1.595, + "step": 6391 + }, + { + "epoch": 332.99, + "eval_gen_len": 0.0, + "eval_loss": 0.7290323972702026, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.7202, + "eval_samples_per_second": 9.041, + "eval_steps_per_second": 1.572, + "step": 6410 + }, + { + "epoch": 333.97, + "eval_gen_len": 0.0, + "eval_loss": 0.7283275127410889, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.5197, + "eval_samples_per_second": 9.186, + "eval_steps_per_second": 1.597, + "step": 6429 + }, + { + "epoch": 334.96, + "eval_gen_len": 0.0, + "eval_loss": 0.7273982763290405, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.6808, + "eval_samples_per_second": 9.069, + "eval_steps_per_second": 1.577, + "step": 6448 + }, + { + "epoch": 336.0, + "eval_gen_len": 0.0, + "eval_loss": 0.7265883684158325, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 13.0488, + "eval_samples_per_second": 8.813, + "eval_steps_per_second": 1.533, + "step": 6468 + }, + { + "epoch": 336.99, + "eval_gen_len": 0.0, + "eval_loss": 0.7261592745780945, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.6573, + "eval_samples_per_second": 9.086, + "eval_steps_per_second": 1.58, + "step": 6487 + }, + { + "epoch": 337.66, + "grad_norm": 0.288989782333374, + "learning_rate": 3.1621052631578953e-06, + "loss": 0.8058, + "step": 6500 + }, + { + "epoch": 337.97, + "eval_gen_len": 0.0, + "eval_loss": 0.7252629995346069, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.4295, + "eval_samples_per_second": 9.252, + "eval_steps_per_second": 1.609, + "step": 6506 + }, + { + "epoch": 338.96, + "eval_gen_len": 0.0, + "eval_loss": 0.7245468497276306, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.7865, + "eval_samples_per_second": 8.994, + "eval_steps_per_second": 1.564, + "step": 6525 + }, + { + "epoch": 340.0, + "eval_gen_len": 0.0, + "eval_loss": 0.7239726185798645, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.9112, + "eval_samples_per_second": 8.907, + "eval_steps_per_second": 1.549, + "step": 6545 + }, + { + "epoch": 340.99, + "eval_gen_len": 0.0, + "eval_loss": 0.7231466770172119, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.5487, + "eval_samples_per_second": 9.164, + "eval_steps_per_second": 1.594, + "step": 6564 + }, + { + "epoch": 341.97, + "eval_gen_len": 0.0, + "eval_loss": 0.7223904728889465, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 13.3347, + "eval_samples_per_second": 8.624, + "eval_steps_per_second": 1.5, + "step": 6583 + }, + { + "epoch": 342.96, + "eval_gen_len": 0.0, + "eval_loss": 0.7218188643455505, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 13.0031, + "eval_samples_per_second": 8.844, + "eval_steps_per_second": 1.538, + "step": 6602 + }, + { + "epoch": 344.0, + "eval_gen_len": 0.0, + "eval_loss": 0.7209810614585876, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.4608, + "eval_samples_per_second": 9.229, + "eval_steps_per_second": 1.605, + "step": 6622 + }, + { + "epoch": 344.99, + "eval_gen_len": 0.0, + "eval_loss": 0.7203324437141418, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.5304, + "eval_samples_per_second": 9.178, + "eval_steps_per_second": 1.596, + "step": 6641 + }, + { + "epoch": 345.97, + "eval_gen_len": 0.0, + "eval_loss": 0.7196723818778992, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.6257, + "eval_samples_per_second": 9.108, + "eval_steps_per_second": 1.584, + "step": 6660 + }, + { + "epoch": 346.96, + "eval_gen_len": 0.0, + "eval_loss": 0.7190775275230408, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.4134, + "eval_samples_per_second": 9.264, + "eval_steps_per_second": 1.611, + "step": 6679 + }, + { + "epoch": 348.0, + "eval_gen_len": 0.0, + "eval_loss": 0.7185074090957642, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.166, + "eval_samples_per_second": 9.453, + "eval_steps_per_second": 1.644, + "step": 6699 + }, + { + "epoch": 348.99, + "eval_gen_len": 0.0, + "eval_loss": 0.7180371880531311, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.559, + "eval_samples_per_second": 9.157, + "eval_steps_per_second": 1.592, + "step": 6718 + }, + { + "epoch": 349.97, + "eval_gen_len": 0.0, + "eval_loss": 0.717097818851471, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.3, + "eval_samples_per_second": 9.35, + "eval_steps_per_second": 1.626, + "step": 6737 + }, + { + "epoch": 350.96, + "eval_gen_len": 0.0, + "eval_loss": 0.7164217829704285, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.3583, + "eval_samples_per_second": 9.306, + "eval_steps_per_second": 1.618, + "step": 6756 + }, + { + "epoch": 352.0, + "eval_gen_len": 0.0, + "eval_loss": 0.7158520817756653, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 13.0047, + "eval_samples_per_second": 8.843, + "eval_steps_per_second": 1.538, + "step": 6776 + }, + { + "epoch": 352.99, + "eval_gen_len": 0.0, + "eval_loss": 0.7151947021484375, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.6741, + "eval_samples_per_second": 9.074, + "eval_steps_per_second": 1.578, + "step": 6795 + }, + { + "epoch": 353.97, + "eval_gen_len": 0.0, + "eval_loss": 0.7145124077796936, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.6036, + "eval_samples_per_second": 9.124, + "eval_steps_per_second": 1.587, + "step": 6814 + }, + { + "epoch": 354.96, + "eval_gen_len": 0.0, + "eval_loss": 0.7140352725982666, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.2129, + "eval_samples_per_second": 9.416, + "eval_steps_per_second": 1.638, + "step": 6833 + }, + { + "epoch": 356.0, + "eval_gen_len": 0.0, + "eval_loss": 0.7135369777679443, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.4624, + "eval_samples_per_second": 9.228, + "eval_steps_per_second": 1.605, + "step": 6853 + }, + { + "epoch": 356.99, + "eval_gen_len": 0.0, + "eval_loss": 0.7128369808197021, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.422, + "eval_samples_per_second": 9.258, + "eval_steps_per_second": 1.61, + "step": 6872 + }, + { + "epoch": 357.97, + "eval_gen_len": 0.0, + "eval_loss": 0.7122591137886047, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.2268, + "eval_samples_per_second": 9.406, + "eval_steps_per_second": 1.636, + "step": 6891 + }, + { + "epoch": 358.96, + "eval_gen_len": 0.0, + "eval_loss": 0.7116859555244446, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.866, + "eval_samples_per_second": 8.938, + "eval_steps_per_second": 1.554, + "step": 6910 + }, + { + "epoch": 360.0, + "eval_gen_len": 0.0, + "eval_loss": 0.7112235426902771, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.4942, + "eval_samples_per_second": 9.204, + "eval_steps_per_second": 1.601, + "step": 6930 + }, + { + "epoch": 360.99, + "eval_gen_len": 0.0, + "eval_loss": 0.7106695771217346, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.4042, + "eval_samples_per_second": 9.271, + "eval_steps_per_second": 1.612, + "step": 6949 + }, + { + "epoch": 361.97, + "eval_gen_len": 0.0, + "eval_loss": 0.710101306438446, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.2879, + "eval_samples_per_second": 9.359, + "eval_steps_per_second": 1.628, + "step": 6968 + }, + { + "epoch": 362.96, + "eval_gen_len": 0.0, + "eval_loss": 0.7094107270240784, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.269, + "eval_samples_per_second": 9.373, + "eval_steps_per_second": 1.63, + "step": 6987 + }, + { + "epoch": 363.64, + "grad_norm": 0.44062402844429016, + "learning_rate": 2.635789473684211e-06, + "loss": 0.7798, + "step": 7000 + }, + { + "epoch": 364.0, + "eval_gen_len": 0.0, + "eval_loss": 0.708891749382019, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.9085, + "eval_samples_per_second": 8.909, + "eval_steps_per_second": 1.549, + "step": 7007 + }, + { + "epoch": 364.99, + "eval_gen_len": 0.0, + "eval_loss": 0.7082711458206177, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.1554, + "eval_samples_per_second": 9.461, + "eval_steps_per_second": 1.645, + "step": 7026 + }, + { + "epoch": 365.97, + "eval_gen_len": 0.0, + "eval_loss": 0.7078844904899597, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.6546, + "eval_samples_per_second": 9.088, + "eval_steps_per_second": 1.58, + "step": 7045 + }, + { + "epoch": 366.96, + "eval_gen_len": 0.0, + "eval_loss": 0.7073128819465637, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.7172, + "eval_samples_per_second": 9.043, + "eval_steps_per_second": 1.573, + "step": 7064 + }, + { + "epoch": 368.0, + "eval_gen_len": 0.0, + "eval_loss": 0.7066096067428589, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.1125, + "eval_samples_per_second": 9.494, + "eval_steps_per_second": 1.651, + "step": 7084 + }, + { + "epoch": 368.99, + "eval_gen_len": 0.0, + "eval_loss": 0.7057228088378906, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.5605, + "eval_samples_per_second": 9.156, + "eval_steps_per_second": 1.592, + "step": 7103 + }, + { + "epoch": 369.97, + "eval_gen_len": 0.0, + "eval_loss": 0.7048721313476562, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.3178, + "eval_samples_per_second": 9.336, + "eval_steps_per_second": 1.624, + "step": 7122 + }, + { + "epoch": 370.96, + "eval_gen_len": 0.0, + "eval_loss": 0.7041941285133362, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.2739, + "eval_samples_per_second": 9.369, + "eval_steps_per_second": 1.629, + "step": 7141 + }, + { + "epoch": 372.0, + "eval_gen_len": 0.0, + "eval_loss": 0.7035704255104065, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.3207, + "eval_samples_per_second": 9.334, + "eval_steps_per_second": 1.623, + "step": 7161 + }, + { + "epoch": 372.99, + "eval_gen_len": 0.0, + "eval_loss": 0.7029441595077515, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.094, + "eval_samples_per_second": 9.509, + "eval_steps_per_second": 1.654, + "step": 7180 + }, + { + "epoch": 373.97, + "eval_gen_len": 0.0, + "eval_loss": 0.7022525668144226, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.7042, + "eval_samples_per_second": 9.052, + "eval_steps_per_second": 1.574, + "step": 7199 + }, + { + "epoch": 374.96, + "eval_gen_len": 0.0, + "eval_loss": 0.7016597986221313, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.3663, + "eval_samples_per_second": 9.299, + "eval_steps_per_second": 1.617, + "step": 7218 + }, + { + "epoch": 376.0, + "eval_gen_len": 0.0, + "eval_loss": 0.7011125683784485, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.2533, + "eval_samples_per_second": 9.385, + "eval_steps_per_second": 1.632, + "step": 7238 + }, + { + "epoch": 376.99, + "eval_gen_len": 0.0, + "eval_loss": 0.7006986737251282, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.4459, + "eval_samples_per_second": 9.24, + "eval_steps_per_second": 1.607, + "step": 7257 + }, + { + "epoch": 377.97, + "eval_gen_len": 0.0, + "eval_loss": 0.7000675201416016, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.5813, + "eval_samples_per_second": 9.141, + "eval_steps_per_second": 1.59, + "step": 7276 + }, + { + "epoch": 378.96, + "eval_gen_len": 0.0, + "eval_loss": 0.6994682550430298, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.7191, + "eval_samples_per_second": 9.041, + "eval_steps_per_second": 1.572, + "step": 7295 + }, + { + "epoch": 380.0, + "eval_gen_len": 0.0, + "eval_loss": 0.6987762451171875, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.7423, + "eval_samples_per_second": 9.025, + "eval_steps_per_second": 1.57, + "step": 7315 + }, + { + "epoch": 380.99, + "eval_gen_len": 0.0, + "eval_loss": 0.6981701254844666, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.7919, + "eval_samples_per_second": 8.99, + "eval_steps_per_second": 1.563, + "step": 7334 + }, + { + "epoch": 381.97, + "eval_gen_len": 0.0, + "eval_loss": 0.6976540088653564, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.6842, + "eval_samples_per_second": 9.066, + "eval_steps_per_second": 1.577, + "step": 7353 + }, + { + "epoch": 382.96, + "eval_gen_len": 0.0, + "eval_loss": 0.6971992254257202, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.7985, + "eval_samples_per_second": 8.985, + "eval_steps_per_second": 1.563, + "step": 7372 + }, + { + "epoch": 384.0, + "eval_gen_len": 0.0, + "eval_loss": 0.6967973113059998, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.5355, + "eval_samples_per_second": 9.174, + "eval_steps_per_second": 1.595, + "step": 7392 + }, + { + "epoch": 384.99, + "eval_gen_len": 0.0, + "eval_loss": 0.6962406039237976, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.7996, + "eval_samples_per_second": 8.985, + "eval_steps_per_second": 1.563, + "step": 7411 + }, + { + "epoch": 385.97, + "eval_gen_len": 0.0, + "eval_loss": 0.6957660913467407, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.4031, + "eval_samples_per_second": 9.272, + "eval_steps_per_second": 1.612, + "step": 7430 + }, + { + "epoch": 386.96, + "eval_gen_len": 0.0, + "eval_loss": 0.6953439116477966, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 13.2572, + "eval_samples_per_second": 8.675, + "eval_steps_per_second": 1.509, + "step": 7449 + }, + { + "epoch": 388.0, + "eval_gen_len": 0.0, + "eval_loss": 0.6948480606079102, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.5809, + "eval_samples_per_second": 9.141, + "eval_steps_per_second": 1.59, + "step": 7469 + }, + { + "epoch": 388.99, + "eval_gen_len": 0.0, + "eval_loss": 0.6944136023521423, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 12.3151, + "eval_samples_per_second": 9.338, + "eval_steps_per_second": 1.624, + "step": 7488 + }, + { + "epoch": 389.61, + "grad_norm": 0.2672542333602905, + "learning_rate": 2.1094736842105264e-06, + "loss": 0.7599, + "step": 7500 + } + ], + "logging_steps": 500, + "max_steps": 9500, + "num_input_tokens_seen": 0, + "num_train_epochs": 500, + "save_steps": 500, + "total_flos": 2.1853261061318246e+17, + "train_batch_size": 6, + "trial_name": null, + "trial_params": null +}