|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 467.53246753246754, |
|
"eval_steps": 500, |
|
"global_step": 9000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.99, |
|
"eval_gen_len": 14.3217, |
|
"eval_loss": 21.81446075439453, |
|
"eval_rouge1": 0.0662, |
|
"eval_rouge2": 0.0082, |
|
"eval_rougeL": 0.0523, |
|
"eval_rougeLsum": 0.0526, |
|
"eval_runtime": 12.9343, |
|
"eval_samples_per_second": 8.891, |
|
"eval_steps_per_second": 1.546, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"eval_gen_len": 14.4, |
|
"eval_loss": 21.760791778564453, |
|
"eval_rouge1": 0.0661, |
|
"eval_rouge2": 0.0082, |
|
"eval_rougeL": 0.052, |
|
"eval_rougeLsum": 0.0523, |
|
"eval_runtime": 11.8223, |
|
"eval_samples_per_second": 9.727, |
|
"eval_steps_per_second": 1.692, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"eval_gen_len": 14.4, |
|
"eval_loss": 21.68472671508789, |
|
"eval_rouge1": 0.0651, |
|
"eval_rouge2": 0.0077, |
|
"eval_rougeL": 0.051, |
|
"eval_rougeLsum": 0.0513, |
|
"eval_runtime": 12.1423, |
|
"eval_samples_per_second": 9.471, |
|
"eval_steps_per_second": 1.647, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_gen_len": 14.3304, |
|
"eval_loss": 21.582918167114258, |
|
"eval_rouge1": 0.0666, |
|
"eval_rouge2": 0.0081, |
|
"eval_rougeL": 0.0525, |
|
"eval_rougeLsum": 0.0525, |
|
"eval_runtime": 12.0406, |
|
"eval_samples_per_second": 9.551, |
|
"eval_steps_per_second": 1.661, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"eval_gen_len": 14.2609, |
|
"eval_loss": 21.46294403076172, |
|
"eval_rouge1": 0.0668, |
|
"eval_rouge2": 0.008, |
|
"eval_rougeL": 0.0527, |
|
"eval_rougeLsum": 0.0526, |
|
"eval_runtime": 12.3725, |
|
"eval_samples_per_second": 9.295, |
|
"eval_steps_per_second": 1.616, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 5.97, |
|
"eval_gen_len": 14.2261, |
|
"eval_loss": 21.31937599182129, |
|
"eval_rouge1": 0.0671, |
|
"eval_rouge2": 0.0077, |
|
"eval_rougeL": 0.0525, |
|
"eval_rougeLsum": 0.0525, |
|
"eval_runtime": 12.5289, |
|
"eval_samples_per_second": 9.179, |
|
"eval_steps_per_second": 1.596, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 6.96, |
|
"eval_gen_len": 14.3043, |
|
"eval_loss": 21.15254783630371, |
|
"eval_rouge1": 0.0677, |
|
"eval_rouge2": 0.0077, |
|
"eval_rougeL": 0.0525, |
|
"eval_rougeLsum": 0.0525, |
|
"eval_runtime": 12.0936, |
|
"eval_samples_per_second": 9.509, |
|
"eval_steps_per_second": 1.654, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_gen_len": 14.2609, |
|
"eval_loss": 20.956642150878906, |
|
"eval_rouge1": 0.0679, |
|
"eval_rouge2": 0.0076, |
|
"eval_rougeL": 0.0522, |
|
"eval_rougeLsum": 0.0521, |
|
"eval_runtime": 12.6351, |
|
"eval_samples_per_second": 9.102, |
|
"eval_steps_per_second": 1.583, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 8.99, |
|
"eval_gen_len": 14.1217, |
|
"eval_loss": 20.7694091796875, |
|
"eval_rouge1": 0.0664, |
|
"eval_rouge2": 0.0074, |
|
"eval_rougeL": 0.0507, |
|
"eval_rougeLsum": 0.0507, |
|
"eval_runtime": 12.3661, |
|
"eval_samples_per_second": 9.3, |
|
"eval_steps_per_second": 1.617, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 9.97, |
|
"eval_gen_len": 14.0957, |
|
"eval_loss": 20.60512351989746, |
|
"eval_rouge1": 0.0685, |
|
"eval_rouge2": 0.0078, |
|
"eval_rougeL": 0.0519, |
|
"eval_rougeLsum": 0.0518, |
|
"eval_runtime": 12.6094, |
|
"eval_samples_per_second": 9.12, |
|
"eval_steps_per_second": 1.586, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 10.96, |
|
"eval_gen_len": 13.9913, |
|
"eval_loss": 20.46657371520996, |
|
"eval_rouge1": 0.0672, |
|
"eval_rouge2": 0.0072, |
|
"eval_rougeL": 0.0511, |
|
"eval_rougeLsum": 0.0511, |
|
"eval_runtime": 12.4343, |
|
"eval_samples_per_second": 9.249, |
|
"eval_steps_per_second": 1.608, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_gen_len": 14.2957, |
|
"eval_loss": 20.335567474365234, |
|
"eval_rouge1": 0.0658, |
|
"eval_rouge2": 0.0079, |
|
"eval_rougeL": 0.05, |
|
"eval_rougeLsum": 0.05, |
|
"eval_runtime": 12.4623, |
|
"eval_samples_per_second": 9.228, |
|
"eval_steps_per_second": 1.605, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 12.99, |
|
"eval_gen_len": 14.7826, |
|
"eval_loss": 20.210546493530273, |
|
"eval_rouge1": 0.0729, |
|
"eval_rouge2": 0.0108, |
|
"eval_rougeL": 0.0562, |
|
"eval_rougeLsum": 0.056, |
|
"eval_runtime": 12.6201, |
|
"eval_samples_per_second": 9.112, |
|
"eval_steps_per_second": 1.585, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 13.97, |
|
"eval_gen_len": 14.8174, |
|
"eval_loss": 20.080350875854492, |
|
"eval_rouge1": 0.0731, |
|
"eval_rouge2": 0.0105, |
|
"eval_rougeL": 0.057, |
|
"eval_rougeLsum": 0.0566, |
|
"eval_runtime": 12.5892, |
|
"eval_samples_per_second": 9.135, |
|
"eval_steps_per_second": 1.589, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 14.96, |
|
"eval_gen_len": 15.3826, |
|
"eval_loss": 19.947362899780273, |
|
"eval_rouge1": 0.0805, |
|
"eval_rouge2": 0.0127, |
|
"eval_rougeL": 0.0624, |
|
"eval_rougeLsum": 0.0619, |
|
"eval_runtime": 12.2446, |
|
"eval_samples_per_second": 9.392, |
|
"eval_steps_per_second": 1.633, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_gen_len": 15.8261, |
|
"eval_loss": 19.80646324157715, |
|
"eval_rouge1": 0.0818, |
|
"eval_rouge2": 0.0129, |
|
"eval_rougeL": 0.0636, |
|
"eval_rougeLsum": 0.0633, |
|
"eval_runtime": 12.037, |
|
"eval_samples_per_second": 9.554, |
|
"eval_steps_per_second": 1.662, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 16.99, |
|
"eval_gen_len": 16.1391, |
|
"eval_loss": 19.66917610168457, |
|
"eval_rouge1": 0.0837, |
|
"eval_rouge2": 0.0139, |
|
"eval_rougeL": 0.0648, |
|
"eval_rougeLsum": 0.0647, |
|
"eval_runtime": 12.6734, |
|
"eval_samples_per_second": 9.074, |
|
"eval_steps_per_second": 1.578, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 17.97, |
|
"eval_gen_len": 16.5391, |
|
"eval_loss": 19.5202579498291, |
|
"eval_rouge1": 0.0915, |
|
"eval_rouge2": 0.0168, |
|
"eval_rougeL": 0.0713, |
|
"eval_rougeLsum": 0.0709, |
|
"eval_runtime": 12.3571, |
|
"eval_samples_per_second": 9.306, |
|
"eval_steps_per_second": 1.619, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 18.96, |
|
"eval_gen_len": 16.687, |
|
"eval_loss": 19.36480712890625, |
|
"eval_rouge1": 0.0934, |
|
"eval_rouge2": 0.0178, |
|
"eval_rougeL": 0.072, |
|
"eval_rougeLsum": 0.0719, |
|
"eval_runtime": 12.3428, |
|
"eval_samples_per_second": 9.317, |
|
"eval_steps_per_second": 1.62, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_gen_len": 17.0522, |
|
"eval_loss": 19.185970306396484, |
|
"eval_rouge1": 0.0951, |
|
"eval_rouge2": 0.0172, |
|
"eval_rougeL": 0.0735, |
|
"eval_rougeLsum": 0.0735, |
|
"eval_runtime": 12.4924, |
|
"eval_samples_per_second": 9.206, |
|
"eval_steps_per_second": 1.601, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 20.99, |
|
"eval_gen_len": 17.1913, |
|
"eval_loss": 18.998315811157227, |
|
"eval_rouge1": 0.0938, |
|
"eval_rouge2": 0.0192, |
|
"eval_rougeL": 0.0754, |
|
"eval_rougeLsum": 0.0755, |
|
"eval_runtime": 12.2427, |
|
"eval_samples_per_second": 9.393, |
|
"eval_steps_per_second": 1.634, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 21.97, |
|
"eval_gen_len": 17.3304, |
|
"eval_loss": 18.782604217529297, |
|
"eval_rouge1": 0.0975, |
|
"eval_rouge2": 0.0223, |
|
"eval_rougeL": 0.0786, |
|
"eval_rougeLsum": 0.0788, |
|
"eval_runtime": 12.4341, |
|
"eval_samples_per_second": 9.249, |
|
"eval_steps_per_second": 1.608, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 22.96, |
|
"eval_gen_len": 17.3304, |
|
"eval_loss": 18.529996871948242, |
|
"eval_rouge1": 0.0986, |
|
"eval_rouge2": 0.0229, |
|
"eval_rougeL": 0.0787, |
|
"eval_rougeLsum": 0.079, |
|
"eval_runtime": 12.138, |
|
"eval_samples_per_second": 9.474, |
|
"eval_steps_per_second": 1.648, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_gen_len": 17.4696, |
|
"eval_loss": 18.21288299560547, |
|
"eval_rouge1": 0.0935, |
|
"eval_rouge2": 0.0195, |
|
"eval_rougeL": 0.0761, |
|
"eval_rougeLsum": 0.0763, |
|
"eval_runtime": 12.3892, |
|
"eval_samples_per_second": 9.282, |
|
"eval_steps_per_second": 1.614, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 24.99, |
|
"eval_gen_len": 17.6087, |
|
"eval_loss": 17.844209671020508, |
|
"eval_rouge1": 0.0936, |
|
"eval_rouge2": 0.0225, |
|
"eval_rougeL": 0.0756, |
|
"eval_rougeLsum": 0.0758, |
|
"eval_runtime": 12.2522, |
|
"eval_samples_per_second": 9.386, |
|
"eval_steps_per_second": 1.632, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 25.97, |
|
"grad_norm": 4.415005683898926, |
|
"learning_rate": 9.474736842105265e-06, |
|
"loss": 19.6383, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 25.97, |
|
"eval_gen_len": 17.7478, |
|
"eval_loss": 17.39573097229004, |
|
"eval_rouge1": 0.0967, |
|
"eval_rouge2": 0.0221, |
|
"eval_rougeL": 0.0765, |
|
"eval_rougeLsum": 0.0764, |
|
"eval_runtime": 12.5376, |
|
"eval_samples_per_second": 9.172, |
|
"eval_steps_per_second": 1.595, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 26.96, |
|
"eval_gen_len": 17.7478, |
|
"eval_loss": 16.90059471130371, |
|
"eval_rouge1": 0.0983, |
|
"eval_rouge2": 0.0198, |
|
"eval_rougeL": 0.0786, |
|
"eval_rougeLsum": 0.0785, |
|
"eval_runtime": 12.1506, |
|
"eval_samples_per_second": 9.465, |
|
"eval_steps_per_second": 1.646, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_gen_len": 17.3304, |
|
"eval_loss": 16.37845802307129, |
|
"eval_rouge1": 0.0916, |
|
"eval_rouge2": 0.0162, |
|
"eval_rougeL": 0.0723, |
|
"eval_rougeLsum": 0.0724, |
|
"eval_runtime": 12.9559, |
|
"eval_samples_per_second": 8.876, |
|
"eval_steps_per_second": 1.544, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 28.99, |
|
"eval_gen_len": 17.0522, |
|
"eval_loss": 15.880877494812012, |
|
"eval_rouge1": 0.0898, |
|
"eval_rouge2": 0.0212, |
|
"eval_rougeL": 0.0716, |
|
"eval_rougeLsum": 0.0711, |
|
"eval_runtime": 12.3917, |
|
"eval_samples_per_second": 9.28, |
|
"eval_steps_per_second": 1.614, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 29.97, |
|
"eval_gen_len": 17.6087, |
|
"eval_loss": 15.38318920135498, |
|
"eval_rouge1": 0.09, |
|
"eval_rouge2": 0.0179, |
|
"eval_rougeL": 0.0717, |
|
"eval_rougeLsum": 0.0717, |
|
"eval_runtime": 12.4102, |
|
"eval_samples_per_second": 9.267, |
|
"eval_steps_per_second": 1.612, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 30.96, |
|
"eval_gen_len": 17.6087, |
|
"eval_loss": 14.880407333374023, |
|
"eval_rouge1": 0.0921, |
|
"eval_rouge2": 0.02, |
|
"eval_rougeL": 0.0768, |
|
"eval_rougeLsum": 0.0765, |
|
"eval_runtime": 12.1352, |
|
"eval_samples_per_second": 9.477, |
|
"eval_steps_per_second": 1.648, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_gen_len": 17.7478, |
|
"eval_loss": 14.299490928649902, |
|
"eval_rouge1": 0.0856, |
|
"eval_rouge2": 0.0197, |
|
"eval_rougeL": 0.0713, |
|
"eval_rougeLsum": 0.071, |
|
"eval_runtime": 12.7436, |
|
"eval_samples_per_second": 9.024, |
|
"eval_steps_per_second": 1.569, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 32.99, |
|
"eval_gen_len": 17.0522, |
|
"eval_loss": 13.684185981750488, |
|
"eval_rouge1": 0.0796, |
|
"eval_rouge2": 0.0153, |
|
"eval_rougeL": 0.0627, |
|
"eval_rougeLsum": 0.0626, |
|
"eval_runtime": 12.5048, |
|
"eval_samples_per_second": 9.196, |
|
"eval_steps_per_second": 1.599, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 33.97, |
|
"eval_gen_len": 17.3304, |
|
"eval_loss": 13.098108291625977, |
|
"eval_rouge1": 0.0772, |
|
"eval_rouge2": 0.0098, |
|
"eval_rougeL": 0.0646, |
|
"eval_rougeLsum": 0.0643, |
|
"eval_runtime": 12.1222, |
|
"eval_samples_per_second": 9.487, |
|
"eval_steps_per_second": 1.65, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 34.96, |
|
"eval_gen_len": 17.4696, |
|
"eval_loss": 12.545892715454102, |
|
"eval_rouge1": 0.0751, |
|
"eval_rouge2": 0.0107, |
|
"eval_rougeL": 0.0622, |
|
"eval_rougeLsum": 0.0622, |
|
"eval_runtime": 12.1398, |
|
"eval_samples_per_second": 9.473, |
|
"eval_steps_per_second": 1.647, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_gen_len": 17.3304, |
|
"eval_loss": 12.004250526428223, |
|
"eval_rouge1": 0.0704, |
|
"eval_rouge2": 0.0081, |
|
"eval_rougeL": 0.0588, |
|
"eval_rougeLsum": 0.0588, |
|
"eval_runtime": 12.5092, |
|
"eval_samples_per_second": 9.193, |
|
"eval_steps_per_second": 1.599, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 36.99, |
|
"eval_gen_len": 17.887, |
|
"eval_loss": 11.514721870422363, |
|
"eval_rouge1": 0.0672, |
|
"eval_rouge2": 0.0079, |
|
"eval_rougeL": 0.0577, |
|
"eval_rougeLsum": 0.0577, |
|
"eval_runtime": 12.4809, |
|
"eval_samples_per_second": 9.214, |
|
"eval_steps_per_second": 1.602, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 37.97, |
|
"eval_gen_len": 17.6087, |
|
"eval_loss": 11.038866996765137, |
|
"eval_rouge1": 0.0513, |
|
"eval_rouge2": 0.0045, |
|
"eval_rougeL": 0.046, |
|
"eval_rougeLsum": 0.0457, |
|
"eval_runtime": 12.6414, |
|
"eval_samples_per_second": 9.097, |
|
"eval_steps_per_second": 1.582, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 38.96, |
|
"eval_gen_len": 17.1913, |
|
"eval_loss": 10.570833206176758, |
|
"eval_rouge1": 0.0468, |
|
"eval_rouge2": 0.0029, |
|
"eval_rougeL": 0.0404, |
|
"eval_rougeLsum": 0.0403, |
|
"eval_runtime": 12.7674, |
|
"eval_samples_per_second": 9.007, |
|
"eval_steps_per_second": 1.566, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_gen_len": 17.3565, |
|
"eval_loss": 10.076028823852539, |
|
"eval_rouge1": 0.0352, |
|
"eval_rouge2": 0.0037, |
|
"eval_rougeL": 0.0305, |
|
"eval_rougeLsum": 0.0304, |
|
"eval_runtime": 12.8987, |
|
"eval_samples_per_second": 8.916, |
|
"eval_steps_per_second": 1.551, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 40.99, |
|
"eval_gen_len": 17.4957, |
|
"eval_loss": 9.603371620178223, |
|
"eval_rouge1": 0.024, |
|
"eval_rouge2": 0.0024, |
|
"eval_rougeL": 0.0218, |
|
"eval_rougeLsum": 0.0218, |
|
"eval_runtime": 12.2083, |
|
"eval_samples_per_second": 9.42, |
|
"eval_steps_per_second": 1.638, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 41.97, |
|
"eval_gen_len": 18.3043, |
|
"eval_loss": 9.131211280822754, |
|
"eval_rouge1": 0.0193, |
|
"eval_rouge2": 0.0022, |
|
"eval_rougeL": 0.0184, |
|
"eval_rougeLsum": 0.0185, |
|
"eval_runtime": 12.1389, |
|
"eval_samples_per_second": 9.474, |
|
"eval_steps_per_second": 1.648, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 42.96, |
|
"eval_gen_len": 18.7217, |
|
"eval_loss": 8.668445587158203, |
|
"eval_rouge1": 0.0116, |
|
"eval_rouge2": 0.0013, |
|
"eval_rougeL": 0.0109, |
|
"eval_rougeLsum": 0.0108, |
|
"eval_runtime": 12.4712, |
|
"eval_samples_per_second": 9.221, |
|
"eval_steps_per_second": 1.604, |
|
"step": 827 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_gen_len": 18.8609, |
|
"eval_loss": 8.1836576461792, |
|
"eval_rouge1": 0.0031, |
|
"eval_rouge2": 0.0004, |
|
"eval_rougeL": 0.0032, |
|
"eval_rougeLsum": 0.0032, |
|
"eval_runtime": 12.1395, |
|
"eval_samples_per_second": 9.473, |
|
"eval_steps_per_second": 1.648, |
|
"step": 847 |
|
}, |
|
{ |
|
"epoch": 44.99, |
|
"eval_gen_len": 18.8609, |
|
"eval_loss": 7.736245632171631, |
|
"eval_rouge1": 0.0028, |
|
"eval_rouge2": 0.0002, |
|
"eval_rougeL": 0.0028, |
|
"eval_rougeLsum": 0.0028, |
|
"eval_runtime": 12.2043, |
|
"eval_samples_per_second": 9.423, |
|
"eval_steps_per_second": 1.639, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 45.97, |
|
"eval_gen_len": 18.8609, |
|
"eval_loss": 7.298835277557373, |
|
"eval_rouge1": 0.0018, |
|
"eval_rouge2": 0.0004, |
|
"eval_rougeL": 0.0018, |
|
"eval_rougeLsum": 0.0018, |
|
"eval_runtime": 12.1301, |
|
"eval_samples_per_second": 9.481, |
|
"eval_steps_per_second": 1.649, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 46.96, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 6.873920917510986, |
|
"eval_rouge1": 0.001, |
|
"eval_rouge2": 0.0002, |
|
"eval_rougeL": 0.001, |
|
"eval_rougeLsum": 0.0009, |
|
"eval_runtime": 12.1519, |
|
"eval_samples_per_second": 9.464, |
|
"eval_steps_per_second": 1.646, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 6.440176486968994, |
|
"eval_rouge1": 0.0001, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0001, |
|
"eval_rougeLsum": 0.0001, |
|
"eval_runtime": 12.3618, |
|
"eval_samples_per_second": 9.303, |
|
"eval_steps_per_second": 1.618, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 48.99, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 6.049317359924316, |
|
"eval_rouge1": 0.0006, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0006, |
|
"eval_rougeLsum": 0.0005, |
|
"eval_runtime": 12.167, |
|
"eval_samples_per_second": 9.452, |
|
"eval_steps_per_second": 1.644, |
|
"step": 943 |
|
}, |
|
{ |
|
"epoch": 49.97, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 5.643195152282715, |
|
"eval_rouge1": 0.0002, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0002, |
|
"eval_rougeLsum": 0.0002, |
|
"eval_runtime": 11.9138, |
|
"eval_samples_per_second": 9.653, |
|
"eval_steps_per_second": 1.679, |
|
"step": 962 |
|
}, |
|
{ |
|
"epoch": 50.96, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 5.2581987380981445, |
|
"eval_rouge1": 0.0007, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0008, |
|
"eval_rougeLsum": 0.0007, |
|
"eval_runtime": 12.5979, |
|
"eval_samples_per_second": 9.129, |
|
"eval_steps_per_second": 1.588, |
|
"step": 981 |
|
}, |
|
{ |
|
"epoch": 51.95, |
|
"grad_norm": 5.70858097076416, |
|
"learning_rate": 8.949473684210527e-06, |
|
"loss": 11.5478, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 4.877782344818115, |
|
"eval_rouge1": 0.0006, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0006, |
|
"eval_rougeLsum": 0.0006, |
|
"eval_runtime": 12.0728, |
|
"eval_samples_per_second": 9.526, |
|
"eval_steps_per_second": 1.657, |
|
"step": 1001 |
|
}, |
|
{ |
|
"epoch": 52.99, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 4.53688383102417, |
|
"eval_rouge1": 0.0006, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0006, |
|
"eval_rougeLsum": 0.0006, |
|
"eval_runtime": 12.1263, |
|
"eval_samples_per_second": 9.484, |
|
"eval_steps_per_second": 1.649, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 53.97, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 4.222665309906006, |
|
"eval_rouge1": 0.0002, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0002, |
|
"eval_rougeLsum": 0.0002, |
|
"eval_runtime": 12.5806, |
|
"eval_samples_per_second": 9.141, |
|
"eval_steps_per_second": 1.59, |
|
"step": 1039 |
|
}, |
|
{ |
|
"epoch": 54.96, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 3.9297854900360107, |
|
"eval_rouge1": 0.0008, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0008, |
|
"eval_rougeLsum": 0.0007, |
|
"eval_runtime": 12.7467, |
|
"eval_samples_per_second": 9.022, |
|
"eval_steps_per_second": 1.569, |
|
"step": 1058 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 3.6505942344665527, |
|
"eval_rouge1": 0.0005, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0005, |
|
"eval_rougeLsum": 0.0005, |
|
"eval_runtime": 12.6962, |
|
"eval_samples_per_second": 9.058, |
|
"eval_steps_per_second": 1.575, |
|
"step": 1078 |
|
}, |
|
{ |
|
"epoch": 56.99, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 3.4100279808044434, |
|
"eval_rouge1": 0.0002, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0002, |
|
"eval_rougeLsum": 0.0002, |
|
"eval_runtime": 12.5475, |
|
"eval_samples_per_second": 9.165, |
|
"eval_steps_per_second": 1.594, |
|
"step": 1097 |
|
}, |
|
{ |
|
"epoch": 57.97, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 3.197094202041626, |
|
"eval_rouge1": 0.0006, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0006, |
|
"eval_rougeLsum": 0.0006, |
|
"eval_runtime": 12.1721, |
|
"eval_samples_per_second": 9.448, |
|
"eval_steps_per_second": 1.643, |
|
"step": 1116 |
|
}, |
|
{ |
|
"epoch": 58.96, |
|
"eval_gen_len": 18.9913, |
|
"eval_loss": 3.0094308853149414, |
|
"eval_rouge1": 0.0006, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0006, |
|
"eval_rougeLsum": 0.0006, |
|
"eval_runtime": 12.2313, |
|
"eval_samples_per_second": 9.402, |
|
"eval_steps_per_second": 1.635, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_gen_len": 18.9913, |
|
"eval_loss": 2.841013193130493, |
|
"eval_rouge1": 0.0008, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0008, |
|
"eval_rougeLsum": 0.0008, |
|
"eval_runtime": 12.5511, |
|
"eval_samples_per_second": 9.163, |
|
"eval_steps_per_second": 1.593, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 60.99, |
|
"eval_gen_len": 18.9826, |
|
"eval_loss": 2.698159694671631, |
|
"eval_rouge1": 0.0007, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0007, |
|
"eval_rougeLsum": 0.0007, |
|
"eval_runtime": 12.0968, |
|
"eval_samples_per_second": 9.507, |
|
"eval_steps_per_second": 1.653, |
|
"step": 1174 |
|
}, |
|
{ |
|
"epoch": 61.97, |
|
"eval_gen_len": 18.6783, |
|
"eval_loss": 2.5645217895507812, |
|
"eval_rouge1": 0.0011, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0011, |
|
"eval_rougeLsum": 0.0011, |
|
"eval_runtime": 12.1821, |
|
"eval_samples_per_second": 9.44, |
|
"eval_steps_per_second": 1.642, |
|
"step": 1193 |
|
}, |
|
{ |
|
"epoch": 62.96, |
|
"eval_gen_len": 17.3565, |
|
"eval_loss": 2.445538282394409, |
|
"eval_rouge1": 0.0007, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0007, |
|
"eval_rougeLsum": 0.0007, |
|
"eval_runtime": 12.1339, |
|
"eval_samples_per_second": 9.478, |
|
"eval_steps_per_second": 1.648, |
|
"step": 1212 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_gen_len": 13.5043, |
|
"eval_loss": 2.338679075241089, |
|
"eval_rouge1": 0.0011, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.001, |
|
"eval_rougeLsum": 0.001, |
|
"eval_runtime": 12.4085, |
|
"eval_samples_per_second": 9.268, |
|
"eval_steps_per_second": 1.612, |
|
"step": 1232 |
|
}, |
|
{ |
|
"epoch": 64.99, |
|
"eval_gen_len": 9.4348, |
|
"eval_loss": 2.2483484745025635, |
|
"eval_rouge1": 0.0002, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0002, |
|
"eval_rougeLsum": 0.0002, |
|
"eval_runtime": 12.3896, |
|
"eval_samples_per_second": 9.282, |
|
"eval_steps_per_second": 1.614, |
|
"step": 1251 |
|
}, |
|
{ |
|
"epoch": 65.97, |
|
"eval_gen_len": 6.9652, |
|
"eval_loss": 2.1728155612945557, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.4531, |
|
"eval_samples_per_second": 9.235, |
|
"eval_steps_per_second": 1.606, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 66.96, |
|
"eval_gen_len": 6.2957, |
|
"eval_loss": 2.1103546619415283, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.2925, |
|
"eval_samples_per_second": 9.355, |
|
"eval_steps_per_second": 1.627, |
|
"step": 1289 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_gen_len": 6.0, |
|
"eval_loss": 2.0531256198883057, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.1269, |
|
"eval_samples_per_second": 9.483, |
|
"eval_steps_per_second": 1.649, |
|
"step": 1309 |
|
}, |
|
{ |
|
"epoch": 68.99, |
|
"eval_gen_len": 5.7043, |
|
"eval_loss": 2.006763219833374, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.0803, |
|
"eval_samples_per_second": 9.52, |
|
"eval_steps_per_second": 1.656, |
|
"step": 1328 |
|
}, |
|
{ |
|
"epoch": 69.97, |
|
"eval_gen_len": 5.6609, |
|
"eval_loss": 1.9675697088241577, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 13.197, |
|
"eval_samples_per_second": 8.714, |
|
"eval_steps_per_second": 1.515, |
|
"step": 1347 |
|
}, |
|
{ |
|
"epoch": 70.96, |
|
"eval_gen_len": 5.6, |
|
"eval_loss": 1.9337714910507202, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.696, |
|
"eval_samples_per_second": 9.058, |
|
"eval_steps_per_second": 1.575, |
|
"step": 1366 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_gen_len": 5.6174, |
|
"eval_loss": 1.9011404514312744, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.5629, |
|
"eval_samples_per_second": 9.154, |
|
"eval_steps_per_second": 1.592, |
|
"step": 1386 |
|
}, |
|
{ |
|
"epoch": 72.99, |
|
"eval_gen_len": 5.6435, |
|
"eval_loss": 1.8734184503555298, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.9654, |
|
"eval_samples_per_second": 8.87, |
|
"eval_steps_per_second": 1.543, |
|
"step": 1405 |
|
}, |
|
{ |
|
"epoch": 73.97, |
|
"eval_gen_len": 5.7739, |
|
"eval_loss": 1.84665846824646, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.8257, |
|
"eval_samples_per_second": 8.966, |
|
"eval_steps_per_second": 1.559, |
|
"step": 1424 |
|
}, |
|
{ |
|
"epoch": 74.96, |
|
"eval_gen_len": 5.7478, |
|
"eval_loss": 1.8196372985839844, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.89, |
|
"eval_samples_per_second": 8.922, |
|
"eval_steps_per_second": 1.552, |
|
"step": 1443 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_gen_len": 5.7217, |
|
"eval_loss": 1.797453761100769, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.4957, |
|
"eval_samples_per_second": 9.203, |
|
"eval_steps_per_second": 1.601, |
|
"step": 1463 |
|
}, |
|
{ |
|
"epoch": 76.99, |
|
"eval_gen_len": 5.8174, |
|
"eval_loss": 1.7788159847259521, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.4326, |
|
"eval_samples_per_second": 9.25, |
|
"eval_steps_per_second": 1.609, |
|
"step": 1482 |
|
}, |
|
{ |
|
"epoch": 77.92, |
|
"grad_norm": 2.0204899311065674, |
|
"learning_rate": 8.42421052631579e-06, |
|
"loss": 3.2357, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 77.97, |
|
"eval_gen_len": 5.8957, |
|
"eval_loss": 1.76212739944458, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.5086, |
|
"eval_samples_per_second": 9.194, |
|
"eval_steps_per_second": 1.599, |
|
"step": 1501 |
|
}, |
|
{ |
|
"epoch": 78.96, |
|
"eval_gen_len": 5.8957, |
|
"eval_loss": 1.744727373123169, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.3492, |
|
"eval_samples_per_second": 9.312, |
|
"eval_steps_per_second": 1.62, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_gen_len": 5.9391, |
|
"eval_loss": 1.7277677059173584, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.126, |
|
"eval_samples_per_second": 9.484, |
|
"eval_steps_per_second": 1.649, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 80.99, |
|
"eval_gen_len": 5.8435, |
|
"eval_loss": 1.7146191596984863, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.5132, |
|
"eval_samples_per_second": 9.19, |
|
"eval_steps_per_second": 1.598, |
|
"step": 1559 |
|
}, |
|
{ |
|
"epoch": 81.97, |
|
"eval_gen_len": 5.513, |
|
"eval_loss": 1.7026437520980835, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.7453, |
|
"eval_samples_per_second": 9.023, |
|
"eval_steps_per_second": 1.569, |
|
"step": 1578 |
|
}, |
|
{ |
|
"epoch": 82.96, |
|
"eval_gen_len": 5.5652, |
|
"eval_loss": 1.68914794921875, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.98, |
|
"eval_samples_per_second": 8.86, |
|
"eval_steps_per_second": 1.541, |
|
"step": 1597 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"eval_gen_len": 5.3304, |
|
"eval_loss": 1.6754295825958252, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.127, |
|
"eval_samples_per_second": 9.483, |
|
"eval_steps_per_second": 1.649, |
|
"step": 1617 |
|
}, |
|
{ |
|
"epoch": 84.99, |
|
"eval_gen_len": 5.6435, |
|
"eval_loss": 1.6632497310638428, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.2091, |
|
"eval_samples_per_second": 9.419, |
|
"eval_steps_per_second": 1.638, |
|
"step": 1636 |
|
}, |
|
{ |
|
"epoch": 85.97, |
|
"eval_gen_len": 5.9652, |
|
"eval_loss": 1.652411699295044, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.3822, |
|
"eval_samples_per_second": 9.288, |
|
"eval_steps_per_second": 1.615, |
|
"step": 1655 |
|
}, |
|
{ |
|
"epoch": 86.96, |
|
"eval_gen_len": 5.9478, |
|
"eval_loss": 1.642953872680664, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.0847, |
|
"eval_samples_per_second": 9.516, |
|
"eval_steps_per_second": 1.655, |
|
"step": 1674 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"eval_gen_len": 5.4696, |
|
"eval_loss": 1.6336156129837036, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.5368, |
|
"eval_samples_per_second": 9.173, |
|
"eval_steps_per_second": 1.595, |
|
"step": 1694 |
|
}, |
|
{ |
|
"epoch": 88.99, |
|
"eval_gen_len": 5.4, |
|
"eval_loss": 1.6246790885925293, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.4452, |
|
"eval_samples_per_second": 9.241, |
|
"eval_steps_per_second": 1.607, |
|
"step": 1713 |
|
}, |
|
{ |
|
"epoch": 89.97, |
|
"eval_gen_len": 5.7739, |
|
"eval_loss": 1.615963339805603, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.0491, |
|
"eval_samples_per_second": 9.544, |
|
"eval_steps_per_second": 1.66, |
|
"step": 1732 |
|
}, |
|
{ |
|
"epoch": 90.96, |
|
"eval_gen_len": 6.2348, |
|
"eval_loss": 1.606810450553894, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.2296, |
|
"eval_samples_per_second": 9.403, |
|
"eval_steps_per_second": 1.635, |
|
"step": 1751 |
|
}, |
|
{ |
|
"epoch": 92.0, |
|
"eval_gen_len": 6.1652, |
|
"eval_loss": 1.59696626663208, |
|
"eval_rouge1": 0.0002, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0002, |
|
"eval_rougeLsum": 0.0002, |
|
"eval_runtime": 12.4411, |
|
"eval_samples_per_second": 9.244, |
|
"eval_steps_per_second": 1.608, |
|
"step": 1771 |
|
}, |
|
{ |
|
"epoch": 92.99, |
|
"eval_gen_len": 6.3739, |
|
"eval_loss": 1.5894649028778076, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 11.8551, |
|
"eval_samples_per_second": 9.7, |
|
"eval_steps_per_second": 1.687, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 93.97, |
|
"eval_gen_len": 6.5043, |
|
"eval_loss": 1.5818349123001099, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.2434, |
|
"eval_samples_per_second": 9.393, |
|
"eval_steps_per_second": 1.634, |
|
"step": 1809 |
|
}, |
|
{ |
|
"epoch": 94.96, |
|
"eval_gen_len": 6.3565, |
|
"eval_loss": 1.5746902227401733, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.3859, |
|
"eval_samples_per_second": 9.285, |
|
"eval_steps_per_second": 1.615, |
|
"step": 1828 |
|
}, |
|
{ |
|
"epoch": 96.0, |
|
"eval_gen_len": 6.8087, |
|
"eval_loss": 1.567280650138855, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.1358, |
|
"eval_samples_per_second": 9.476, |
|
"eval_steps_per_second": 1.648, |
|
"step": 1848 |
|
}, |
|
{ |
|
"epoch": 96.99, |
|
"eval_gen_len": 6.8, |
|
"eval_loss": 1.5616425275802612, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.3339, |
|
"eval_samples_per_second": 9.324, |
|
"eval_steps_per_second": 1.622, |
|
"step": 1867 |
|
}, |
|
{ |
|
"epoch": 97.97, |
|
"eval_gen_len": 6.6522, |
|
"eval_loss": 1.5548292398452759, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.1749, |
|
"eval_samples_per_second": 9.446, |
|
"eval_steps_per_second": 1.643, |
|
"step": 1886 |
|
}, |
|
{ |
|
"epoch": 98.96, |
|
"eval_gen_len": 6.5913, |
|
"eval_loss": 1.5485645532608032, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 11.995, |
|
"eval_samples_per_second": 9.587, |
|
"eval_steps_per_second": 1.667, |
|
"step": 1905 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"eval_gen_len": 6.4522, |
|
"eval_loss": 1.5418448448181152, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 11.7913, |
|
"eval_samples_per_second": 9.753, |
|
"eval_steps_per_second": 1.696, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 100.99, |
|
"eval_gen_len": 5.6957, |
|
"eval_loss": 1.5365816354751587, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.5711, |
|
"eval_samples_per_second": 9.148, |
|
"eval_steps_per_second": 1.591, |
|
"step": 1944 |
|
}, |
|
{ |
|
"epoch": 101.97, |
|
"eval_gen_len": 5.5739, |
|
"eval_loss": 1.5312349796295166, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.7436, |
|
"eval_samples_per_second": 9.024, |
|
"eval_steps_per_second": 1.569, |
|
"step": 1963 |
|
}, |
|
{ |
|
"epoch": 102.96, |
|
"eval_gen_len": 5.4174, |
|
"eval_loss": 1.5244060754776, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.3775, |
|
"eval_samples_per_second": 9.291, |
|
"eval_steps_per_second": 1.616, |
|
"step": 1982 |
|
}, |
|
{ |
|
"epoch": 103.9, |
|
"grad_norm": 1.870866298675537, |
|
"learning_rate": 7.898947368421053e-06, |
|
"loss": 1.8779, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 104.0, |
|
"eval_gen_len": 5.3565, |
|
"eval_loss": 1.5186233520507812, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.0793, |
|
"eval_samples_per_second": 9.52, |
|
"eval_steps_per_second": 1.656, |
|
"step": 2002 |
|
}, |
|
{ |
|
"epoch": 104.99, |
|
"eval_gen_len": 5.6174, |
|
"eval_loss": 1.5112248659133911, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 11.9289, |
|
"eval_samples_per_second": 9.64, |
|
"eval_steps_per_second": 1.677, |
|
"step": 2021 |
|
}, |
|
{ |
|
"epoch": 105.97, |
|
"eval_gen_len": 5.9217, |
|
"eval_loss": 1.5045664310455322, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 11.9549, |
|
"eval_samples_per_second": 9.619, |
|
"eval_steps_per_second": 1.673, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 106.96, |
|
"eval_gen_len": 5.9913, |
|
"eval_loss": 1.4977103471755981, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.5176, |
|
"eval_samples_per_second": 9.187, |
|
"eval_steps_per_second": 1.598, |
|
"step": 2059 |
|
}, |
|
{ |
|
"epoch": 108.0, |
|
"eval_gen_len": 5.4957, |
|
"eval_loss": 1.491757869720459, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.36, |
|
"eval_samples_per_second": 9.304, |
|
"eval_steps_per_second": 1.618, |
|
"step": 2079 |
|
}, |
|
{ |
|
"epoch": 108.99, |
|
"eval_gen_len": 6.0348, |
|
"eval_loss": 1.486743688583374, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.5097, |
|
"eval_samples_per_second": 9.193, |
|
"eval_steps_per_second": 1.599, |
|
"step": 2098 |
|
}, |
|
{ |
|
"epoch": 109.97, |
|
"eval_gen_len": 6.3304, |
|
"eval_loss": 1.480473279953003, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 11.9075, |
|
"eval_samples_per_second": 9.658, |
|
"eval_steps_per_second": 1.68, |
|
"step": 2117 |
|
}, |
|
{ |
|
"epoch": 110.96, |
|
"eval_gen_len": 6.2, |
|
"eval_loss": 1.4745731353759766, |
|
"eval_rouge1": 0.0003, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0003, |
|
"eval_rougeLsum": 0.0003, |
|
"eval_runtime": 12.2105, |
|
"eval_samples_per_second": 9.418, |
|
"eval_steps_per_second": 1.638, |
|
"step": 2136 |
|
}, |
|
{ |
|
"epoch": 112.0, |
|
"eval_gen_len": 5.9826, |
|
"eval_loss": 1.468475341796875, |
|
"eval_rouge1": 0.0009, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0008, |
|
"eval_rougeLsum": 0.0008, |
|
"eval_runtime": 12.6855, |
|
"eval_samples_per_second": 9.065, |
|
"eval_steps_per_second": 1.577, |
|
"step": 2156 |
|
}, |
|
{ |
|
"epoch": 112.99, |
|
"eval_gen_len": 5.8261, |
|
"eval_loss": 1.4624364376068115, |
|
"eval_rouge1": 0.0009, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0008, |
|
"eval_rougeLsum": 0.0008, |
|
"eval_runtime": 12.3065, |
|
"eval_samples_per_second": 9.345, |
|
"eval_steps_per_second": 1.625, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 113.97, |
|
"eval_gen_len": 5.487, |
|
"eval_loss": 1.4564381837844849, |
|
"eval_rouge1": 0.0009, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0008, |
|
"eval_rougeLsum": 0.0008, |
|
"eval_runtime": 13.0224, |
|
"eval_samples_per_second": 8.831, |
|
"eval_steps_per_second": 1.536, |
|
"step": 2194 |
|
}, |
|
{ |
|
"epoch": 114.96, |
|
"eval_gen_len": 5.1565, |
|
"eval_loss": 1.4514414072036743, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.4022, |
|
"eval_samples_per_second": 9.273, |
|
"eval_steps_per_second": 1.613, |
|
"step": 2213 |
|
}, |
|
{ |
|
"epoch": 116.0, |
|
"eval_gen_len": 5.4957, |
|
"eval_loss": 1.442409873008728, |
|
"eval_rouge1": 0.001, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0009, |
|
"eval_rougeLsum": 0.0009, |
|
"eval_runtime": 12.3722, |
|
"eval_samples_per_second": 9.295, |
|
"eval_steps_per_second": 1.617, |
|
"step": 2233 |
|
}, |
|
{ |
|
"epoch": 116.99, |
|
"eval_gen_len": 5.7391, |
|
"eval_loss": 1.4344819784164429, |
|
"eval_rouge1": 0.0017, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0017, |
|
"eval_rougeLsum": 0.0018, |
|
"eval_runtime": 12.5554, |
|
"eval_samples_per_second": 9.159, |
|
"eval_steps_per_second": 1.593, |
|
"step": 2252 |
|
}, |
|
{ |
|
"epoch": 117.97, |
|
"eval_gen_len": 6.0435, |
|
"eval_loss": 1.4248623847961426, |
|
"eval_rouge1": 0.0021, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.002, |
|
"eval_rougeLsum": 0.0021, |
|
"eval_runtime": 12.1087, |
|
"eval_samples_per_second": 9.497, |
|
"eval_steps_per_second": 1.652, |
|
"step": 2271 |
|
}, |
|
{ |
|
"epoch": 118.96, |
|
"eval_gen_len": 6.4783, |
|
"eval_loss": 1.4156382083892822, |
|
"eval_rouge1": 0.0033, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0032, |
|
"eval_rougeLsum": 0.0033, |
|
"eval_runtime": 12.0374, |
|
"eval_samples_per_second": 9.554, |
|
"eval_steps_per_second": 1.661, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 120.0, |
|
"eval_gen_len": 6.3043, |
|
"eval_loss": 1.408909559249878, |
|
"eval_rouge1": 0.0038, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0037, |
|
"eval_rougeLsum": 0.0038, |
|
"eval_runtime": 12.5996, |
|
"eval_samples_per_second": 9.127, |
|
"eval_steps_per_second": 1.587, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 120.99, |
|
"eval_gen_len": 6.1043, |
|
"eval_loss": 1.4028282165527344, |
|
"eval_rouge1": 0.0043, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0042, |
|
"eval_rougeLsum": 0.0043, |
|
"eval_runtime": 12.384, |
|
"eval_samples_per_second": 9.286, |
|
"eval_steps_per_second": 1.615, |
|
"step": 2329 |
|
}, |
|
{ |
|
"epoch": 121.97, |
|
"eval_gen_len": 5.9478, |
|
"eval_loss": 1.3989005088806152, |
|
"eval_rouge1": 0.0036, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0036, |
|
"eval_rougeLsum": 0.0037, |
|
"eval_runtime": 12.3007, |
|
"eval_samples_per_second": 9.349, |
|
"eval_steps_per_second": 1.626, |
|
"step": 2348 |
|
}, |
|
{ |
|
"epoch": 122.96, |
|
"eval_gen_len": 5.4348, |
|
"eval_loss": 1.3940106630325317, |
|
"eval_rouge1": 0.0029, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0026, |
|
"eval_rougeLsum": 0.0026, |
|
"eval_runtime": 13.1431, |
|
"eval_samples_per_second": 8.75, |
|
"eval_steps_per_second": 1.522, |
|
"step": 2367 |
|
}, |
|
{ |
|
"epoch": 124.0, |
|
"eval_gen_len": 5.3913, |
|
"eval_loss": 1.387468695640564, |
|
"eval_rouge1": 0.0036, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0034, |
|
"eval_rougeLsum": 0.0035, |
|
"eval_runtime": 12.0602, |
|
"eval_samples_per_second": 9.535, |
|
"eval_steps_per_second": 1.658, |
|
"step": 2387 |
|
}, |
|
{ |
|
"epoch": 124.99, |
|
"eval_gen_len": 5.4174, |
|
"eval_loss": 1.3833892345428467, |
|
"eval_rouge1": 0.0031, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0031, |
|
"eval_rougeLsum": 0.0032, |
|
"eval_runtime": 12.5404, |
|
"eval_samples_per_second": 9.17, |
|
"eval_steps_per_second": 1.595, |
|
"step": 2406 |
|
}, |
|
{ |
|
"epoch": 125.97, |
|
"eval_gen_len": 5.8, |
|
"eval_loss": 1.3742746114730835, |
|
"eval_rouge1": 0.0034, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0034, |
|
"eval_rougeLsum": 0.0034, |
|
"eval_runtime": 12.5374, |
|
"eval_samples_per_second": 9.173, |
|
"eval_steps_per_second": 1.595, |
|
"step": 2425 |
|
}, |
|
{ |
|
"epoch": 126.96, |
|
"eval_gen_len": 6.2348, |
|
"eval_loss": 1.3673855066299438, |
|
"eval_rouge1": 0.0054, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0051, |
|
"eval_rougeLsum": 0.0052, |
|
"eval_runtime": 12.3787, |
|
"eval_samples_per_second": 9.29, |
|
"eval_steps_per_second": 1.616, |
|
"step": 2444 |
|
}, |
|
{ |
|
"epoch": 128.0, |
|
"eval_gen_len": 6.3739, |
|
"eval_loss": 1.3610302209854126, |
|
"eval_rouge1": 0.0051, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0051, |
|
"eval_rougeLsum": 0.0053, |
|
"eval_runtime": 12.0722, |
|
"eval_samples_per_second": 9.526, |
|
"eval_steps_per_second": 1.657, |
|
"step": 2464 |
|
}, |
|
{ |
|
"epoch": 128.99, |
|
"eval_gen_len": 7.1565, |
|
"eval_loss": 1.351613163948059, |
|
"eval_rouge1": 0.0062, |
|
"eval_rouge2": 0.0002, |
|
"eval_rougeL": 0.0056, |
|
"eval_rougeLsum": 0.0057, |
|
"eval_runtime": 11.971, |
|
"eval_samples_per_second": 9.607, |
|
"eval_steps_per_second": 1.671, |
|
"step": 2483 |
|
}, |
|
{ |
|
"epoch": 129.87, |
|
"grad_norm": 2.394576072692871, |
|
"learning_rate": 7.3726315789473694e-06, |
|
"loss": 1.6063, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 129.97, |
|
"eval_gen_len": 7.4522, |
|
"eval_loss": 1.3424580097198486, |
|
"eval_rouge1": 0.0055, |
|
"eval_rouge2": 0.0002, |
|
"eval_rougeL": 0.0053, |
|
"eval_rougeLsum": 0.0053, |
|
"eval_runtime": 12.0884, |
|
"eval_samples_per_second": 9.513, |
|
"eval_steps_per_second": 1.654, |
|
"step": 2502 |
|
}, |
|
{ |
|
"epoch": 130.96, |
|
"eval_gen_len": 7.4609, |
|
"eval_loss": 1.334855556488037, |
|
"eval_rouge1": 0.0044, |
|
"eval_rouge2": 0.0002, |
|
"eval_rougeL": 0.0042, |
|
"eval_rougeLsum": 0.0041, |
|
"eval_runtime": 11.9536, |
|
"eval_samples_per_second": 9.621, |
|
"eval_steps_per_second": 1.673, |
|
"step": 2521 |
|
}, |
|
{ |
|
"epoch": 132.0, |
|
"eval_gen_len": 7.4522, |
|
"eval_loss": 1.3267827033996582, |
|
"eval_rouge1": 0.0048, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0046, |
|
"eval_rougeLsum": 0.0046, |
|
"eval_runtime": 12.6532, |
|
"eval_samples_per_second": 9.089, |
|
"eval_steps_per_second": 1.581, |
|
"step": 2541 |
|
}, |
|
{ |
|
"epoch": 132.99, |
|
"eval_gen_len": 7.8522, |
|
"eval_loss": 1.3201897144317627, |
|
"eval_rouge1": 0.0096, |
|
"eval_rouge2": 0.0002, |
|
"eval_rougeL": 0.0088, |
|
"eval_rougeLsum": 0.0089, |
|
"eval_runtime": 12.2562, |
|
"eval_samples_per_second": 9.383, |
|
"eval_steps_per_second": 1.632, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 133.97, |
|
"eval_gen_len": 7.5304, |
|
"eval_loss": 1.3138891458511353, |
|
"eval_rouge1": 0.0074, |
|
"eval_rouge2": 0.0002, |
|
"eval_rougeL": 0.0075, |
|
"eval_rougeLsum": 0.0075, |
|
"eval_runtime": 11.9989, |
|
"eval_samples_per_second": 9.584, |
|
"eval_steps_per_second": 1.667, |
|
"step": 2579 |
|
}, |
|
{ |
|
"epoch": 134.96, |
|
"eval_gen_len": 7.2348, |
|
"eval_loss": 1.3059097528457642, |
|
"eval_rouge1": 0.005, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.005, |
|
"eval_rougeLsum": 0.0051, |
|
"eval_runtime": 11.9785, |
|
"eval_samples_per_second": 9.6, |
|
"eval_steps_per_second": 1.67, |
|
"step": 2598 |
|
}, |
|
{ |
|
"epoch": 136.0, |
|
"eval_gen_len": 7.1304, |
|
"eval_loss": 1.298433780670166, |
|
"eval_rouge1": 0.005, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0046, |
|
"eval_rougeLsum": 0.0047, |
|
"eval_runtime": 12.1248, |
|
"eval_samples_per_second": 9.485, |
|
"eval_steps_per_second": 1.65, |
|
"step": 2618 |
|
}, |
|
{ |
|
"epoch": 136.99, |
|
"eval_gen_len": 8.0261, |
|
"eval_loss": 1.29219651222229, |
|
"eval_rouge1": 0.0072, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0069, |
|
"eval_rougeLsum": 0.0069, |
|
"eval_runtime": 12.5125, |
|
"eval_samples_per_second": 9.191, |
|
"eval_steps_per_second": 1.598, |
|
"step": 2637 |
|
}, |
|
{ |
|
"epoch": 137.97, |
|
"eval_gen_len": 8.4087, |
|
"eval_loss": 1.2833046913146973, |
|
"eval_rouge1": 0.0108, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0097, |
|
"eval_rougeLsum": 0.0099, |
|
"eval_runtime": 12.0943, |
|
"eval_samples_per_second": 9.509, |
|
"eval_steps_per_second": 1.654, |
|
"step": 2656 |
|
}, |
|
{ |
|
"epoch": 138.96, |
|
"eval_gen_len": 8.3739, |
|
"eval_loss": 1.278290033340454, |
|
"eval_rouge1": 0.0111, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0094, |
|
"eval_rougeLsum": 0.0095, |
|
"eval_runtime": 12.8059, |
|
"eval_samples_per_second": 8.98, |
|
"eval_steps_per_second": 1.562, |
|
"step": 2675 |
|
}, |
|
{ |
|
"epoch": 140.0, |
|
"eval_gen_len": 8.5043, |
|
"eval_loss": 1.2764371633529663, |
|
"eval_rouge1": 0.0114, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0104, |
|
"eval_rougeLsum": 0.0108, |
|
"eval_runtime": 12.1122, |
|
"eval_samples_per_second": 9.495, |
|
"eval_steps_per_second": 1.651, |
|
"step": 2695 |
|
}, |
|
{ |
|
"epoch": 140.99, |
|
"eval_gen_len": 8.6261, |
|
"eval_loss": 1.2698535919189453, |
|
"eval_rouge1": 0.0139, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0126, |
|
"eval_rougeLsum": 0.0128, |
|
"eval_runtime": 12.5216, |
|
"eval_samples_per_second": 9.184, |
|
"eval_steps_per_second": 1.597, |
|
"step": 2714 |
|
}, |
|
{ |
|
"epoch": 141.97, |
|
"eval_gen_len": 8.2435, |
|
"eval_loss": 1.2616974115371704, |
|
"eval_rouge1": 0.0135, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0119, |
|
"eval_rougeLsum": 0.012, |
|
"eval_runtime": 12.2763, |
|
"eval_samples_per_second": 9.368, |
|
"eval_steps_per_second": 1.629, |
|
"step": 2733 |
|
}, |
|
{ |
|
"epoch": 142.96, |
|
"eval_gen_len": 8.713, |
|
"eval_loss": 1.2539962530136108, |
|
"eval_rouge1": 0.0144, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0123, |
|
"eval_rougeLsum": 0.0125, |
|
"eval_runtime": 12.1565, |
|
"eval_samples_per_second": 9.46, |
|
"eval_steps_per_second": 1.645, |
|
"step": 2752 |
|
}, |
|
{ |
|
"epoch": 144.0, |
|
"eval_gen_len": 8.9826, |
|
"eval_loss": 1.2482250928878784, |
|
"eval_rouge1": 0.0153, |
|
"eval_rouge2": 0.0002, |
|
"eval_rougeL": 0.0137, |
|
"eval_rougeLsum": 0.0137, |
|
"eval_runtime": 12.3974, |
|
"eval_samples_per_second": 9.276, |
|
"eval_steps_per_second": 1.613, |
|
"step": 2772 |
|
}, |
|
{ |
|
"epoch": 144.99, |
|
"eval_gen_len": 8.9391, |
|
"eval_loss": 1.2442501783370972, |
|
"eval_rouge1": 0.0139, |
|
"eval_rouge2": 0.0006, |
|
"eval_rougeL": 0.013, |
|
"eval_rougeLsum": 0.0129, |
|
"eval_runtime": 12.5443, |
|
"eval_samples_per_second": 9.168, |
|
"eval_steps_per_second": 1.594, |
|
"step": 2791 |
|
}, |
|
{ |
|
"epoch": 145.97, |
|
"eval_gen_len": 9.3565, |
|
"eval_loss": 1.2381587028503418, |
|
"eval_rouge1": 0.0187, |
|
"eval_rouge2": 0.0011, |
|
"eval_rougeL": 0.0151, |
|
"eval_rougeLsum": 0.0151, |
|
"eval_runtime": 12.381, |
|
"eval_samples_per_second": 9.288, |
|
"eval_steps_per_second": 1.615, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 146.96, |
|
"eval_gen_len": 9.513, |
|
"eval_loss": 1.2287580966949463, |
|
"eval_rouge1": 0.0202, |
|
"eval_rouge2": 0.0008, |
|
"eval_rougeL": 0.0172, |
|
"eval_rougeLsum": 0.0172, |
|
"eval_runtime": 12.6591, |
|
"eval_samples_per_second": 9.084, |
|
"eval_steps_per_second": 1.58, |
|
"step": 2829 |
|
}, |
|
{ |
|
"epoch": 148.0, |
|
"eval_gen_len": 8.5565, |
|
"eval_loss": 1.2264941930770874, |
|
"eval_rouge1": 0.0147, |
|
"eval_rouge2": 0.0004, |
|
"eval_rougeL": 0.0126, |
|
"eval_rougeLsum": 0.0124, |
|
"eval_runtime": 11.9708, |
|
"eval_samples_per_second": 9.607, |
|
"eval_steps_per_second": 1.671, |
|
"step": 2849 |
|
}, |
|
{ |
|
"epoch": 148.99, |
|
"eval_gen_len": 8.7652, |
|
"eval_loss": 1.222589135169983, |
|
"eval_rouge1": 0.0153, |
|
"eval_rouge2": 0.0005, |
|
"eval_rougeL": 0.0129, |
|
"eval_rougeLsum": 0.0129, |
|
"eval_runtime": 12.5442, |
|
"eval_samples_per_second": 9.168, |
|
"eval_steps_per_second": 1.594, |
|
"step": 2868 |
|
}, |
|
{ |
|
"epoch": 149.97, |
|
"eval_gen_len": 8.8435, |
|
"eval_loss": 1.2170130014419556, |
|
"eval_rouge1": 0.0147, |
|
"eval_rouge2": 0.0008, |
|
"eval_rougeL": 0.0128, |
|
"eval_rougeLsum": 0.0127, |
|
"eval_runtime": 12.6121, |
|
"eval_samples_per_second": 9.118, |
|
"eval_steps_per_second": 1.586, |
|
"step": 2887 |
|
}, |
|
{ |
|
"epoch": 150.96, |
|
"eval_gen_len": 9.4174, |
|
"eval_loss": 1.208147406578064, |
|
"eval_rouge1": 0.0181, |
|
"eval_rouge2": 0.001, |
|
"eval_rougeL": 0.0162, |
|
"eval_rougeLsum": 0.0162, |
|
"eval_runtime": 13.01, |
|
"eval_samples_per_second": 8.839, |
|
"eval_steps_per_second": 1.537, |
|
"step": 2906 |
|
}, |
|
{ |
|
"epoch": 152.0, |
|
"eval_gen_len": 9.7739, |
|
"eval_loss": 1.2039202451705933, |
|
"eval_rouge1": 0.0216, |
|
"eval_rouge2": 0.0013, |
|
"eval_rougeL": 0.019, |
|
"eval_rougeLsum": 0.0191, |
|
"eval_runtime": 12.7191, |
|
"eval_samples_per_second": 9.042, |
|
"eval_steps_per_second": 1.572, |
|
"step": 2926 |
|
}, |
|
{ |
|
"epoch": 152.99, |
|
"eval_gen_len": 9.5652, |
|
"eval_loss": 1.200941801071167, |
|
"eval_rouge1": 0.02, |
|
"eval_rouge2": 0.0011, |
|
"eval_rougeL": 0.0176, |
|
"eval_rougeLsum": 0.0178, |
|
"eval_runtime": 12.6301, |
|
"eval_samples_per_second": 9.105, |
|
"eval_steps_per_second": 1.584, |
|
"step": 2945 |
|
}, |
|
{ |
|
"epoch": 153.97, |
|
"eval_gen_len": 9.4609, |
|
"eval_loss": 1.195379376411438, |
|
"eval_rouge1": 0.0156, |
|
"eval_rouge2": 0.0008, |
|
"eval_rougeL": 0.0131, |
|
"eval_rougeLsum": 0.0133, |
|
"eval_runtime": 12.3176, |
|
"eval_samples_per_second": 9.336, |
|
"eval_steps_per_second": 1.624, |
|
"step": 2964 |
|
}, |
|
{ |
|
"epoch": 154.96, |
|
"eval_gen_len": 9.6522, |
|
"eval_loss": 1.1899113655090332, |
|
"eval_rouge1": 0.0181, |
|
"eval_rouge2": 0.001, |
|
"eval_rougeL": 0.0156, |
|
"eval_rougeLsum": 0.0157, |
|
"eval_runtime": 12.5731, |
|
"eval_samples_per_second": 9.147, |
|
"eval_steps_per_second": 1.591, |
|
"step": 2983 |
|
}, |
|
{ |
|
"epoch": 155.84, |
|
"grad_norm": 2.1833202838897705, |
|
"learning_rate": 6.846315789473684e-06, |
|
"loss": 1.4271, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 156.0, |
|
"eval_gen_len": 9.4696, |
|
"eval_loss": 1.1842440366744995, |
|
"eval_rouge1": 0.0203, |
|
"eval_rouge2": 0.0008, |
|
"eval_rougeL": 0.0174, |
|
"eval_rougeLsum": 0.0174, |
|
"eval_runtime": 12.2741, |
|
"eval_samples_per_second": 9.369, |
|
"eval_steps_per_second": 1.629, |
|
"step": 3003 |
|
}, |
|
{ |
|
"epoch": 156.99, |
|
"eval_gen_len": 9.8174, |
|
"eval_loss": 1.1782081127166748, |
|
"eval_rouge1": 0.0187, |
|
"eval_rouge2": 0.0007, |
|
"eval_rougeL": 0.0163, |
|
"eval_rougeLsum": 0.0165, |
|
"eval_runtime": 13.1906, |
|
"eval_samples_per_second": 8.718, |
|
"eval_steps_per_second": 1.516, |
|
"step": 3022 |
|
}, |
|
{ |
|
"epoch": 157.97, |
|
"eval_gen_len": 9.9304, |
|
"eval_loss": 1.173979640007019, |
|
"eval_rouge1": 0.0206, |
|
"eval_rouge2": 0.0005, |
|
"eval_rougeL": 0.018, |
|
"eval_rougeLsum": 0.0183, |
|
"eval_runtime": 12.6288, |
|
"eval_samples_per_second": 9.106, |
|
"eval_steps_per_second": 1.584, |
|
"step": 3041 |
|
}, |
|
{ |
|
"epoch": 158.96, |
|
"eval_gen_len": 10.0087, |
|
"eval_loss": 1.1698901653289795, |
|
"eval_rouge1": 0.0198, |
|
"eval_rouge2": 0.0005, |
|
"eval_rougeL": 0.0177, |
|
"eval_rougeLsum": 0.018, |
|
"eval_runtime": 12.5926, |
|
"eval_samples_per_second": 9.132, |
|
"eval_steps_per_second": 1.588, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 160.0, |
|
"eval_gen_len": 10.2174, |
|
"eval_loss": 1.1631128787994385, |
|
"eval_rouge1": 0.0214, |
|
"eval_rouge2": 0.0004, |
|
"eval_rougeL": 0.0189, |
|
"eval_rougeLsum": 0.0191, |
|
"eval_runtime": 12.2102, |
|
"eval_samples_per_second": 9.418, |
|
"eval_steps_per_second": 1.638, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 160.99, |
|
"eval_gen_len": 10.1304, |
|
"eval_loss": 1.1569976806640625, |
|
"eval_rouge1": 0.0221, |
|
"eval_rouge2": 0.0009, |
|
"eval_rougeL": 0.0185, |
|
"eval_rougeLsum": 0.0187, |
|
"eval_runtime": 12.421, |
|
"eval_samples_per_second": 9.259, |
|
"eval_steps_per_second": 1.61, |
|
"step": 3099 |
|
}, |
|
{ |
|
"epoch": 161.97, |
|
"eval_gen_len": 10.0609, |
|
"eval_loss": 1.1523972749710083, |
|
"eval_rouge1": 0.0202, |
|
"eval_rouge2": 0.0009, |
|
"eval_rougeL": 0.0169, |
|
"eval_rougeLsum": 0.0171, |
|
"eval_runtime": 12.3412, |
|
"eval_samples_per_second": 9.318, |
|
"eval_steps_per_second": 1.621, |
|
"step": 3118 |
|
}, |
|
{ |
|
"epoch": 162.96, |
|
"eval_gen_len": 9.8609, |
|
"eval_loss": 1.1472958326339722, |
|
"eval_rouge1": 0.0202, |
|
"eval_rouge2": 0.0006, |
|
"eval_rougeL": 0.0173, |
|
"eval_rougeLsum": 0.0174, |
|
"eval_runtime": 12.2339, |
|
"eval_samples_per_second": 9.4, |
|
"eval_steps_per_second": 1.635, |
|
"step": 3137 |
|
}, |
|
{ |
|
"epoch": 164.0, |
|
"eval_gen_len": 10.3913, |
|
"eval_loss": 1.1415693759918213, |
|
"eval_rouge1": 0.0218, |
|
"eval_rouge2": 0.0011, |
|
"eval_rougeL": 0.0184, |
|
"eval_rougeLsum": 0.0183, |
|
"eval_runtime": 12.6611, |
|
"eval_samples_per_second": 9.083, |
|
"eval_steps_per_second": 1.58, |
|
"step": 3157 |
|
}, |
|
{ |
|
"epoch": 164.99, |
|
"eval_gen_len": 9.713, |
|
"eval_loss": 1.135535478591919, |
|
"eval_rouge1": 0.0174, |
|
"eval_rouge2": 0.0005, |
|
"eval_rougeL": 0.0148, |
|
"eval_rougeLsum": 0.0146, |
|
"eval_runtime": 12.6693, |
|
"eval_samples_per_second": 9.077, |
|
"eval_steps_per_second": 1.579, |
|
"step": 3176 |
|
}, |
|
{ |
|
"epoch": 165.97, |
|
"eval_gen_len": 10.113, |
|
"eval_loss": 1.1300948858261108, |
|
"eval_rouge1": 0.0185, |
|
"eval_rouge2": 0.0007, |
|
"eval_rougeL": 0.0151, |
|
"eval_rougeLsum": 0.0151, |
|
"eval_runtime": 12.211, |
|
"eval_samples_per_second": 9.418, |
|
"eval_steps_per_second": 1.638, |
|
"step": 3195 |
|
}, |
|
{ |
|
"epoch": 166.96, |
|
"eval_gen_len": 10.1043, |
|
"eval_loss": 1.125083088874817, |
|
"eval_rouge1": 0.0205, |
|
"eval_rouge2": 0.0008, |
|
"eval_rougeL": 0.0165, |
|
"eval_rougeLsum": 0.0164, |
|
"eval_runtime": 12.1409, |
|
"eval_samples_per_second": 9.472, |
|
"eval_steps_per_second": 1.647, |
|
"step": 3214 |
|
}, |
|
{ |
|
"epoch": 168.0, |
|
"eval_gen_len": 10.2348, |
|
"eval_loss": 1.1202179193496704, |
|
"eval_rouge1": 0.0195, |
|
"eval_rouge2": 0.0008, |
|
"eval_rougeL": 0.0159, |
|
"eval_rougeLsum": 0.0159, |
|
"eval_runtime": 12.1317, |
|
"eval_samples_per_second": 9.479, |
|
"eval_steps_per_second": 1.649, |
|
"step": 3234 |
|
}, |
|
{ |
|
"epoch": 168.99, |
|
"eval_gen_len": 10.8957, |
|
"eval_loss": 1.114139199256897, |
|
"eval_rouge1": 0.0243, |
|
"eval_rouge2": 0.0018, |
|
"eval_rougeL": 0.0198, |
|
"eval_rougeLsum": 0.0198, |
|
"eval_runtime": 12.443, |
|
"eval_samples_per_second": 9.242, |
|
"eval_steps_per_second": 1.607, |
|
"step": 3253 |
|
}, |
|
{ |
|
"epoch": 169.97, |
|
"eval_gen_len": 11.0174, |
|
"eval_loss": 1.1090463399887085, |
|
"eval_rouge1": 0.0202, |
|
"eval_rouge2": 0.0013, |
|
"eval_rougeL": 0.0161, |
|
"eval_rougeLsum": 0.0163, |
|
"eval_runtime": 12.144, |
|
"eval_samples_per_second": 9.47, |
|
"eval_steps_per_second": 1.647, |
|
"step": 3272 |
|
}, |
|
{ |
|
"epoch": 170.96, |
|
"eval_gen_len": 11.313, |
|
"eval_loss": 1.1036903858184814, |
|
"eval_rouge1": 0.0223, |
|
"eval_rouge2": 0.0015, |
|
"eval_rougeL": 0.0186, |
|
"eval_rougeLsum": 0.0186, |
|
"eval_runtime": 12.3793, |
|
"eval_samples_per_second": 9.29, |
|
"eval_steps_per_second": 1.616, |
|
"step": 3291 |
|
}, |
|
{ |
|
"epoch": 172.0, |
|
"eval_gen_len": 11.3739, |
|
"eval_loss": 1.0987364053726196, |
|
"eval_rouge1": 0.0212, |
|
"eval_rouge2": 0.0013, |
|
"eval_rougeL": 0.0178, |
|
"eval_rougeLsum": 0.0179, |
|
"eval_runtime": 12.3964, |
|
"eval_samples_per_second": 9.277, |
|
"eval_steps_per_second": 1.613, |
|
"step": 3311 |
|
}, |
|
{ |
|
"epoch": 172.99, |
|
"eval_gen_len": 11.2522, |
|
"eval_loss": 1.0937457084655762, |
|
"eval_rouge1": 0.0219, |
|
"eval_rouge2": 0.0015, |
|
"eval_rougeL": 0.0182, |
|
"eval_rougeLsum": 0.018, |
|
"eval_runtime": 12.5831, |
|
"eval_samples_per_second": 9.139, |
|
"eval_steps_per_second": 1.589, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 173.97, |
|
"eval_gen_len": 11.2174, |
|
"eval_loss": 1.090100646018982, |
|
"eval_rouge1": 0.0199, |
|
"eval_rouge2": 0.0013, |
|
"eval_rougeL": 0.0162, |
|
"eval_rougeLsum": 0.0163, |
|
"eval_runtime": 12.7101, |
|
"eval_samples_per_second": 9.048, |
|
"eval_steps_per_second": 1.574, |
|
"step": 3349 |
|
}, |
|
{ |
|
"epoch": 174.96, |
|
"eval_gen_len": 11.2174, |
|
"eval_loss": 1.0861694812774658, |
|
"eval_rouge1": 0.018, |
|
"eval_rouge2": 0.0011, |
|
"eval_rougeL": 0.0149, |
|
"eval_rougeLsum": 0.0149, |
|
"eval_runtime": 12.2255, |
|
"eval_samples_per_second": 9.407, |
|
"eval_steps_per_second": 1.636, |
|
"step": 3368 |
|
}, |
|
{ |
|
"epoch": 176.0, |
|
"eval_gen_len": 11.3304, |
|
"eval_loss": 1.080249309539795, |
|
"eval_rouge1": 0.0181, |
|
"eval_rouge2": 0.0013, |
|
"eval_rougeL": 0.0154, |
|
"eval_rougeLsum": 0.0154, |
|
"eval_runtime": 12.2755, |
|
"eval_samples_per_second": 9.368, |
|
"eval_steps_per_second": 1.629, |
|
"step": 3388 |
|
}, |
|
{ |
|
"epoch": 176.99, |
|
"eval_gen_len": 10.9739, |
|
"eval_loss": 1.0751179456710815, |
|
"eval_rouge1": 0.0147, |
|
"eval_rouge2": 0.0012, |
|
"eval_rougeL": 0.0124, |
|
"eval_rougeLsum": 0.0125, |
|
"eval_runtime": 12.3848, |
|
"eval_samples_per_second": 9.286, |
|
"eval_steps_per_second": 1.615, |
|
"step": 3407 |
|
}, |
|
{ |
|
"epoch": 177.97, |
|
"eval_gen_len": 10.8087, |
|
"eval_loss": 1.069909930229187, |
|
"eval_rouge1": 0.0149, |
|
"eval_rouge2": 0.001, |
|
"eval_rougeL": 0.0123, |
|
"eval_rougeLsum": 0.0124, |
|
"eval_runtime": 12.6526, |
|
"eval_samples_per_second": 9.089, |
|
"eval_steps_per_second": 1.581, |
|
"step": 3426 |
|
}, |
|
{ |
|
"epoch": 178.96, |
|
"eval_gen_len": 10.7217, |
|
"eval_loss": 1.0651546716690063, |
|
"eval_rouge1": 0.0134, |
|
"eval_rouge2": 0.0011, |
|
"eval_rougeL": 0.0122, |
|
"eval_rougeLsum": 0.0122, |
|
"eval_runtime": 13.0395, |
|
"eval_samples_per_second": 8.819, |
|
"eval_steps_per_second": 1.534, |
|
"step": 3445 |
|
}, |
|
{ |
|
"epoch": 180.0, |
|
"eval_gen_len": 10.6174, |
|
"eval_loss": 1.060491681098938, |
|
"eval_rouge1": 0.0121, |
|
"eval_rouge2": 0.001, |
|
"eval_rougeL": 0.0101, |
|
"eval_rougeLsum": 0.0101, |
|
"eval_runtime": 12.3246, |
|
"eval_samples_per_second": 9.331, |
|
"eval_steps_per_second": 1.623, |
|
"step": 3465 |
|
}, |
|
{ |
|
"epoch": 180.99, |
|
"eval_gen_len": 10.6435, |
|
"eval_loss": 1.0562814474105835, |
|
"eval_rouge1": 0.0131, |
|
"eval_rouge2": 0.0007, |
|
"eval_rougeL": 0.0113, |
|
"eval_rougeLsum": 0.0111, |
|
"eval_runtime": 13.1955, |
|
"eval_samples_per_second": 8.715, |
|
"eval_steps_per_second": 1.516, |
|
"step": 3484 |
|
}, |
|
{ |
|
"epoch": 181.82, |
|
"grad_norm": 0.9931882619857788, |
|
"learning_rate": 6.3200000000000005e-06, |
|
"loss": 1.265, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 181.97, |
|
"eval_gen_len": 10.3913, |
|
"eval_loss": 1.0519821643829346, |
|
"eval_rouge1": 0.0147, |
|
"eval_rouge2": 0.0019, |
|
"eval_rougeL": 0.0129, |
|
"eval_rougeLsum": 0.0125, |
|
"eval_runtime": 12.3478, |
|
"eval_samples_per_second": 9.313, |
|
"eval_steps_per_second": 1.62, |
|
"step": 3503 |
|
}, |
|
{ |
|
"epoch": 182.96, |
|
"eval_gen_len": 10.9826, |
|
"eval_loss": 1.047600507736206, |
|
"eval_rouge1": 0.0171, |
|
"eval_rouge2": 0.0018, |
|
"eval_rougeL": 0.0148, |
|
"eval_rougeLsum": 0.0148, |
|
"eval_runtime": 12.7548, |
|
"eval_samples_per_second": 9.016, |
|
"eval_steps_per_second": 1.568, |
|
"step": 3522 |
|
}, |
|
{ |
|
"epoch": 184.0, |
|
"eval_gen_len": 10.9478, |
|
"eval_loss": 1.0428956747055054, |
|
"eval_rouge1": 0.019, |
|
"eval_rouge2": 0.0026, |
|
"eval_rougeL": 0.0173, |
|
"eval_rougeLsum": 0.0174, |
|
"eval_runtime": 12.668, |
|
"eval_samples_per_second": 9.078, |
|
"eval_steps_per_second": 1.579, |
|
"step": 3542 |
|
}, |
|
{ |
|
"epoch": 184.99, |
|
"eval_gen_len": 10.6348, |
|
"eval_loss": 1.0391294956207275, |
|
"eval_rouge1": 0.0192, |
|
"eval_rouge2": 0.0019, |
|
"eval_rougeL": 0.016, |
|
"eval_rougeLsum": 0.0162, |
|
"eval_runtime": 12.2592, |
|
"eval_samples_per_second": 9.381, |
|
"eval_steps_per_second": 1.631, |
|
"step": 3561 |
|
}, |
|
{ |
|
"epoch": 185.97, |
|
"eval_gen_len": 10.6, |
|
"eval_loss": 1.0354028940200806, |
|
"eval_rouge1": 0.0192, |
|
"eval_rouge2": 0.0021, |
|
"eval_rougeL": 0.0154, |
|
"eval_rougeLsum": 0.0155, |
|
"eval_runtime": 12.3279, |
|
"eval_samples_per_second": 9.328, |
|
"eval_steps_per_second": 1.622, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 186.96, |
|
"eval_gen_len": 10.2261, |
|
"eval_loss": 1.0318480730056763, |
|
"eval_rouge1": 0.0193, |
|
"eval_rouge2": 0.003, |
|
"eval_rougeL": 0.0162, |
|
"eval_rougeLsum": 0.0163, |
|
"eval_runtime": 12.7454, |
|
"eval_samples_per_second": 9.023, |
|
"eval_steps_per_second": 1.569, |
|
"step": 3599 |
|
}, |
|
{ |
|
"epoch": 188.0, |
|
"eval_gen_len": 10.6261, |
|
"eval_loss": 1.0279144048690796, |
|
"eval_rouge1": 0.0245, |
|
"eval_rouge2": 0.0032, |
|
"eval_rougeL": 0.0201, |
|
"eval_rougeLsum": 0.02, |
|
"eval_runtime": 12.5706, |
|
"eval_samples_per_second": 9.148, |
|
"eval_steps_per_second": 1.591, |
|
"step": 3619 |
|
}, |
|
{ |
|
"epoch": 188.99, |
|
"eval_gen_len": 10.5913, |
|
"eval_loss": 1.0238802433013916, |
|
"eval_rouge1": 0.025, |
|
"eval_rouge2": 0.0029, |
|
"eval_rougeL": 0.0206, |
|
"eval_rougeLsum": 0.0207, |
|
"eval_runtime": 12.4275, |
|
"eval_samples_per_second": 9.254, |
|
"eval_steps_per_second": 1.609, |
|
"step": 3638 |
|
}, |
|
{ |
|
"epoch": 189.97, |
|
"eval_gen_len": 10.2261, |
|
"eval_loss": 1.0197361707687378, |
|
"eval_rouge1": 0.0249, |
|
"eval_rouge2": 0.0029, |
|
"eval_rougeL": 0.0198, |
|
"eval_rougeLsum": 0.0199, |
|
"eval_runtime": 12.6121, |
|
"eval_samples_per_second": 9.118, |
|
"eval_steps_per_second": 1.586, |
|
"step": 3657 |
|
}, |
|
{ |
|
"epoch": 190.96, |
|
"eval_gen_len": 10.1391, |
|
"eval_loss": 1.0159963369369507, |
|
"eval_rouge1": 0.0245, |
|
"eval_rouge2": 0.003, |
|
"eval_rougeL": 0.019, |
|
"eval_rougeLsum": 0.0191, |
|
"eval_runtime": 12.81, |
|
"eval_samples_per_second": 8.977, |
|
"eval_steps_per_second": 1.561, |
|
"step": 3676 |
|
}, |
|
{ |
|
"epoch": 192.0, |
|
"eval_gen_len": 10.2435, |
|
"eval_loss": 1.0119863748550415, |
|
"eval_rouge1": 0.0243, |
|
"eval_rouge2": 0.0027, |
|
"eval_rougeL": 0.019, |
|
"eval_rougeLsum": 0.019, |
|
"eval_runtime": 12.53, |
|
"eval_samples_per_second": 9.178, |
|
"eval_steps_per_second": 1.596, |
|
"step": 3696 |
|
}, |
|
{ |
|
"epoch": 192.99, |
|
"eval_gen_len": 10.3826, |
|
"eval_loss": 1.008431315422058, |
|
"eval_rouge1": 0.0247, |
|
"eval_rouge2": 0.0029, |
|
"eval_rougeL": 0.0194, |
|
"eval_rougeLsum": 0.0193, |
|
"eval_runtime": 12.6196, |
|
"eval_samples_per_second": 9.113, |
|
"eval_steps_per_second": 1.585, |
|
"step": 3715 |
|
}, |
|
{ |
|
"epoch": 193.97, |
|
"eval_gen_len": 10.6696, |
|
"eval_loss": 1.0049232244491577, |
|
"eval_rouge1": 0.0239, |
|
"eval_rouge2": 0.0027, |
|
"eval_rougeL": 0.0186, |
|
"eval_rougeLsum": 0.0185, |
|
"eval_runtime": 12.5612, |
|
"eval_samples_per_second": 9.155, |
|
"eval_steps_per_second": 1.592, |
|
"step": 3734 |
|
}, |
|
{ |
|
"epoch": 194.96, |
|
"eval_gen_len": 11.1043, |
|
"eval_loss": 1.0015385150909424, |
|
"eval_rouge1": 0.0248, |
|
"eval_rouge2": 0.0029, |
|
"eval_rougeL": 0.0195, |
|
"eval_rougeLsum": 0.0195, |
|
"eval_runtime": 11.9572, |
|
"eval_samples_per_second": 9.618, |
|
"eval_steps_per_second": 1.673, |
|
"step": 3753 |
|
}, |
|
{ |
|
"epoch": 196.0, |
|
"eval_gen_len": 10.8609, |
|
"eval_loss": 0.9973338842391968, |
|
"eval_rouge1": 0.0233, |
|
"eval_rouge2": 0.0026, |
|
"eval_rougeL": 0.0189, |
|
"eval_rougeLsum": 0.0189, |
|
"eval_runtime": 11.9829, |
|
"eval_samples_per_second": 9.597, |
|
"eval_steps_per_second": 1.669, |
|
"step": 3773 |
|
}, |
|
{ |
|
"epoch": 196.99, |
|
"eval_gen_len": 10.6783, |
|
"eval_loss": 0.9933551549911499, |
|
"eval_rouge1": 0.0209, |
|
"eval_rouge2": 0.0028, |
|
"eval_rougeL": 0.0172, |
|
"eval_rougeLsum": 0.0172, |
|
"eval_runtime": 12.3195, |
|
"eval_samples_per_second": 9.335, |
|
"eval_steps_per_second": 1.623, |
|
"step": 3792 |
|
}, |
|
{ |
|
"epoch": 197.97, |
|
"eval_gen_len": 10.9043, |
|
"eval_loss": 0.9898021817207336, |
|
"eval_rouge1": 0.0224, |
|
"eval_rouge2": 0.0028, |
|
"eval_rougeL": 0.0183, |
|
"eval_rougeLsum": 0.0182, |
|
"eval_runtime": 12.6246, |
|
"eval_samples_per_second": 9.109, |
|
"eval_steps_per_second": 1.584, |
|
"step": 3811 |
|
}, |
|
{ |
|
"epoch": 198.96, |
|
"eval_gen_len": 11.2435, |
|
"eval_loss": 0.9868430495262146, |
|
"eval_rouge1": 0.0223, |
|
"eval_rouge2": 0.0034, |
|
"eval_rougeL": 0.0186, |
|
"eval_rougeLsum": 0.0186, |
|
"eval_runtime": 12.4605, |
|
"eval_samples_per_second": 9.229, |
|
"eval_steps_per_second": 1.605, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 200.0, |
|
"eval_gen_len": 11.1565, |
|
"eval_loss": 0.9835883975028992, |
|
"eval_rouge1": 0.0212, |
|
"eval_rouge2": 0.0033, |
|
"eval_rougeL": 0.0182, |
|
"eval_rougeLsum": 0.018, |
|
"eval_runtime": 12.0605, |
|
"eval_samples_per_second": 9.535, |
|
"eval_steps_per_second": 1.658, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 200.99, |
|
"eval_gen_len": 11.2087, |
|
"eval_loss": 0.9812867641448975, |
|
"eval_rouge1": 0.0202, |
|
"eval_rouge2": 0.003, |
|
"eval_rougeL": 0.0164, |
|
"eval_rougeLsum": 0.0164, |
|
"eval_runtime": 12.6958, |
|
"eval_samples_per_second": 9.058, |
|
"eval_steps_per_second": 1.575, |
|
"step": 3869 |
|
}, |
|
{ |
|
"epoch": 201.97, |
|
"eval_gen_len": 11.2783, |
|
"eval_loss": 0.9780998229980469, |
|
"eval_rouge1": 0.0192, |
|
"eval_rouge2": 0.0032, |
|
"eval_rougeL": 0.0158, |
|
"eval_rougeLsum": 0.0158, |
|
"eval_runtime": 12.2767, |
|
"eval_samples_per_second": 9.367, |
|
"eval_steps_per_second": 1.629, |
|
"step": 3888 |
|
}, |
|
{ |
|
"epoch": 202.96, |
|
"eval_gen_len": 11.113, |
|
"eval_loss": 0.9748227596282959, |
|
"eval_rouge1": 0.0174, |
|
"eval_rouge2": 0.0028, |
|
"eval_rougeL": 0.0144, |
|
"eval_rougeLsum": 0.0144, |
|
"eval_runtime": 12.2251, |
|
"eval_samples_per_second": 9.407, |
|
"eval_steps_per_second": 1.636, |
|
"step": 3907 |
|
}, |
|
{ |
|
"epoch": 204.0, |
|
"eval_gen_len": 11.3304, |
|
"eval_loss": 0.9713881015777588, |
|
"eval_rouge1": 0.0187, |
|
"eval_rouge2": 0.0026, |
|
"eval_rougeL": 0.0157, |
|
"eval_rougeLsum": 0.0157, |
|
"eval_runtime": 12.1776, |
|
"eval_samples_per_second": 9.444, |
|
"eval_steps_per_second": 1.642, |
|
"step": 3927 |
|
}, |
|
{ |
|
"epoch": 204.99, |
|
"eval_gen_len": 11.5043, |
|
"eval_loss": 0.968216598033905, |
|
"eval_rouge1": 0.0199, |
|
"eval_rouge2": 0.0026, |
|
"eval_rougeL": 0.0164, |
|
"eval_rougeLsum": 0.0166, |
|
"eval_runtime": 12.4259, |
|
"eval_samples_per_second": 9.255, |
|
"eval_steps_per_second": 1.61, |
|
"step": 3946 |
|
}, |
|
{ |
|
"epoch": 205.97, |
|
"eval_gen_len": 11.4261, |
|
"eval_loss": 0.9647319912910461, |
|
"eval_rouge1": 0.0184, |
|
"eval_rouge2": 0.0025, |
|
"eval_rougeL": 0.0154, |
|
"eval_rougeLsum": 0.0154, |
|
"eval_runtime": 12.6085, |
|
"eval_samples_per_second": 9.121, |
|
"eval_steps_per_second": 1.586, |
|
"step": 3965 |
|
}, |
|
{ |
|
"epoch": 206.96, |
|
"eval_gen_len": 11.6087, |
|
"eval_loss": 0.9613582491874695, |
|
"eval_rouge1": 0.0172, |
|
"eval_rouge2": 0.0018, |
|
"eval_rougeL": 0.0146, |
|
"eval_rougeLsum": 0.0145, |
|
"eval_runtime": 12.5542, |
|
"eval_samples_per_second": 9.16, |
|
"eval_steps_per_second": 1.593, |
|
"step": 3984 |
|
}, |
|
{ |
|
"epoch": 207.79, |
|
"grad_norm": 0.6749991178512573, |
|
"learning_rate": 5.793684210526316e-06, |
|
"loss": 1.119, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 208.0, |
|
"eval_gen_len": 11.8087, |
|
"eval_loss": 0.9580429196357727, |
|
"eval_rouge1": 0.0206, |
|
"eval_rouge2": 0.0023, |
|
"eval_rougeL": 0.0168, |
|
"eval_rougeLsum": 0.0167, |
|
"eval_runtime": 12.4965, |
|
"eval_samples_per_second": 9.203, |
|
"eval_steps_per_second": 1.6, |
|
"step": 4004 |
|
}, |
|
{ |
|
"epoch": 208.99, |
|
"eval_gen_len": 12.0957, |
|
"eval_loss": 0.9548400640487671, |
|
"eval_rouge1": 0.0233, |
|
"eval_rouge2": 0.0023, |
|
"eval_rougeL": 0.019, |
|
"eval_rougeLsum": 0.019, |
|
"eval_runtime": 13.061, |
|
"eval_samples_per_second": 8.805, |
|
"eval_steps_per_second": 1.531, |
|
"step": 4023 |
|
}, |
|
{ |
|
"epoch": 209.97, |
|
"eval_gen_len": 11.9826, |
|
"eval_loss": 0.9517626166343689, |
|
"eval_rouge1": 0.0214, |
|
"eval_rouge2": 0.0021, |
|
"eval_rougeL": 0.0181, |
|
"eval_rougeLsum": 0.018, |
|
"eval_runtime": 12.6476, |
|
"eval_samples_per_second": 9.093, |
|
"eval_steps_per_second": 1.581, |
|
"step": 4042 |
|
}, |
|
{ |
|
"epoch": 210.96, |
|
"eval_gen_len": 11.9304, |
|
"eval_loss": 0.9485481381416321, |
|
"eval_rouge1": 0.0208, |
|
"eval_rouge2": 0.0018, |
|
"eval_rougeL": 0.0171, |
|
"eval_rougeLsum": 0.0172, |
|
"eval_runtime": 12.4352, |
|
"eval_samples_per_second": 9.248, |
|
"eval_steps_per_second": 1.608, |
|
"step": 4061 |
|
}, |
|
{ |
|
"epoch": 212.0, |
|
"eval_gen_len": 11.7826, |
|
"eval_loss": 0.9455087184906006, |
|
"eval_rouge1": 0.0184, |
|
"eval_rouge2": 0.0016, |
|
"eval_rougeL": 0.0152, |
|
"eval_rougeLsum": 0.015, |
|
"eval_runtime": 12.868, |
|
"eval_samples_per_second": 8.937, |
|
"eval_steps_per_second": 1.554, |
|
"step": 4081 |
|
}, |
|
{ |
|
"epoch": 212.99, |
|
"eval_gen_len": 11.7565, |
|
"eval_loss": 0.9424554109573364, |
|
"eval_rouge1": 0.0186, |
|
"eval_rouge2": 0.0028, |
|
"eval_rougeL": 0.0153, |
|
"eval_rougeLsum": 0.0153, |
|
"eval_runtime": 12.3925, |
|
"eval_samples_per_second": 9.28, |
|
"eval_steps_per_second": 1.614, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 213.97, |
|
"eval_gen_len": 11.3913, |
|
"eval_loss": 0.939349889755249, |
|
"eval_rouge1": 0.0165, |
|
"eval_rouge2": 0.002, |
|
"eval_rougeL": 0.0131, |
|
"eval_rougeLsum": 0.0131, |
|
"eval_runtime": 12.3014, |
|
"eval_samples_per_second": 9.349, |
|
"eval_steps_per_second": 1.626, |
|
"step": 4119 |
|
}, |
|
{ |
|
"epoch": 214.96, |
|
"eval_gen_len": 11.4522, |
|
"eval_loss": 0.9365057349205017, |
|
"eval_rouge1": 0.0177, |
|
"eval_rouge2": 0.0022, |
|
"eval_rougeL": 0.0143, |
|
"eval_rougeLsum": 0.0143, |
|
"eval_runtime": 12.8423, |
|
"eval_samples_per_second": 8.955, |
|
"eval_steps_per_second": 1.557, |
|
"step": 4138 |
|
}, |
|
{ |
|
"epoch": 216.0, |
|
"eval_gen_len": 11.7391, |
|
"eval_loss": 0.9332289099693298, |
|
"eval_rouge1": 0.0213, |
|
"eval_rouge2": 0.0028, |
|
"eval_rougeL": 0.0177, |
|
"eval_rougeLsum": 0.0173, |
|
"eval_runtime": 12.4944, |
|
"eval_samples_per_second": 9.204, |
|
"eval_steps_per_second": 1.601, |
|
"step": 4158 |
|
}, |
|
{ |
|
"epoch": 216.99, |
|
"eval_gen_len": 11.6522, |
|
"eval_loss": 0.9310381412506104, |
|
"eval_rouge1": 0.0197, |
|
"eval_rouge2": 0.0028, |
|
"eval_rougeL": 0.0159, |
|
"eval_rougeLsum": 0.0157, |
|
"eval_runtime": 12.5982, |
|
"eval_samples_per_second": 9.128, |
|
"eval_steps_per_second": 1.588, |
|
"step": 4177 |
|
}, |
|
{ |
|
"epoch": 217.97, |
|
"eval_gen_len": 11.687, |
|
"eval_loss": 0.9279318451881409, |
|
"eval_rouge1": 0.0203, |
|
"eval_rouge2": 0.0029, |
|
"eval_rougeL": 0.0168, |
|
"eval_rougeLsum": 0.0165, |
|
"eval_runtime": 12.2551, |
|
"eval_samples_per_second": 9.384, |
|
"eval_steps_per_second": 1.632, |
|
"step": 4196 |
|
}, |
|
{ |
|
"epoch": 218.96, |
|
"eval_gen_len": 11.7043, |
|
"eval_loss": 0.9249460697174072, |
|
"eval_rouge1": 0.0228, |
|
"eval_rouge2": 0.0032, |
|
"eval_rougeL": 0.0191, |
|
"eval_rougeLsum": 0.019, |
|
"eval_runtime": 12.661, |
|
"eval_samples_per_second": 9.083, |
|
"eval_steps_per_second": 1.58, |
|
"step": 4215 |
|
}, |
|
{ |
|
"epoch": 220.0, |
|
"eval_gen_len": 11.2783, |
|
"eval_loss": 0.9218883514404297, |
|
"eval_rouge1": 0.0219, |
|
"eval_rouge2": 0.0032, |
|
"eval_rougeL": 0.0182, |
|
"eval_rougeLsum": 0.018, |
|
"eval_runtime": 12.5398, |
|
"eval_samples_per_second": 9.171, |
|
"eval_steps_per_second": 1.595, |
|
"step": 4235 |
|
}, |
|
{ |
|
"epoch": 220.99, |
|
"eval_gen_len": 11.0087, |
|
"eval_loss": 0.9194144010543823, |
|
"eval_rouge1": 0.0203, |
|
"eval_rouge2": 0.0029, |
|
"eval_rougeL": 0.0171, |
|
"eval_rougeLsum": 0.0167, |
|
"eval_runtime": 12.4516, |
|
"eval_samples_per_second": 9.236, |
|
"eval_steps_per_second": 1.606, |
|
"step": 4254 |
|
}, |
|
{ |
|
"epoch": 221.97, |
|
"eval_gen_len": 10.8174, |
|
"eval_loss": 0.9165053963661194, |
|
"eval_rouge1": 0.0197, |
|
"eval_rouge2": 0.0021, |
|
"eval_rougeL": 0.0164, |
|
"eval_rougeLsum": 0.0161, |
|
"eval_runtime": 12.4796, |
|
"eval_samples_per_second": 9.215, |
|
"eval_steps_per_second": 1.603, |
|
"step": 4273 |
|
}, |
|
{ |
|
"epoch": 222.96, |
|
"eval_gen_len": 10.9652, |
|
"eval_loss": 0.9133741855621338, |
|
"eval_rouge1": 0.0226, |
|
"eval_rouge2": 0.0027, |
|
"eval_rougeL": 0.0185, |
|
"eval_rougeLsum": 0.0182, |
|
"eval_runtime": 12.2854, |
|
"eval_samples_per_second": 9.361, |
|
"eval_steps_per_second": 1.628, |
|
"step": 4292 |
|
}, |
|
{ |
|
"epoch": 224.0, |
|
"eval_gen_len": 10.7565, |
|
"eval_loss": 0.9105393886566162, |
|
"eval_rouge1": 0.0245, |
|
"eval_rouge2": 0.0032, |
|
"eval_rougeL": 0.0199, |
|
"eval_rougeLsum": 0.0197, |
|
"eval_runtime": 12.0278, |
|
"eval_samples_per_second": 9.561, |
|
"eval_steps_per_second": 1.663, |
|
"step": 4312 |
|
}, |
|
{ |
|
"epoch": 224.99, |
|
"eval_gen_len": 10.1391, |
|
"eval_loss": 0.907561719417572, |
|
"eval_rouge1": 0.0198, |
|
"eval_rouge2": 0.0025, |
|
"eval_rougeL": 0.0163, |
|
"eval_rougeLsum": 0.0161, |
|
"eval_runtime": 12.2338, |
|
"eval_samples_per_second": 9.4, |
|
"eval_steps_per_second": 1.635, |
|
"step": 4331 |
|
}, |
|
{ |
|
"epoch": 225.97, |
|
"eval_gen_len": 9.8522, |
|
"eval_loss": 0.9046717286109924, |
|
"eval_rouge1": 0.0171, |
|
"eval_rouge2": 0.0029, |
|
"eval_rougeL": 0.0145, |
|
"eval_rougeLsum": 0.0141, |
|
"eval_runtime": 12.5334, |
|
"eval_samples_per_second": 9.175, |
|
"eval_steps_per_second": 1.596, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 226.96, |
|
"eval_gen_len": 9.6, |
|
"eval_loss": 0.9021281599998474, |
|
"eval_rouge1": 0.0167, |
|
"eval_rouge2": 0.0025, |
|
"eval_rougeL": 0.0145, |
|
"eval_rougeLsum": 0.0141, |
|
"eval_runtime": 12.1542, |
|
"eval_samples_per_second": 9.462, |
|
"eval_steps_per_second": 1.646, |
|
"step": 4369 |
|
}, |
|
{ |
|
"epoch": 228.0, |
|
"eval_gen_len": 9.6261, |
|
"eval_loss": 0.8991298675537109, |
|
"eval_rouge1": 0.0181, |
|
"eval_rouge2": 0.0019, |
|
"eval_rougeL": 0.0153, |
|
"eval_rougeLsum": 0.0148, |
|
"eval_runtime": 12.1167, |
|
"eval_samples_per_second": 9.491, |
|
"eval_steps_per_second": 1.651, |
|
"step": 4389 |
|
}, |
|
{ |
|
"epoch": 228.99, |
|
"eval_gen_len": 9.687, |
|
"eval_loss": 0.8962268829345703, |
|
"eval_rouge1": 0.0217, |
|
"eval_rouge2": 0.0027, |
|
"eval_rougeL": 0.0176, |
|
"eval_rougeLsum": 0.0172, |
|
"eval_runtime": 12.1668, |
|
"eval_samples_per_second": 9.452, |
|
"eval_steps_per_second": 1.644, |
|
"step": 4408 |
|
}, |
|
{ |
|
"epoch": 229.97, |
|
"eval_gen_len": 9.2435, |
|
"eval_loss": 0.8939462304115295, |
|
"eval_rouge1": 0.0223, |
|
"eval_rouge2": 0.0029, |
|
"eval_rougeL": 0.0178, |
|
"eval_rougeLsum": 0.0175, |
|
"eval_runtime": 12.2423, |
|
"eval_samples_per_second": 9.394, |
|
"eval_steps_per_second": 1.634, |
|
"step": 4427 |
|
}, |
|
{ |
|
"epoch": 230.96, |
|
"eval_gen_len": 9.1304, |
|
"eval_loss": 0.8907042145729065, |
|
"eval_rouge1": 0.0216, |
|
"eval_rouge2": 0.0029, |
|
"eval_rougeL": 0.0179, |
|
"eval_rougeLsum": 0.0175, |
|
"eval_runtime": 12.2251, |
|
"eval_samples_per_second": 9.407, |
|
"eval_steps_per_second": 1.636, |
|
"step": 4446 |
|
}, |
|
{ |
|
"epoch": 232.0, |
|
"eval_gen_len": 8.9652, |
|
"eval_loss": 0.8877010345458984, |
|
"eval_rouge1": 0.0211, |
|
"eval_rouge2": 0.0025, |
|
"eval_rougeL": 0.0169, |
|
"eval_rougeLsum": 0.0166, |
|
"eval_runtime": 12.123, |
|
"eval_samples_per_second": 9.486, |
|
"eval_steps_per_second": 1.65, |
|
"step": 4466 |
|
}, |
|
{ |
|
"epoch": 232.99, |
|
"eval_gen_len": 8.7739, |
|
"eval_loss": 0.8858217597007751, |
|
"eval_rouge1": 0.0209, |
|
"eval_rouge2": 0.0027, |
|
"eval_rougeL": 0.0168, |
|
"eval_rougeLsum": 0.0164, |
|
"eval_runtime": 12.3317, |
|
"eval_samples_per_second": 9.326, |
|
"eval_steps_per_second": 1.622, |
|
"step": 4485 |
|
}, |
|
{ |
|
"epoch": 233.77, |
|
"grad_norm": 0.45308393239974976, |
|
"learning_rate": 5.267368421052632e-06, |
|
"loss": 1.0189, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 233.97, |
|
"eval_gen_len": 8.6087, |
|
"eval_loss": 0.8837451934814453, |
|
"eval_rouge1": 0.0221, |
|
"eval_rouge2": 0.0032, |
|
"eval_rougeL": 0.0177, |
|
"eval_rougeLsum": 0.0173, |
|
"eval_runtime": 13.1885, |
|
"eval_samples_per_second": 8.72, |
|
"eval_steps_per_second": 1.516, |
|
"step": 4504 |
|
}, |
|
{ |
|
"epoch": 234.96, |
|
"eval_gen_len": 8.487, |
|
"eval_loss": 0.8812865614891052, |
|
"eval_rouge1": 0.0224, |
|
"eval_rouge2": 0.003, |
|
"eval_rougeL": 0.0175, |
|
"eval_rougeLsum": 0.0172, |
|
"eval_runtime": 12.2108, |
|
"eval_samples_per_second": 9.418, |
|
"eval_steps_per_second": 1.638, |
|
"step": 4523 |
|
}, |
|
{ |
|
"epoch": 236.0, |
|
"eval_gen_len": 8.2957, |
|
"eval_loss": 0.8780920505523682, |
|
"eval_rouge1": 0.0225, |
|
"eval_rouge2": 0.0028, |
|
"eval_rougeL": 0.0171, |
|
"eval_rougeLsum": 0.0168, |
|
"eval_runtime": 12.2449, |
|
"eval_samples_per_second": 9.392, |
|
"eval_steps_per_second": 1.633, |
|
"step": 4543 |
|
}, |
|
{ |
|
"epoch": 236.99, |
|
"eval_gen_len": 7.9304, |
|
"eval_loss": 0.8753093481063843, |
|
"eval_rouge1": 0.0215, |
|
"eval_rouge2": 0.0027, |
|
"eval_rougeL": 0.016, |
|
"eval_rougeLsum": 0.0158, |
|
"eval_runtime": 12.4484, |
|
"eval_samples_per_second": 9.238, |
|
"eval_steps_per_second": 1.607, |
|
"step": 4562 |
|
}, |
|
{ |
|
"epoch": 237.97, |
|
"eval_gen_len": 7.8174, |
|
"eval_loss": 0.8730840086936951, |
|
"eval_rouge1": 0.0211, |
|
"eval_rouge2": 0.0027, |
|
"eval_rougeL": 0.016, |
|
"eval_rougeLsum": 0.0156, |
|
"eval_runtime": 12.5923, |
|
"eval_samples_per_second": 9.133, |
|
"eval_steps_per_second": 1.588, |
|
"step": 4581 |
|
}, |
|
{ |
|
"epoch": 238.96, |
|
"eval_gen_len": 7.687, |
|
"eval_loss": 0.8703946471214294, |
|
"eval_rouge1": 0.0209, |
|
"eval_rouge2": 0.0027, |
|
"eval_rougeL": 0.0158, |
|
"eval_rougeLsum": 0.0154, |
|
"eval_runtime": 12.8465, |
|
"eval_samples_per_second": 8.952, |
|
"eval_steps_per_second": 1.557, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 240.0, |
|
"eval_gen_len": 7.3652, |
|
"eval_loss": 0.8674846887588501, |
|
"eval_rouge1": 0.0211, |
|
"eval_rouge2": 0.0027, |
|
"eval_rougeL": 0.0158, |
|
"eval_rougeLsum": 0.0154, |
|
"eval_runtime": 12.4238, |
|
"eval_samples_per_second": 9.256, |
|
"eval_steps_per_second": 1.61, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 240.99, |
|
"eval_gen_len": 7.2609, |
|
"eval_loss": 0.8647277355194092, |
|
"eval_rouge1": 0.0204, |
|
"eval_rouge2": 0.0022, |
|
"eval_rougeL": 0.0147, |
|
"eval_rougeLsum": 0.0143, |
|
"eval_runtime": 12.6703, |
|
"eval_samples_per_second": 9.076, |
|
"eval_steps_per_second": 1.578, |
|
"step": 4639 |
|
}, |
|
{ |
|
"epoch": 241.97, |
|
"eval_gen_len": 7.0609, |
|
"eval_loss": 0.8625157475471497, |
|
"eval_rouge1": 0.0206, |
|
"eval_rouge2": 0.0023, |
|
"eval_rougeL": 0.0152, |
|
"eval_rougeLsum": 0.0149, |
|
"eval_runtime": 12.5819, |
|
"eval_samples_per_second": 9.14, |
|
"eval_steps_per_second": 1.59, |
|
"step": 4658 |
|
}, |
|
{ |
|
"epoch": 242.96, |
|
"eval_gen_len": 6.5652, |
|
"eval_loss": 0.8605428338050842, |
|
"eval_rouge1": 0.0182, |
|
"eval_rouge2": 0.0017, |
|
"eval_rougeL": 0.0133, |
|
"eval_rougeLsum": 0.0131, |
|
"eval_runtime": 13.1768, |
|
"eval_samples_per_second": 8.727, |
|
"eval_steps_per_second": 1.518, |
|
"step": 4677 |
|
}, |
|
{ |
|
"epoch": 244.0, |
|
"eval_gen_len": 6.4261, |
|
"eval_loss": 0.8578657507896423, |
|
"eval_rouge1": 0.0177, |
|
"eval_rouge2": 0.0021, |
|
"eval_rougeL": 0.0134, |
|
"eval_rougeLsum": 0.0131, |
|
"eval_runtime": 12.6947, |
|
"eval_samples_per_second": 9.059, |
|
"eval_steps_per_second": 1.575, |
|
"step": 4697 |
|
}, |
|
{ |
|
"epoch": 244.99, |
|
"eval_gen_len": 6.2783, |
|
"eval_loss": 0.8557173609733582, |
|
"eval_rouge1": 0.0177, |
|
"eval_rouge2": 0.0021, |
|
"eval_rougeL": 0.0134, |
|
"eval_rougeLsum": 0.013, |
|
"eval_runtime": 12.5391, |
|
"eval_samples_per_second": 9.171, |
|
"eval_steps_per_second": 1.595, |
|
"step": 4716 |
|
}, |
|
{ |
|
"epoch": 245.97, |
|
"eval_gen_len": 6.2435, |
|
"eval_loss": 0.8529919981956482, |
|
"eval_rouge1": 0.0169, |
|
"eval_rouge2": 0.0014, |
|
"eval_rougeL": 0.0131, |
|
"eval_rougeLsum": 0.0127, |
|
"eval_runtime": 12.7454, |
|
"eval_samples_per_second": 9.023, |
|
"eval_steps_per_second": 1.569, |
|
"step": 4735 |
|
}, |
|
{ |
|
"epoch": 246.96, |
|
"eval_gen_len": 6.1565, |
|
"eval_loss": 0.850603461265564, |
|
"eval_rouge1": 0.0191, |
|
"eval_rouge2": 0.0019, |
|
"eval_rougeL": 0.0145, |
|
"eval_rougeLsum": 0.0141, |
|
"eval_runtime": 12.284, |
|
"eval_samples_per_second": 9.362, |
|
"eval_steps_per_second": 1.628, |
|
"step": 4754 |
|
}, |
|
{ |
|
"epoch": 248.0, |
|
"eval_gen_len": 5.9478, |
|
"eval_loss": 0.8480112552642822, |
|
"eval_rouge1": 0.0186, |
|
"eval_rouge2": 0.0015, |
|
"eval_rougeL": 0.0146, |
|
"eval_rougeLsum": 0.0142, |
|
"eval_runtime": 12.3791, |
|
"eval_samples_per_second": 9.29, |
|
"eval_steps_per_second": 1.616, |
|
"step": 4774 |
|
}, |
|
{ |
|
"epoch": 248.99, |
|
"eval_gen_len": 5.7043, |
|
"eval_loss": 0.8458153009414673, |
|
"eval_rouge1": 0.0173, |
|
"eval_rouge2": 0.0013, |
|
"eval_rougeL": 0.0137, |
|
"eval_rougeLsum": 0.0131, |
|
"eval_runtime": 12.4132, |
|
"eval_samples_per_second": 9.264, |
|
"eval_steps_per_second": 1.611, |
|
"step": 4793 |
|
}, |
|
{ |
|
"epoch": 249.97, |
|
"eval_gen_len": 5.7478, |
|
"eval_loss": 0.8430487513542175, |
|
"eval_rouge1": 0.0169, |
|
"eval_rouge2": 0.0015, |
|
"eval_rougeL": 0.0136, |
|
"eval_rougeLsum": 0.0133, |
|
"eval_runtime": 12.8141, |
|
"eval_samples_per_second": 8.974, |
|
"eval_steps_per_second": 1.561, |
|
"step": 4812 |
|
}, |
|
{ |
|
"epoch": 250.96, |
|
"eval_gen_len": 5.3739, |
|
"eval_loss": 0.841323971748352, |
|
"eval_rouge1": 0.0152, |
|
"eval_rouge2": 0.0016, |
|
"eval_rougeL": 0.0124, |
|
"eval_rougeLsum": 0.0121, |
|
"eval_runtime": 12.5346, |
|
"eval_samples_per_second": 9.175, |
|
"eval_steps_per_second": 1.596, |
|
"step": 4831 |
|
}, |
|
{ |
|
"epoch": 252.0, |
|
"eval_gen_len": 5.1565, |
|
"eval_loss": 0.838948130607605, |
|
"eval_rouge1": 0.0149, |
|
"eval_rouge2": 0.0011, |
|
"eval_rougeL": 0.0124, |
|
"eval_rougeLsum": 0.012, |
|
"eval_runtime": 13.0143, |
|
"eval_samples_per_second": 8.836, |
|
"eval_steps_per_second": 1.537, |
|
"step": 4851 |
|
}, |
|
{ |
|
"epoch": 252.99, |
|
"eval_gen_len": 4.9739, |
|
"eval_loss": 0.8368021249771118, |
|
"eval_rouge1": 0.0148, |
|
"eval_rouge2": 0.0011, |
|
"eval_rougeL": 0.0123, |
|
"eval_rougeLsum": 0.0119, |
|
"eval_runtime": 12.4217, |
|
"eval_samples_per_second": 9.258, |
|
"eval_steps_per_second": 1.61, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 253.97, |
|
"eval_gen_len": 4.9652, |
|
"eval_loss": 0.8342902660369873, |
|
"eval_rouge1": 0.0158, |
|
"eval_rouge2": 0.0011, |
|
"eval_rougeL": 0.013, |
|
"eval_rougeLsum": 0.0127, |
|
"eval_runtime": 12.2067, |
|
"eval_samples_per_second": 9.421, |
|
"eval_steps_per_second": 1.638, |
|
"step": 4889 |
|
}, |
|
{ |
|
"epoch": 254.96, |
|
"eval_gen_len": 4.6522, |
|
"eval_loss": 0.8321281671524048, |
|
"eval_rouge1": 0.0145, |
|
"eval_rouge2": 0.0009, |
|
"eval_rougeL": 0.012, |
|
"eval_rougeLsum": 0.0117, |
|
"eval_runtime": 12.8126, |
|
"eval_samples_per_second": 8.976, |
|
"eval_steps_per_second": 1.561, |
|
"step": 4908 |
|
}, |
|
{ |
|
"epoch": 256.0, |
|
"eval_gen_len": 4.2522, |
|
"eval_loss": 0.8296378254890442, |
|
"eval_rouge1": 0.0139, |
|
"eval_rouge2": 0.0009, |
|
"eval_rougeL": 0.0113, |
|
"eval_rougeLsum": 0.0112, |
|
"eval_runtime": 12.3844, |
|
"eval_samples_per_second": 9.286, |
|
"eval_steps_per_second": 1.615, |
|
"step": 4928 |
|
}, |
|
{ |
|
"epoch": 256.99, |
|
"eval_gen_len": 4.1826, |
|
"eval_loss": 0.8276596069335938, |
|
"eval_rouge1": 0.0143, |
|
"eval_rouge2": 0.0009, |
|
"eval_rougeL": 0.0118, |
|
"eval_rougeLsum": 0.0117, |
|
"eval_runtime": 12.4227, |
|
"eval_samples_per_second": 9.257, |
|
"eval_steps_per_second": 1.61, |
|
"step": 4947 |
|
}, |
|
{ |
|
"epoch": 257.97, |
|
"eval_gen_len": 3.6261, |
|
"eval_loss": 0.8265025019645691, |
|
"eval_rouge1": 0.0127, |
|
"eval_rouge2": 0.0007, |
|
"eval_rougeL": 0.0103, |
|
"eval_rougeLsum": 0.01, |
|
"eval_runtime": 12.2122, |
|
"eval_samples_per_second": 9.417, |
|
"eval_steps_per_second": 1.638, |
|
"step": 4966 |
|
}, |
|
{ |
|
"epoch": 258.96, |
|
"eval_gen_len": 3.2609, |
|
"eval_loss": 0.8242406845092773, |
|
"eval_rouge1": 0.0122, |
|
"eval_rouge2": 0.0013, |
|
"eval_rougeL": 0.0102, |
|
"eval_rougeLsum": 0.0101, |
|
"eval_runtime": 12.2099, |
|
"eval_samples_per_second": 9.419, |
|
"eval_steps_per_second": 1.638, |
|
"step": 4985 |
|
}, |
|
{ |
|
"epoch": 259.74, |
|
"grad_norm": 0.4785182774066925, |
|
"learning_rate": 4.741052631578948e-06, |
|
"loss": 0.9442, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 260.0, |
|
"eval_gen_len": 2.8, |
|
"eval_loss": 0.8225219249725342, |
|
"eval_rouge1": 0.0097, |
|
"eval_rouge2": 0.0011, |
|
"eval_rougeL": 0.0082, |
|
"eval_rougeLsum": 0.0082, |
|
"eval_runtime": 12.9652, |
|
"eval_samples_per_second": 8.87, |
|
"eval_steps_per_second": 1.543, |
|
"step": 5005 |
|
}, |
|
{ |
|
"epoch": 260.99, |
|
"eval_gen_len": 2.5652, |
|
"eval_loss": 0.8206771612167358, |
|
"eval_rouge1": 0.0087, |
|
"eval_rouge2": 0.0011, |
|
"eval_rougeL": 0.0071, |
|
"eval_rougeLsum": 0.0069, |
|
"eval_runtime": 12.2186, |
|
"eval_samples_per_second": 9.412, |
|
"eval_steps_per_second": 1.637, |
|
"step": 5024 |
|
}, |
|
{ |
|
"epoch": 261.97, |
|
"eval_gen_len": 2.2348, |
|
"eval_loss": 0.818169891834259, |
|
"eval_rouge1": 0.0072, |
|
"eval_rouge2": 0.0005, |
|
"eval_rougeL": 0.0059, |
|
"eval_rougeLsum": 0.0058, |
|
"eval_runtime": 12.5714, |
|
"eval_samples_per_second": 9.148, |
|
"eval_steps_per_second": 1.591, |
|
"step": 5043 |
|
}, |
|
{ |
|
"epoch": 262.96, |
|
"eval_gen_len": 2.2, |
|
"eval_loss": 0.8162385821342468, |
|
"eval_rouge1": 0.0062, |
|
"eval_rouge2": 0.0002, |
|
"eval_rougeL": 0.0051, |
|
"eval_rougeLsum": 0.0051, |
|
"eval_runtime": 12.5747, |
|
"eval_samples_per_second": 9.145, |
|
"eval_steps_per_second": 1.59, |
|
"step": 5062 |
|
}, |
|
{ |
|
"epoch": 264.0, |
|
"eval_gen_len": 2.2087, |
|
"eval_loss": 0.8145304918289185, |
|
"eval_rouge1": 0.0068, |
|
"eval_rouge2": 0.0005, |
|
"eval_rougeL": 0.0056, |
|
"eval_rougeLsum": 0.0057, |
|
"eval_runtime": 12.6293, |
|
"eval_samples_per_second": 9.106, |
|
"eval_steps_per_second": 1.584, |
|
"step": 5082 |
|
}, |
|
{ |
|
"epoch": 264.99, |
|
"eval_gen_len": 2.3304, |
|
"eval_loss": 0.8127499222755432, |
|
"eval_rouge1": 0.0086, |
|
"eval_rouge2": 0.002, |
|
"eval_rougeL": 0.0076, |
|
"eval_rougeLsum": 0.0075, |
|
"eval_runtime": 13.2312, |
|
"eval_samples_per_second": 8.692, |
|
"eval_steps_per_second": 1.512, |
|
"step": 5101 |
|
}, |
|
{ |
|
"epoch": 265.97, |
|
"eval_gen_len": 1.8957, |
|
"eval_loss": 0.811177670955658, |
|
"eval_rouge1": 0.0057, |
|
"eval_rouge2": 0.001, |
|
"eval_rougeL": 0.0052, |
|
"eval_rougeLsum": 0.0051, |
|
"eval_runtime": 12.5731, |
|
"eval_samples_per_second": 9.147, |
|
"eval_steps_per_second": 1.591, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 266.96, |
|
"eval_gen_len": 1.513, |
|
"eval_loss": 0.8090675473213196, |
|
"eval_rouge1": 0.0042, |
|
"eval_rouge2": 0.0007, |
|
"eval_rougeL": 0.004, |
|
"eval_rougeLsum": 0.004, |
|
"eval_runtime": 12.391, |
|
"eval_samples_per_second": 9.281, |
|
"eval_steps_per_second": 1.614, |
|
"step": 5139 |
|
}, |
|
{ |
|
"epoch": 268.0, |
|
"eval_gen_len": 1.2435, |
|
"eval_loss": 0.8073368668556213, |
|
"eval_rouge1": 0.0031, |
|
"eval_rouge2": 0.0006, |
|
"eval_rougeL": 0.0029, |
|
"eval_rougeLsum": 0.0029, |
|
"eval_runtime": 12.3681, |
|
"eval_samples_per_second": 9.298, |
|
"eval_steps_per_second": 1.617, |
|
"step": 5159 |
|
}, |
|
{ |
|
"epoch": 268.99, |
|
"eval_gen_len": 1.0348, |
|
"eval_loss": 0.8059150576591492, |
|
"eval_rouge1": 0.0031, |
|
"eval_rouge2": 0.0006, |
|
"eval_rougeL": 0.0029, |
|
"eval_rougeLsum": 0.0029, |
|
"eval_runtime": 12.3463, |
|
"eval_samples_per_second": 9.315, |
|
"eval_steps_per_second": 1.62, |
|
"step": 5178 |
|
}, |
|
{ |
|
"epoch": 269.97, |
|
"eval_gen_len": 0.6348, |
|
"eval_loss": 0.8042454123497009, |
|
"eval_rouge1": 0.0007, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0007, |
|
"eval_rougeLsum": 0.0007, |
|
"eval_runtime": 12.4194, |
|
"eval_samples_per_second": 9.26, |
|
"eval_steps_per_second": 1.61, |
|
"step": 5197 |
|
}, |
|
{ |
|
"epoch": 270.96, |
|
"eval_gen_len": 0.7304, |
|
"eval_loss": 0.8023030161857605, |
|
"eval_rouge1": 0.0012, |
|
"eval_rouge2": 0.0003, |
|
"eval_rougeL": 0.0011, |
|
"eval_rougeLsum": 0.001, |
|
"eval_runtime": 12.5901, |
|
"eval_samples_per_second": 9.134, |
|
"eval_steps_per_second": 1.589, |
|
"step": 5216 |
|
}, |
|
{ |
|
"epoch": 272.0, |
|
"eval_gen_len": 0.8, |
|
"eval_loss": 0.8001125454902649, |
|
"eval_rouge1": 0.0012, |
|
"eval_rouge2": 0.0003, |
|
"eval_rougeL": 0.0011, |
|
"eval_rougeLsum": 0.001, |
|
"eval_runtime": 12.5732, |
|
"eval_samples_per_second": 9.146, |
|
"eval_steps_per_second": 1.591, |
|
"step": 5236 |
|
}, |
|
{ |
|
"epoch": 272.99, |
|
"eval_gen_len": 0.6348, |
|
"eval_loss": 0.7986020445823669, |
|
"eval_rouge1": 0.0012, |
|
"eval_rouge2": 0.0003, |
|
"eval_rougeL": 0.0011, |
|
"eval_rougeLsum": 0.001, |
|
"eval_runtime": 12.389, |
|
"eval_samples_per_second": 9.282, |
|
"eval_steps_per_second": 1.614, |
|
"step": 5255 |
|
}, |
|
{ |
|
"epoch": 273.97, |
|
"eval_gen_len": 0.7478, |
|
"eval_loss": 0.7969604730606079, |
|
"eval_rouge1": 0.0012, |
|
"eval_rouge2": 0.0003, |
|
"eval_rougeL": 0.0011, |
|
"eval_rougeLsum": 0.001, |
|
"eval_runtime": 12.3419, |
|
"eval_samples_per_second": 9.318, |
|
"eval_steps_per_second": 1.62, |
|
"step": 5274 |
|
}, |
|
{ |
|
"epoch": 274.96, |
|
"eval_gen_len": 0.5826, |
|
"eval_loss": 0.795600175857544, |
|
"eval_rouge1": 0.0004, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0004, |
|
"eval_rougeLsum": 0.0004, |
|
"eval_runtime": 12.3627, |
|
"eval_samples_per_second": 9.302, |
|
"eval_steps_per_second": 1.618, |
|
"step": 5293 |
|
}, |
|
{ |
|
"epoch": 276.0, |
|
"eval_gen_len": 0.4, |
|
"eval_loss": 0.7938172817230225, |
|
"eval_rouge1": 0.0004, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0004, |
|
"eval_rougeLsum": 0.0004, |
|
"eval_runtime": 12.3706, |
|
"eval_samples_per_second": 9.296, |
|
"eval_steps_per_second": 1.617, |
|
"step": 5313 |
|
}, |
|
{ |
|
"epoch": 276.99, |
|
"eval_gen_len": 0.2261, |
|
"eval_loss": 0.7923696041107178, |
|
"eval_rouge1": 0.0002, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0002, |
|
"eval_rougeLsum": 0.0002, |
|
"eval_runtime": 12.4258, |
|
"eval_samples_per_second": 9.255, |
|
"eval_steps_per_second": 1.61, |
|
"step": 5332 |
|
}, |
|
{ |
|
"epoch": 277.97, |
|
"eval_gen_len": 0.2261, |
|
"eval_loss": 0.7907570600509644, |
|
"eval_rouge1": 0.0002, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0002, |
|
"eval_rougeLsum": 0.0002, |
|
"eval_runtime": 12.5909, |
|
"eval_samples_per_second": 9.134, |
|
"eval_steps_per_second": 1.588, |
|
"step": 5351 |
|
}, |
|
{ |
|
"epoch": 278.96, |
|
"eval_gen_len": 0.2, |
|
"eval_loss": 0.7891109585762024, |
|
"eval_rouge1": 0.0002, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0002, |
|
"eval_rougeLsum": 0.0002, |
|
"eval_runtime": 13.0018, |
|
"eval_samples_per_second": 8.845, |
|
"eval_steps_per_second": 1.538, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 280.0, |
|
"eval_gen_len": 0.1826, |
|
"eval_loss": 0.787673830986023, |
|
"eval_rouge1": 0.0002, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0002, |
|
"eval_rougeLsum": 0.0002, |
|
"eval_runtime": 12.0698, |
|
"eval_samples_per_second": 9.528, |
|
"eval_steps_per_second": 1.657, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 280.99, |
|
"eval_gen_len": 0.1739, |
|
"eval_loss": 0.785959005355835, |
|
"eval_rouge1": 0.0002, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0002, |
|
"eval_rougeLsum": 0.0002, |
|
"eval_runtime": 12.4254, |
|
"eval_samples_per_second": 9.255, |
|
"eval_steps_per_second": 1.61, |
|
"step": 5409 |
|
}, |
|
{ |
|
"epoch": 281.97, |
|
"eval_gen_len": 0.1739, |
|
"eval_loss": 0.7843196988105774, |
|
"eval_rouge1": 0.0002, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0002, |
|
"eval_rougeLsum": 0.0002, |
|
"eval_runtime": 12.6986, |
|
"eval_samples_per_second": 9.056, |
|
"eval_steps_per_second": 1.575, |
|
"step": 5428 |
|
}, |
|
{ |
|
"epoch": 282.96, |
|
"eval_gen_len": 0.1739, |
|
"eval_loss": 0.7826663851737976, |
|
"eval_rouge1": 0.0002, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0002, |
|
"eval_rougeLsum": 0.0002, |
|
"eval_runtime": 12.2827, |
|
"eval_samples_per_second": 9.363, |
|
"eval_steps_per_second": 1.628, |
|
"step": 5447 |
|
}, |
|
{ |
|
"epoch": 284.0, |
|
"eval_gen_len": 0.1739, |
|
"eval_loss": 0.7811480760574341, |
|
"eval_rouge1": 0.0002, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0002, |
|
"eval_rougeLsum": 0.0002, |
|
"eval_runtime": 12.6013, |
|
"eval_samples_per_second": 9.126, |
|
"eval_steps_per_second": 1.587, |
|
"step": 5467 |
|
}, |
|
{ |
|
"epoch": 284.99, |
|
"eval_gen_len": 0.1652, |
|
"eval_loss": 0.7799001932144165, |
|
"eval_rouge1": 0.0002, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0002, |
|
"eval_rougeLsum": 0.0002, |
|
"eval_runtime": 12.3727, |
|
"eval_samples_per_second": 9.295, |
|
"eval_steps_per_second": 1.616, |
|
"step": 5486 |
|
}, |
|
{ |
|
"epoch": 285.71, |
|
"grad_norm": 0.36042362451553345, |
|
"learning_rate": 4.214736842105263e-06, |
|
"loss": 0.8855, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 285.97, |
|
"eval_gen_len": 0.1652, |
|
"eval_loss": 0.7784348726272583, |
|
"eval_rouge1": 0.0002, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0002, |
|
"eval_rougeLsum": 0.0002, |
|
"eval_runtime": 12.4298, |
|
"eval_samples_per_second": 9.252, |
|
"eval_steps_per_second": 1.609, |
|
"step": 5505 |
|
}, |
|
{ |
|
"epoch": 286.96, |
|
"eval_gen_len": 0.1652, |
|
"eval_loss": 0.7772350311279297, |
|
"eval_rouge1": 0.0002, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0002, |
|
"eval_rougeLsum": 0.0002, |
|
"eval_runtime": 12.5935, |
|
"eval_samples_per_second": 9.132, |
|
"eval_steps_per_second": 1.588, |
|
"step": 5524 |
|
}, |
|
{ |
|
"epoch": 288.0, |
|
"eval_gen_len": 0.1652, |
|
"eval_loss": 0.775896430015564, |
|
"eval_rouge1": 0.0002, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0002, |
|
"eval_rougeLsum": 0.0002, |
|
"eval_runtime": 12.6907, |
|
"eval_samples_per_second": 9.062, |
|
"eval_steps_per_second": 1.576, |
|
"step": 5544 |
|
}, |
|
{ |
|
"epoch": 288.99, |
|
"eval_gen_len": 0.1652, |
|
"eval_loss": 0.7743993401527405, |
|
"eval_rouge1": 0.0002, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0002, |
|
"eval_rougeLsum": 0.0002, |
|
"eval_runtime": 12.7195, |
|
"eval_samples_per_second": 9.041, |
|
"eval_steps_per_second": 1.572, |
|
"step": 5563 |
|
}, |
|
{ |
|
"epoch": 289.97, |
|
"eval_gen_len": 0.1652, |
|
"eval_loss": 0.7728458046913147, |
|
"eval_rouge1": 0.0002, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0002, |
|
"eval_rougeLsum": 0.0002, |
|
"eval_runtime": 12.71, |
|
"eval_samples_per_second": 9.048, |
|
"eval_steps_per_second": 1.574, |
|
"step": 5582 |
|
}, |
|
{ |
|
"epoch": 290.96, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.7715795636177063, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.6969, |
|
"eval_samples_per_second": 9.057, |
|
"eval_steps_per_second": 1.575, |
|
"step": 5601 |
|
}, |
|
{ |
|
"epoch": 292.0, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.7701930999755859, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.902, |
|
"eval_samples_per_second": 8.913, |
|
"eval_steps_per_second": 1.55, |
|
"step": 5621 |
|
}, |
|
{ |
|
"epoch": 292.99, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.7691376209259033, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.3513, |
|
"eval_samples_per_second": 9.311, |
|
"eval_steps_per_second": 1.619, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 293.97, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.7679579257965088, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.4173, |
|
"eval_samples_per_second": 9.261, |
|
"eval_steps_per_second": 1.611, |
|
"step": 5659 |
|
}, |
|
{ |
|
"epoch": 294.96, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.7667289972305298, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.6438, |
|
"eval_samples_per_second": 9.095, |
|
"eval_steps_per_second": 1.582, |
|
"step": 5678 |
|
}, |
|
{ |
|
"epoch": 296.0, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.7650233507156372, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.1233, |
|
"eval_samples_per_second": 9.486, |
|
"eval_steps_per_second": 1.65, |
|
"step": 5698 |
|
}, |
|
{ |
|
"epoch": 296.99, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.7638988494873047, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.1018, |
|
"eval_samples_per_second": 9.503, |
|
"eval_steps_per_second": 1.653, |
|
"step": 5717 |
|
}, |
|
{ |
|
"epoch": 297.97, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.7627271413803101, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.0736, |
|
"eval_samples_per_second": 9.525, |
|
"eval_steps_per_second": 1.657, |
|
"step": 5736 |
|
}, |
|
{ |
|
"epoch": 298.96, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.761401891708374, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.4179, |
|
"eval_samples_per_second": 9.261, |
|
"eval_steps_per_second": 1.611, |
|
"step": 5755 |
|
}, |
|
{ |
|
"epoch": 300.0, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.7603045105934143, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.1939, |
|
"eval_samples_per_second": 9.431, |
|
"eval_steps_per_second": 1.64, |
|
"step": 5775 |
|
}, |
|
{ |
|
"epoch": 300.99, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.7593241333961487, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.4302, |
|
"eval_samples_per_second": 9.252, |
|
"eval_steps_per_second": 1.609, |
|
"step": 5794 |
|
}, |
|
{ |
|
"epoch": 301.97, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.7581080198287964, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.7272, |
|
"eval_samples_per_second": 9.036, |
|
"eval_steps_per_second": 1.571, |
|
"step": 5813 |
|
}, |
|
{ |
|
"epoch": 302.96, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.7565290927886963, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.7621, |
|
"eval_samples_per_second": 9.011, |
|
"eval_steps_per_second": 1.567, |
|
"step": 5832 |
|
}, |
|
{ |
|
"epoch": 304.0, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.7556654810905457, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.4563, |
|
"eval_samples_per_second": 9.232, |
|
"eval_steps_per_second": 1.606, |
|
"step": 5852 |
|
}, |
|
{ |
|
"epoch": 304.99, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.754369854927063, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.207, |
|
"eval_samples_per_second": 9.421, |
|
"eval_steps_per_second": 1.638, |
|
"step": 5871 |
|
}, |
|
{ |
|
"epoch": 305.97, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.7534385919570923, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.6305, |
|
"eval_samples_per_second": 9.105, |
|
"eval_steps_per_second": 1.583, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 306.96, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.7526547908782959, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.4513, |
|
"eval_samples_per_second": 9.236, |
|
"eval_steps_per_second": 1.606, |
|
"step": 5909 |
|
}, |
|
{ |
|
"epoch": 308.0, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.7513379454612732, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.349, |
|
"eval_samples_per_second": 9.312, |
|
"eval_steps_per_second": 1.62, |
|
"step": 5929 |
|
}, |
|
{ |
|
"epoch": 308.99, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.7506363987922668, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.0976, |
|
"eval_samples_per_second": 9.506, |
|
"eval_steps_per_second": 1.653, |
|
"step": 5948 |
|
}, |
|
{ |
|
"epoch": 309.97, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.7496155500411987, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.3015, |
|
"eval_samples_per_second": 9.348, |
|
"eval_steps_per_second": 1.626, |
|
"step": 5967 |
|
}, |
|
{ |
|
"epoch": 310.96, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.7488384246826172, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.3367, |
|
"eval_samples_per_second": 9.322, |
|
"eval_steps_per_second": 1.621, |
|
"step": 5986 |
|
}, |
|
{ |
|
"epoch": 311.69, |
|
"grad_norm": 0.3260189890861511, |
|
"learning_rate": 3.6884210526315794e-06, |
|
"loss": 0.8402, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 312.0, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.7474696040153503, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.3992, |
|
"eval_samples_per_second": 9.275, |
|
"eval_steps_per_second": 1.613, |
|
"step": 6006 |
|
}, |
|
{ |
|
"epoch": 312.99, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.7464930415153503, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 11.9232, |
|
"eval_samples_per_second": 9.645, |
|
"eval_steps_per_second": 1.677, |
|
"step": 6025 |
|
}, |
|
{ |
|
"epoch": 313.97, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.7456102967262268, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.6801, |
|
"eval_samples_per_second": 9.069, |
|
"eval_steps_per_second": 1.577, |
|
"step": 6044 |
|
}, |
|
{ |
|
"epoch": 314.96, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.7446662783622742, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.3568, |
|
"eval_samples_per_second": 9.307, |
|
"eval_steps_per_second": 1.619, |
|
"step": 6063 |
|
}, |
|
{ |
|
"epoch": 316.0, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.7433856725692749, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 11.987, |
|
"eval_samples_per_second": 9.594, |
|
"eval_steps_per_second": 1.668, |
|
"step": 6083 |
|
}, |
|
{ |
|
"epoch": 316.99, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.7426111698150635, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.6966, |
|
"eval_samples_per_second": 9.058, |
|
"eval_steps_per_second": 1.575, |
|
"step": 6102 |
|
}, |
|
{ |
|
"epoch": 317.97, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.7413787841796875, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.2025, |
|
"eval_samples_per_second": 9.424, |
|
"eval_steps_per_second": 1.639, |
|
"step": 6121 |
|
}, |
|
{ |
|
"epoch": 318.96, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.7404425144195557, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.9943, |
|
"eval_samples_per_second": 8.85, |
|
"eval_steps_per_second": 1.539, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 320.0, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.7396877408027649, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.3803, |
|
"eval_samples_per_second": 9.289, |
|
"eval_steps_per_second": 1.615, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 320.99, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.739030122756958, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 13.3182, |
|
"eval_samples_per_second": 8.635, |
|
"eval_steps_per_second": 1.502, |
|
"step": 6179 |
|
}, |
|
{ |
|
"epoch": 321.97, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.7381538152694702, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.7464, |
|
"eval_samples_per_second": 9.022, |
|
"eval_steps_per_second": 1.569, |
|
"step": 6198 |
|
}, |
|
{ |
|
"epoch": 322.96, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.7372981309890747, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.1988, |
|
"eval_samples_per_second": 9.427, |
|
"eval_steps_per_second": 1.64, |
|
"step": 6217 |
|
}, |
|
{ |
|
"epoch": 324.0, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.7361249327659607, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 13.3472, |
|
"eval_samples_per_second": 8.616, |
|
"eval_steps_per_second": 1.498, |
|
"step": 6237 |
|
}, |
|
{ |
|
"epoch": 324.99, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.735177218914032, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.2208, |
|
"eval_samples_per_second": 9.41, |
|
"eval_steps_per_second": 1.637, |
|
"step": 6256 |
|
}, |
|
{ |
|
"epoch": 325.97, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.7344561219215393, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.9594, |
|
"eval_samples_per_second": 8.874, |
|
"eval_steps_per_second": 1.543, |
|
"step": 6275 |
|
}, |
|
{ |
|
"epoch": 326.96, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.7334731817245483, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 13.1094, |
|
"eval_samples_per_second": 8.772, |
|
"eval_steps_per_second": 1.526, |
|
"step": 6294 |
|
}, |
|
{ |
|
"epoch": 328.0, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.7326551079750061, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.7729, |
|
"eval_samples_per_second": 9.003, |
|
"eval_steps_per_second": 1.566, |
|
"step": 6314 |
|
}, |
|
{ |
|
"epoch": 328.99, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.7316291332244873, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.415, |
|
"eval_samples_per_second": 9.263, |
|
"eval_steps_per_second": 1.611, |
|
"step": 6333 |
|
}, |
|
{ |
|
"epoch": 329.97, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.7311994433403015, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.9044, |
|
"eval_samples_per_second": 8.912, |
|
"eval_steps_per_second": 1.55, |
|
"step": 6352 |
|
}, |
|
{ |
|
"epoch": 330.96, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.7306154370307922, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 13.0335, |
|
"eval_samples_per_second": 8.823, |
|
"eval_steps_per_second": 1.535, |
|
"step": 6371 |
|
}, |
|
{ |
|
"epoch": 332.0, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.7298057675361633, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.5384, |
|
"eval_samples_per_second": 9.172, |
|
"eval_steps_per_second": 1.595, |
|
"step": 6391 |
|
}, |
|
{ |
|
"epoch": 332.99, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.7290323972702026, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.7202, |
|
"eval_samples_per_second": 9.041, |
|
"eval_steps_per_second": 1.572, |
|
"step": 6410 |
|
}, |
|
{ |
|
"epoch": 333.97, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.7283275127410889, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.5197, |
|
"eval_samples_per_second": 9.186, |
|
"eval_steps_per_second": 1.597, |
|
"step": 6429 |
|
}, |
|
{ |
|
"epoch": 334.96, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.7273982763290405, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.6808, |
|
"eval_samples_per_second": 9.069, |
|
"eval_steps_per_second": 1.577, |
|
"step": 6448 |
|
}, |
|
{ |
|
"epoch": 336.0, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.7265883684158325, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 13.0488, |
|
"eval_samples_per_second": 8.813, |
|
"eval_steps_per_second": 1.533, |
|
"step": 6468 |
|
}, |
|
{ |
|
"epoch": 336.99, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.7261592745780945, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.6573, |
|
"eval_samples_per_second": 9.086, |
|
"eval_steps_per_second": 1.58, |
|
"step": 6487 |
|
}, |
|
{ |
|
"epoch": 337.66, |
|
"grad_norm": 0.288989782333374, |
|
"learning_rate": 3.1621052631578953e-06, |
|
"loss": 0.8058, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 337.97, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.7252629995346069, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.4295, |
|
"eval_samples_per_second": 9.252, |
|
"eval_steps_per_second": 1.609, |
|
"step": 6506 |
|
}, |
|
{ |
|
"epoch": 338.96, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.7245468497276306, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.7865, |
|
"eval_samples_per_second": 8.994, |
|
"eval_steps_per_second": 1.564, |
|
"step": 6525 |
|
}, |
|
{ |
|
"epoch": 340.0, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.7239726185798645, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.9112, |
|
"eval_samples_per_second": 8.907, |
|
"eval_steps_per_second": 1.549, |
|
"step": 6545 |
|
}, |
|
{ |
|
"epoch": 340.99, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.7231466770172119, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.5487, |
|
"eval_samples_per_second": 9.164, |
|
"eval_steps_per_second": 1.594, |
|
"step": 6564 |
|
}, |
|
{ |
|
"epoch": 341.97, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.7223904728889465, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 13.3347, |
|
"eval_samples_per_second": 8.624, |
|
"eval_steps_per_second": 1.5, |
|
"step": 6583 |
|
}, |
|
{ |
|
"epoch": 342.96, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.7218188643455505, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 13.0031, |
|
"eval_samples_per_second": 8.844, |
|
"eval_steps_per_second": 1.538, |
|
"step": 6602 |
|
}, |
|
{ |
|
"epoch": 344.0, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.7209810614585876, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.4608, |
|
"eval_samples_per_second": 9.229, |
|
"eval_steps_per_second": 1.605, |
|
"step": 6622 |
|
}, |
|
{ |
|
"epoch": 344.99, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.7203324437141418, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.5304, |
|
"eval_samples_per_second": 9.178, |
|
"eval_steps_per_second": 1.596, |
|
"step": 6641 |
|
}, |
|
{ |
|
"epoch": 345.97, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.7196723818778992, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.6257, |
|
"eval_samples_per_second": 9.108, |
|
"eval_steps_per_second": 1.584, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 346.96, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.7190775275230408, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.4134, |
|
"eval_samples_per_second": 9.264, |
|
"eval_steps_per_second": 1.611, |
|
"step": 6679 |
|
}, |
|
{ |
|
"epoch": 348.0, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.7185074090957642, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.166, |
|
"eval_samples_per_second": 9.453, |
|
"eval_steps_per_second": 1.644, |
|
"step": 6699 |
|
}, |
|
{ |
|
"epoch": 348.99, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.7180371880531311, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.559, |
|
"eval_samples_per_second": 9.157, |
|
"eval_steps_per_second": 1.592, |
|
"step": 6718 |
|
}, |
|
{ |
|
"epoch": 349.97, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.717097818851471, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.3, |
|
"eval_samples_per_second": 9.35, |
|
"eval_steps_per_second": 1.626, |
|
"step": 6737 |
|
}, |
|
{ |
|
"epoch": 350.96, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.7164217829704285, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.3583, |
|
"eval_samples_per_second": 9.306, |
|
"eval_steps_per_second": 1.618, |
|
"step": 6756 |
|
}, |
|
{ |
|
"epoch": 352.0, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.7158520817756653, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 13.0047, |
|
"eval_samples_per_second": 8.843, |
|
"eval_steps_per_second": 1.538, |
|
"step": 6776 |
|
}, |
|
{ |
|
"epoch": 352.99, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.7151947021484375, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.6741, |
|
"eval_samples_per_second": 9.074, |
|
"eval_steps_per_second": 1.578, |
|
"step": 6795 |
|
}, |
|
{ |
|
"epoch": 353.97, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.7145124077796936, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.6036, |
|
"eval_samples_per_second": 9.124, |
|
"eval_steps_per_second": 1.587, |
|
"step": 6814 |
|
}, |
|
{ |
|
"epoch": 354.96, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.7140352725982666, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.2129, |
|
"eval_samples_per_second": 9.416, |
|
"eval_steps_per_second": 1.638, |
|
"step": 6833 |
|
}, |
|
{ |
|
"epoch": 356.0, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.7135369777679443, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.4624, |
|
"eval_samples_per_second": 9.228, |
|
"eval_steps_per_second": 1.605, |
|
"step": 6853 |
|
}, |
|
{ |
|
"epoch": 356.99, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.7128369808197021, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.422, |
|
"eval_samples_per_second": 9.258, |
|
"eval_steps_per_second": 1.61, |
|
"step": 6872 |
|
}, |
|
{ |
|
"epoch": 357.97, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.7122591137886047, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.2268, |
|
"eval_samples_per_second": 9.406, |
|
"eval_steps_per_second": 1.636, |
|
"step": 6891 |
|
}, |
|
{ |
|
"epoch": 358.96, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.7116859555244446, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.866, |
|
"eval_samples_per_second": 8.938, |
|
"eval_steps_per_second": 1.554, |
|
"step": 6910 |
|
}, |
|
{ |
|
"epoch": 360.0, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.7112235426902771, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.4942, |
|
"eval_samples_per_second": 9.204, |
|
"eval_steps_per_second": 1.601, |
|
"step": 6930 |
|
}, |
|
{ |
|
"epoch": 360.99, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.7106695771217346, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.4042, |
|
"eval_samples_per_second": 9.271, |
|
"eval_steps_per_second": 1.612, |
|
"step": 6949 |
|
}, |
|
{ |
|
"epoch": 361.97, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.710101306438446, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.2879, |
|
"eval_samples_per_second": 9.359, |
|
"eval_steps_per_second": 1.628, |
|
"step": 6968 |
|
}, |
|
{ |
|
"epoch": 362.96, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.7094107270240784, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.269, |
|
"eval_samples_per_second": 9.373, |
|
"eval_steps_per_second": 1.63, |
|
"step": 6987 |
|
}, |
|
{ |
|
"epoch": 363.64, |
|
"grad_norm": 0.44062402844429016, |
|
"learning_rate": 2.635789473684211e-06, |
|
"loss": 0.7798, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 364.0, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.708891749382019, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.9085, |
|
"eval_samples_per_second": 8.909, |
|
"eval_steps_per_second": 1.549, |
|
"step": 7007 |
|
}, |
|
{ |
|
"epoch": 364.99, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.7082711458206177, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.1554, |
|
"eval_samples_per_second": 9.461, |
|
"eval_steps_per_second": 1.645, |
|
"step": 7026 |
|
}, |
|
{ |
|
"epoch": 365.97, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.7078844904899597, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.6546, |
|
"eval_samples_per_second": 9.088, |
|
"eval_steps_per_second": 1.58, |
|
"step": 7045 |
|
}, |
|
{ |
|
"epoch": 366.96, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.7073128819465637, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.7172, |
|
"eval_samples_per_second": 9.043, |
|
"eval_steps_per_second": 1.573, |
|
"step": 7064 |
|
}, |
|
{ |
|
"epoch": 368.0, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.7066096067428589, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.1125, |
|
"eval_samples_per_second": 9.494, |
|
"eval_steps_per_second": 1.651, |
|
"step": 7084 |
|
}, |
|
{ |
|
"epoch": 368.99, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.7057228088378906, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.5605, |
|
"eval_samples_per_second": 9.156, |
|
"eval_steps_per_second": 1.592, |
|
"step": 7103 |
|
}, |
|
{ |
|
"epoch": 369.97, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.7048721313476562, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.3178, |
|
"eval_samples_per_second": 9.336, |
|
"eval_steps_per_second": 1.624, |
|
"step": 7122 |
|
}, |
|
{ |
|
"epoch": 370.96, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.7041941285133362, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.2739, |
|
"eval_samples_per_second": 9.369, |
|
"eval_steps_per_second": 1.629, |
|
"step": 7141 |
|
}, |
|
{ |
|
"epoch": 372.0, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.7035704255104065, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.3207, |
|
"eval_samples_per_second": 9.334, |
|
"eval_steps_per_second": 1.623, |
|
"step": 7161 |
|
}, |
|
{ |
|
"epoch": 372.99, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.7029441595077515, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.094, |
|
"eval_samples_per_second": 9.509, |
|
"eval_steps_per_second": 1.654, |
|
"step": 7180 |
|
}, |
|
{ |
|
"epoch": 373.97, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.7022525668144226, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.7042, |
|
"eval_samples_per_second": 9.052, |
|
"eval_steps_per_second": 1.574, |
|
"step": 7199 |
|
}, |
|
{ |
|
"epoch": 374.96, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.7016597986221313, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.3663, |
|
"eval_samples_per_second": 9.299, |
|
"eval_steps_per_second": 1.617, |
|
"step": 7218 |
|
}, |
|
{ |
|
"epoch": 376.0, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.7011125683784485, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.2533, |
|
"eval_samples_per_second": 9.385, |
|
"eval_steps_per_second": 1.632, |
|
"step": 7238 |
|
}, |
|
{ |
|
"epoch": 376.99, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.7006986737251282, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.4459, |
|
"eval_samples_per_second": 9.24, |
|
"eval_steps_per_second": 1.607, |
|
"step": 7257 |
|
}, |
|
{ |
|
"epoch": 377.97, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.7000675201416016, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.5813, |
|
"eval_samples_per_second": 9.141, |
|
"eval_steps_per_second": 1.59, |
|
"step": 7276 |
|
}, |
|
{ |
|
"epoch": 378.96, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.6994682550430298, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.7191, |
|
"eval_samples_per_second": 9.041, |
|
"eval_steps_per_second": 1.572, |
|
"step": 7295 |
|
}, |
|
{ |
|
"epoch": 380.0, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.6987762451171875, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.7423, |
|
"eval_samples_per_second": 9.025, |
|
"eval_steps_per_second": 1.57, |
|
"step": 7315 |
|
}, |
|
{ |
|
"epoch": 380.99, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.6981701254844666, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.7919, |
|
"eval_samples_per_second": 8.99, |
|
"eval_steps_per_second": 1.563, |
|
"step": 7334 |
|
}, |
|
{ |
|
"epoch": 381.97, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.6976540088653564, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.6842, |
|
"eval_samples_per_second": 9.066, |
|
"eval_steps_per_second": 1.577, |
|
"step": 7353 |
|
}, |
|
{ |
|
"epoch": 382.96, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.6971992254257202, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.7985, |
|
"eval_samples_per_second": 8.985, |
|
"eval_steps_per_second": 1.563, |
|
"step": 7372 |
|
}, |
|
{ |
|
"epoch": 384.0, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.6967973113059998, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.5355, |
|
"eval_samples_per_second": 9.174, |
|
"eval_steps_per_second": 1.595, |
|
"step": 7392 |
|
}, |
|
{ |
|
"epoch": 384.99, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.6962406039237976, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.7996, |
|
"eval_samples_per_second": 8.985, |
|
"eval_steps_per_second": 1.563, |
|
"step": 7411 |
|
}, |
|
{ |
|
"epoch": 385.97, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.6957660913467407, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.4031, |
|
"eval_samples_per_second": 9.272, |
|
"eval_steps_per_second": 1.612, |
|
"step": 7430 |
|
}, |
|
{ |
|
"epoch": 386.96, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.6953439116477966, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 13.2572, |
|
"eval_samples_per_second": 8.675, |
|
"eval_steps_per_second": 1.509, |
|
"step": 7449 |
|
}, |
|
{ |
|
"epoch": 388.0, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.6948480606079102, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.5809, |
|
"eval_samples_per_second": 9.141, |
|
"eval_steps_per_second": 1.59, |
|
"step": 7469 |
|
}, |
|
{ |
|
"epoch": 388.99, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.6944136023521423, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.3151, |
|
"eval_samples_per_second": 9.338, |
|
"eval_steps_per_second": 1.624, |
|
"step": 7488 |
|
}, |
|
{ |
|
"epoch": 389.61, |
|
"grad_norm": 0.2672542333602905, |
|
"learning_rate": 2.1094736842105264e-06, |
|
"loss": 0.7599, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 389.97, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.6940454840660095, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.8439, |
|
"eval_samples_per_second": 8.954, |
|
"eval_steps_per_second": 1.557, |
|
"step": 7507 |
|
}, |
|
{ |
|
"epoch": 390.96, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.693627655506134, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.9964, |
|
"eval_samples_per_second": 8.849, |
|
"eval_steps_per_second": 1.539, |
|
"step": 7526 |
|
}, |
|
{ |
|
"epoch": 392.0, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.6933034062385559, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.8761, |
|
"eval_samples_per_second": 8.931, |
|
"eval_steps_per_second": 1.553, |
|
"step": 7546 |
|
}, |
|
{ |
|
"epoch": 392.99, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.6929065585136414, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 13.196, |
|
"eval_samples_per_second": 8.715, |
|
"eval_steps_per_second": 1.516, |
|
"step": 7565 |
|
}, |
|
{ |
|
"epoch": 393.97, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.6925193667411804, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.7742, |
|
"eval_samples_per_second": 9.002, |
|
"eval_steps_per_second": 1.566, |
|
"step": 7584 |
|
}, |
|
{ |
|
"epoch": 394.96, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.6921875476837158, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.2942, |
|
"eval_samples_per_second": 9.354, |
|
"eval_steps_per_second": 1.627, |
|
"step": 7603 |
|
}, |
|
{ |
|
"epoch": 396.0, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.6919534802436829, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.7028, |
|
"eval_samples_per_second": 9.053, |
|
"eval_steps_per_second": 1.574, |
|
"step": 7623 |
|
}, |
|
{ |
|
"epoch": 396.99, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.6915541291236877, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.1374, |
|
"eval_samples_per_second": 9.475, |
|
"eval_steps_per_second": 1.648, |
|
"step": 7642 |
|
}, |
|
{ |
|
"epoch": 397.97, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.6912309527397156, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.374, |
|
"eval_samples_per_second": 9.294, |
|
"eval_steps_per_second": 1.616, |
|
"step": 7661 |
|
}, |
|
{ |
|
"epoch": 398.96, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.6908969879150391, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.8679, |
|
"eval_samples_per_second": 8.937, |
|
"eval_steps_per_second": 1.554, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 400.0, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.6906691193580627, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.8987, |
|
"eval_samples_per_second": 8.916, |
|
"eval_steps_per_second": 1.551, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 400.99, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.6903204321861267, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.7882, |
|
"eval_samples_per_second": 8.993, |
|
"eval_steps_per_second": 1.564, |
|
"step": 7719 |
|
}, |
|
{ |
|
"epoch": 401.97, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.6900023221969604, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 13.052, |
|
"eval_samples_per_second": 8.811, |
|
"eval_steps_per_second": 1.532, |
|
"step": 7738 |
|
}, |
|
{ |
|
"epoch": 402.96, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.6896329522132874, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.921, |
|
"eval_samples_per_second": 8.9, |
|
"eval_steps_per_second": 1.548, |
|
"step": 7757 |
|
}, |
|
{ |
|
"epoch": 404.0, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.6893720030784607, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.755, |
|
"eval_samples_per_second": 9.016, |
|
"eval_steps_per_second": 1.568, |
|
"step": 7777 |
|
}, |
|
{ |
|
"epoch": 404.99, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.6890887022018433, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.8106, |
|
"eval_samples_per_second": 8.977, |
|
"eval_steps_per_second": 1.561, |
|
"step": 7796 |
|
}, |
|
{ |
|
"epoch": 405.97, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.6887722611427307, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.5889, |
|
"eval_samples_per_second": 9.135, |
|
"eval_steps_per_second": 1.589, |
|
"step": 7815 |
|
}, |
|
{ |
|
"epoch": 406.96, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.6884374022483826, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.5505, |
|
"eval_samples_per_second": 9.163, |
|
"eval_steps_per_second": 1.594, |
|
"step": 7834 |
|
}, |
|
{ |
|
"epoch": 408.0, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.6880633234977722, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.2243, |
|
"eval_samples_per_second": 9.407, |
|
"eval_steps_per_second": 1.636, |
|
"step": 7854 |
|
}, |
|
{ |
|
"epoch": 408.99, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.6877562999725342, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.519, |
|
"eval_samples_per_second": 9.186, |
|
"eval_steps_per_second": 1.598, |
|
"step": 7873 |
|
}, |
|
{ |
|
"epoch": 409.97, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.6874319314956665, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.2922, |
|
"eval_samples_per_second": 9.356, |
|
"eval_steps_per_second": 1.627, |
|
"step": 7892 |
|
}, |
|
{ |
|
"epoch": 410.96, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.6871966123580933, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.4238, |
|
"eval_samples_per_second": 9.256, |
|
"eval_steps_per_second": 1.61, |
|
"step": 7911 |
|
}, |
|
{ |
|
"epoch": 412.0, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.6869640350341797, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.3603, |
|
"eval_samples_per_second": 9.304, |
|
"eval_steps_per_second": 1.618, |
|
"step": 7931 |
|
}, |
|
{ |
|
"epoch": 412.99, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.6867266297340393, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.7522, |
|
"eval_samples_per_second": 9.018, |
|
"eval_steps_per_second": 1.568, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 413.97, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.6865308880805969, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.7581, |
|
"eval_samples_per_second": 9.014, |
|
"eval_steps_per_second": 1.568, |
|
"step": 7969 |
|
}, |
|
{ |
|
"epoch": 414.96, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.6863205432891846, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 13.0743, |
|
"eval_samples_per_second": 8.796, |
|
"eval_steps_per_second": 1.53, |
|
"step": 7988 |
|
}, |
|
{ |
|
"epoch": 415.58, |
|
"grad_norm": 0.24980388581752777, |
|
"learning_rate": 1.5831578947368423e-06, |
|
"loss": 0.7446, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 416.0, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.6860491037368774, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.4592, |
|
"eval_samples_per_second": 9.23, |
|
"eval_steps_per_second": 1.605, |
|
"step": 8008 |
|
}, |
|
{ |
|
"epoch": 416.99, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.6857825517654419, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.6928, |
|
"eval_samples_per_second": 9.06, |
|
"eval_steps_per_second": 1.576, |
|
"step": 8027 |
|
}, |
|
{ |
|
"epoch": 417.97, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.6855095028877258, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.4988, |
|
"eval_samples_per_second": 9.201, |
|
"eval_steps_per_second": 1.6, |
|
"step": 8046 |
|
}, |
|
{ |
|
"epoch": 418.96, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.6851878762245178, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.7595, |
|
"eval_samples_per_second": 9.013, |
|
"eval_steps_per_second": 1.567, |
|
"step": 8065 |
|
}, |
|
{ |
|
"epoch": 420.0, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.6848768591880798, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.6421, |
|
"eval_samples_per_second": 9.097, |
|
"eval_steps_per_second": 1.582, |
|
"step": 8085 |
|
}, |
|
{ |
|
"epoch": 420.99, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.684624969959259, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.5911, |
|
"eval_samples_per_second": 9.133, |
|
"eval_steps_per_second": 1.588, |
|
"step": 8104 |
|
}, |
|
{ |
|
"epoch": 421.97, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.6844747066497803, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.4163, |
|
"eval_samples_per_second": 9.262, |
|
"eval_steps_per_second": 1.611, |
|
"step": 8123 |
|
}, |
|
{ |
|
"epoch": 422.96, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.6842939257621765, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.2945, |
|
"eval_samples_per_second": 9.354, |
|
"eval_steps_per_second": 1.627, |
|
"step": 8142 |
|
}, |
|
{ |
|
"epoch": 424.0, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.6840406060218811, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.9127, |
|
"eval_samples_per_second": 8.906, |
|
"eval_steps_per_second": 1.549, |
|
"step": 8162 |
|
}, |
|
{ |
|
"epoch": 424.99, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.6838209629058838, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 13.1055, |
|
"eval_samples_per_second": 8.775, |
|
"eval_steps_per_second": 1.526, |
|
"step": 8181 |
|
}, |
|
{ |
|
"epoch": 425.97, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.683562159538269, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.6087, |
|
"eval_samples_per_second": 9.121, |
|
"eval_steps_per_second": 1.586, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 426.96, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.6833438277244568, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.7323, |
|
"eval_samples_per_second": 9.032, |
|
"eval_steps_per_second": 1.571, |
|
"step": 8219 |
|
}, |
|
{ |
|
"epoch": 428.0, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.6831278204917908, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.203, |
|
"eval_samples_per_second": 9.424, |
|
"eval_steps_per_second": 1.639, |
|
"step": 8239 |
|
}, |
|
{ |
|
"epoch": 428.99, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.682895302772522, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.6316, |
|
"eval_samples_per_second": 9.104, |
|
"eval_steps_per_second": 1.583, |
|
"step": 8258 |
|
}, |
|
{ |
|
"epoch": 429.97, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.6826810836791992, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.3534, |
|
"eval_samples_per_second": 9.309, |
|
"eval_steps_per_second": 1.619, |
|
"step": 8277 |
|
}, |
|
{ |
|
"epoch": 430.96, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.6824563145637512, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.1952, |
|
"eval_samples_per_second": 9.43, |
|
"eval_steps_per_second": 1.64, |
|
"step": 8296 |
|
}, |
|
{ |
|
"epoch": 432.0, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.682238757610321, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.3689, |
|
"eval_samples_per_second": 9.297, |
|
"eval_steps_per_second": 1.617, |
|
"step": 8316 |
|
}, |
|
{ |
|
"epoch": 432.99, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.6820657253265381, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.7369, |
|
"eval_samples_per_second": 9.029, |
|
"eval_steps_per_second": 1.57, |
|
"step": 8335 |
|
}, |
|
{ |
|
"epoch": 433.97, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.6819124817848206, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.3025, |
|
"eval_samples_per_second": 9.348, |
|
"eval_steps_per_second": 1.626, |
|
"step": 8354 |
|
}, |
|
{ |
|
"epoch": 434.96, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.6817324161529541, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.3628, |
|
"eval_samples_per_second": 9.302, |
|
"eval_steps_per_second": 1.618, |
|
"step": 8373 |
|
}, |
|
{ |
|
"epoch": 436.0, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.6815437078475952, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.1462, |
|
"eval_samples_per_second": 9.468, |
|
"eval_steps_per_second": 1.647, |
|
"step": 8393 |
|
}, |
|
{ |
|
"epoch": 436.99, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.6813645958900452, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.5282, |
|
"eval_samples_per_second": 9.179, |
|
"eval_steps_per_second": 1.596, |
|
"step": 8412 |
|
}, |
|
{ |
|
"epoch": 437.97, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.6811843514442444, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.2909, |
|
"eval_samples_per_second": 9.357, |
|
"eval_steps_per_second": 1.627, |
|
"step": 8431 |
|
}, |
|
{ |
|
"epoch": 438.96, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.6810438632965088, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.1515, |
|
"eval_samples_per_second": 9.464, |
|
"eval_steps_per_second": 1.646, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 440.0, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.6809141635894775, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.3824, |
|
"eval_samples_per_second": 9.287, |
|
"eval_steps_per_second": 1.615, |
|
"step": 8470 |
|
}, |
|
{ |
|
"epoch": 440.99, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.6807241439819336, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.4518, |
|
"eval_samples_per_second": 9.236, |
|
"eval_steps_per_second": 1.606, |
|
"step": 8489 |
|
}, |
|
{ |
|
"epoch": 441.56, |
|
"grad_norm": 0.2838553488254547, |
|
"learning_rate": 1.0568421052631578e-06, |
|
"loss": 0.7356, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 441.97, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.6805526614189148, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.3144, |
|
"eval_samples_per_second": 9.339, |
|
"eval_steps_per_second": 1.624, |
|
"step": 8508 |
|
}, |
|
{ |
|
"epoch": 442.96, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.680397629737854, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.9271, |
|
"eval_samples_per_second": 8.896, |
|
"eval_steps_per_second": 1.547, |
|
"step": 8527 |
|
}, |
|
{ |
|
"epoch": 444.0, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.6802446842193604, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.3804, |
|
"eval_samples_per_second": 9.289, |
|
"eval_steps_per_second": 1.615, |
|
"step": 8547 |
|
}, |
|
{ |
|
"epoch": 444.99, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.6801106333732605, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.8946, |
|
"eval_samples_per_second": 8.918, |
|
"eval_steps_per_second": 1.551, |
|
"step": 8566 |
|
}, |
|
{ |
|
"epoch": 445.97, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.6800308227539062, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.4608, |
|
"eval_samples_per_second": 9.229, |
|
"eval_steps_per_second": 1.605, |
|
"step": 8585 |
|
}, |
|
{ |
|
"epoch": 446.96, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.6799614429473877, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.9415, |
|
"eval_samples_per_second": 8.886, |
|
"eval_steps_per_second": 1.545, |
|
"step": 8604 |
|
}, |
|
{ |
|
"epoch": 448.0, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.679858386516571, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.5796, |
|
"eval_samples_per_second": 9.142, |
|
"eval_steps_per_second": 1.59, |
|
"step": 8624 |
|
}, |
|
{ |
|
"epoch": 448.99, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.6797196865081787, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.4994, |
|
"eval_samples_per_second": 9.2, |
|
"eval_steps_per_second": 1.6, |
|
"step": 8643 |
|
}, |
|
{ |
|
"epoch": 449.97, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.6795772910118103, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.4291, |
|
"eval_samples_per_second": 9.252, |
|
"eval_steps_per_second": 1.609, |
|
"step": 8662 |
|
}, |
|
{ |
|
"epoch": 450.96, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.6794358491897583, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 13.1622, |
|
"eval_samples_per_second": 8.737, |
|
"eval_steps_per_second": 1.519, |
|
"step": 8681 |
|
}, |
|
{ |
|
"epoch": 452.0, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.6792973279953003, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.1066, |
|
"eval_samples_per_second": 9.499, |
|
"eval_steps_per_second": 1.652, |
|
"step": 8701 |
|
}, |
|
{ |
|
"epoch": 452.99, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.6791619658470154, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.3561, |
|
"eval_samples_per_second": 9.307, |
|
"eval_steps_per_second": 1.619, |
|
"step": 8720 |
|
}, |
|
{ |
|
"epoch": 453.97, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.6790581345558167, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.6336, |
|
"eval_samples_per_second": 9.103, |
|
"eval_steps_per_second": 1.583, |
|
"step": 8739 |
|
}, |
|
{ |
|
"epoch": 454.96, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.6789625883102417, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.656, |
|
"eval_samples_per_second": 9.087, |
|
"eval_steps_per_second": 1.58, |
|
"step": 8758 |
|
}, |
|
{ |
|
"epoch": 456.0, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.6788202524185181, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.137, |
|
"eval_samples_per_second": 9.475, |
|
"eval_steps_per_second": 1.648, |
|
"step": 8778 |
|
}, |
|
{ |
|
"epoch": 456.99, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.6787087917327881, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.7992, |
|
"eval_samples_per_second": 8.985, |
|
"eval_steps_per_second": 1.563, |
|
"step": 8797 |
|
}, |
|
{ |
|
"epoch": 457.97, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.6786181330680847, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.3988, |
|
"eval_samples_per_second": 9.275, |
|
"eval_steps_per_second": 1.613, |
|
"step": 8816 |
|
}, |
|
{ |
|
"epoch": 458.96, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.6785180568695068, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.6594, |
|
"eval_samples_per_second": 9.084, |
|
"eval_steps_per_second": 1.58, |
|
"step": 8835 |
|
}, |
|
{ |
|
"epoch": 460.0, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.6784265637397766, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.5546, |
|
"eval_samples_per_second": 9.16, |
|
"eval_steps_per_second": 1.593, |
|
"step": 8855 |
|
}, |
|
{ |
|
"epoch": 460.99, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.6783391237258911, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.6584, |
|
"eval_samples_per_second": 9.085, |
|
"eval_steps_per_second": 1.58, |
|
"step": 8874 |
|
}, |
|
{ |
|
"epoch": 461.97, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.6782403588294983, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.7436, |
|
"eval_samples_per_second": 9.024, |
|
"eval_steps_per_second": 1.569, |
|
"step": 8893 |
|
}, |
|
{ |
|
"epoch": 462.96, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.6781213283538818, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.2339, |
|
"eval_samples_per_second": 9.4, |
|
"eval_steps_per_second": 1.635, |
|
"step": 8912 |
|
}, |
|
{ |
|
"epoch": 464.0, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.6779915690422058, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.6842, |
|
"eval_samples_per_second": 9.066, |
|
"eval_steps_per_second": 1.577, |
|
"step": 8932 |
|
}, |
|
{ |
|
"epoch": 464.99, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.6779080629348755, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.5599, |
|
"eval_samples_per_second": 9.156, |
|
"eval_steps_per_second": 1.592, |
|
"step": 8951 |
|
}, |
|
{ |
|
"epoch": 465.97, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.6778501868247986, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.7749, |
|
"eval_samples_per_second": 9.002, |
|
"eval_steps_per_second": 1.566, |
|
"step": 8970 |
|
}, |
|
{ |
|
"epoch": 466.96, |
|
"eval_gen_len": 0.0, |
|
"eval_loss": 0.6777594685554504, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.4165, |
|
"eval_samples_per_second": 9.262, |
|
"eval_steps_per_second": 1.611, |
|
"step": 8989 |
|
}, |
|
{ |
|
"epoch": 467.53, |
|
"grad_norm": 0.2774975597858429, |
|
"learning_rate": 5.305263157894737e-07, |
|
"loss": 0.73, |
|
"step": 9000 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 9500, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 500, |
|
"save_steps": 500, |
|
"total_flos": 2.622383995402322e+17, |
|
"train_batch_size": 6, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|