|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 233.76623376623377, |
|
"eval_steps": 500, |
|
"global_step": 4500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.99, |
|
"eval_gen_len": 14.487, |
|
"eval_loss": 21.441953659057617, |
|
"eval_rouge1": 0.0832, |
|
"eval_rouge2": 0.0126, |
|
"eval_rougeL": 0.063, |
|
"eval_rougeLsum": 0.0631, |
|
"eval_runtime": 16.3828, |
|
"eval_samples_per_second": 7.02, |
|
"eval_steps_per_second": 1.221, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"eval_gen_len": 14.6261, |
|
"eval_loss": 21.211212158203125, |
|
"eval_rouge1": 0.0858, |
|
"eval_rouge2": 0.014, |
|
"eval_rougeL": 0.0648, |
|
"eval_rougeLsum": 0.0652, |
|
"eval_runtime": 15.2254, |
|
"eval_samples_per_second": 7.553, |
|
"eval_steps_per_second": 1.314, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"eval_gen_len": 14.6783, |
|
"eval_loss": 20.936405181884766, |
|
"eval_rouge1": 0.0866, |
|
"eval_rouge2": 0.0147, |
|
"eval_rougeL": 0.0655, |
|
"eval_rougeLsum": 0.066, |
|
"eval_runtime": 18.5865, |
|
"eval_samples_per_second": 6.187, |
|
"eval_steps_per_second": 1.076, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_gen_len": 14.8522, |
|
"eval_loss": 20.667041778564453, |
|
"eval_rouge1": 0.088, |
|
"eval_rouge2": 0.0145, |
|
"eval_rougeL": 0.0659, |
|
"eval_rougeLsum": 0.0663, |
|
"eval_runtime": 12.6199, |
|
"eval_samples_per_second": 9.113, |
|
"eval_steps_per_second": 1.585, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"eval_gen_len": 15.113, |
|
"eval_loss": 20.46696662902832, |
|
"eval_rouge1": 0.0912, |
|
"eval_rouge2": 0.0145, |
|
"eval_rougeL": 0.0677, |
|
"eval_rougeLsum": 0.0677, |
|
"eval_runtime": 15.7216, |
|
"eval_samples_per_second": 7.315, |
|
"eval_steps_per_second": 1.272, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 5.97, |
|
"eval_gen_len": 15.2087, |
|
"eval_loss": 20.282737731933594, |
|
"eval_rouge1": 0.0913, |
|
"eval_rouge2": 0.0145, |
|
"eval_rougeL": 0.0679, |
|
"eval_rougeLsum": 0.068, |
|
"eval_runtime": 15.2977, |
|
"eval_samples_per_second": 7.517, |
|
"eval_steps_per_second": 1.307, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 6.96, |
|
"eval_gen_len": 15.4087, |
|
"eval_loss": 20.09146499633789, |
|
"eval_rouge1": 0.0918, |
|
"eval_rouge2": 0.0137, |
|
"eval_rougeL": 0.0686, |
|
"eval_rougeLsum": 0.0687, |
|
"eval_runtime": 16.9649, |
|
"eval_samples_per_second": 6.779, |
|
"eval_steps_per_second": 1.179, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_gen_len": 16.0435, |
|
"eval_loss": 19.872163772583008, |
|
"eval_rouge1": 0.0969, |
|
"eval_rouge2": 0.0164, |
|
"eval_rougeL": 0.0736, |
|
"eval_rougeLsum": 0.0737, |
|
"eval_runtime": 15.3373, |
|
"eval_samples_per_second": 7.498, |
|
"eval_steps_per_second": 1.304, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 8.99, |
|
"eval_gen_len": 16.5739, |
|
"eval_loss": 19.655122756958008, |
|
"eval_rouge1": 0.1052, |
|
"eval_rouge2": 0.0198, |
|
"eval_rougeL": 0.0799, |
|
"eval_rougeLsum": 0.0796, |
|
"eval_runtime": 13.8235, |
|
"eval_samples_per_second": 8.319, |
|
"eval_steps_per_second": 1.447, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 9.97, |
|
"eval_gen_len": 17.0435, |
|
"eval_loss": 19.420446395874023, |
|
"eval_rouge1": 0.1071, |
|
"eval_rouge2": 0.0188, |
|
"eval_rougeL": 0.0809, |
|
"eval_rougeLsum": 0.0808, |
|
"eval_runtime": 14.7139, |
|
"eval_samples_per_second": 7.816, |
|
"eval_steps_per_second": 1.359, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 10.96, |
|
"eval_gen_len": 17.1913, |
|
"eval_loss": 19.156597137451172, |
|
"eval_rouge1": 0.1061, |
|
"eval_rouge2": 0.0185, |
|
"eval_rougeL": 0.0815, |
|
"eval_rougeLsum": 0.0819, |
|
"eval_runtime": 14.1553, |
|
"eval_samples_per_second": 8.124, |
|
"eval_steps_per_second": 1.413, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_gen_len": 17.2522, |
|
"eval_loss": 18.833667755126953, |
|
"eval_rouge1": 0.1069, |
|
"eval_rouge2": 0.0213, |
|
"eval_rougeL": 0.0826, |
|
"eval_rougeLsum": 0.0828, |
|
"eval_runtime": 15.232, |
|
"eval_samples_per_second": 7.55, |
|
"eval_steps_per_second": 1.313, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 12.99, |
|
"eval_gen_len": 17.287, |
|
"eval_loss": 18.463964462280273, |
|
"eval_rouge1": 0.1105, |
|
"eval_rouge2": 0.0234, |
|
"eval_rougeL": 0.0858, |
|
"eval_rougeLsum": 0.0852, |
|
"eval_runtime": 15.0679, |
|
"eval_samples_per_second": 7.632, |
|
"eval_steps_per_second": 1.327, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 13.97, |
|
"eval_gen_len": 17.4696, |
|
"eval_loss": 18.000520706176758, |
|
"eval_rouge1": 0.1101, |
|
"eval_rouge2": 0.0232, |
|
"eval_rougeL": 0.0873, |
|
"eval_rougeLsum": 0.0872, |
|
"eval_runtime": 14.073, |
|
"eval_samples_per_second": 8.172, |
|
"eval_steps_per_second": 1.421, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 14.96, |
|
"eval_gen_len": 17.2261, |
|
"eval_loss": 17.395872116088867, |
|
"eval_rouge1": 0.103, |
|
"eval_rouge2": 0.023, |
|
"eval_rougeL": 0.0821, |
|
"eval_rougeLsum": 0.0819, |
|
"eval_runtime": 17.5824, |
|
"eval_samples_per_second": 6.541, |
|
"eval_steps_per_second": 1.138, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_gen_len": 17.6783, |
|
"eval_loss": 16.634456634521484, |
|
"eval_rouge1": 0.1034, |
|
"eval_rouge2": 0.0209, |
|
"eval_rougeL": 0.0804, |
|
"eval_rougeLsum": 0.0802, |
|
"eval_runtime": 14.5519, |
|
"eval_samples_per_second": 7.903, |
|
"eval_steps_per_second": 1.374, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 16.99, |
|
"eval_gen_len": 16.3565, |
|
"eval_loss": 15.872416496276855, |
|
"eval_rouge1": 0.0841, |
|
"eval_rouge2": 0.0149, |
|
"eval_rougeL": 0.0674, |
|
"eval_rougeLsum": 0.0674, |
|
"eval_runtime": 15.4052, |
|
"eval_samples_per_second": 7.465, |
|
"eval_steps_per_second": 1.298, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 17.97, |
|
"eval_gen_len": 15.2609, |
|
"eval_loss": 15.058935165405273, |
|
"eval_rouge1": 0.0697, |
|
"eval_rouge2": 0.0097, |
|
"eval_rougeL": 0.0554, |
|
"eval_rougeLsum": 0.0556, |
|
"eval_runtime": 22.9079, |
|
"eval_samples_per_second": 5.02, |
|
"eval_steps_per_second": 0.873, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 18.96, |
|
"eval_gen_len": 14.7304, |
|
"eval_loss": 14.074901580810547, |
|
"eval_rouge1": 0.0584, |
|
"eval_rouge2": 0.0065, |
|
"eval_rougeL": 0.047, |
|
"eval_rougeLsum": 0.0472, |
|
"eval_runtime": 13.8432, |
|
"eval_samples_per_second": 8.307, |
|
"eval_steps_per_second": 1.445, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_gen_len": 12.0783, |
|
"eval_loss": 12.981775283813477, |
|
"eval_rouge1": 0.037, |
|
"eval_rouge2": 0.004, |
|
"eval_rougeL": 0.0314, |
|
"eval_rougeLsum": 0.0312, |
|
"eval_runtime": 17.4992, |
|
"eval_samples_per_second": 6.572, |
|
"eval_steps_per_second": 1.143, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 20.99, |
|
"eval_gen_len": 13.3043, |
|
"eval_loss": 12.14104175567627, |
|
"eval_rouge1": 0.0327, |
|
"eval_rouge2": 0.0027, |
|
"eval_rougeL": 0.0287, |
|
"eval_rougeLsum": 0.0288, |
|
"eval_runtime": 14.6695, |
|
"eval_samples_per_second": 7.839, |
|
"eval_steps_per_second": 1.363, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 21.97, |
|
"eval_gen_len": 14.1565, |
|
"eval_loss": 11.347674369812012, |
|
"eval_rouge1": 0.0206, |
|
"eval_rouge2": 0.0006, |
|
"eval_rougeL": 0.0188, |
|
"eval_rougeLsum": 0.019, |
|
"eval_runtime": 14.3559, |
|
"eval_samples_per_second": 8.011, |
|
"eval_steps_per_second": 1.393, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 22.96, |
|
"eval_gen_len": 14.5652, |
|
"eval_loss": 10.547377586364746, |
|
"eval_rouge1": 0.0136, |
|
"eval_rouge2": 0.0008, |
|
"eval_rougeL": 0.0121, |
|
"eval_rougeLsum": 0.0123, |
|
"eval_runtime": 16.36, |
|
"eval_samples_per_second": 7.029, |
|
"eval_steps_per_second": 1.222, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_gen_len": 15.9391, |
|
"eval_loss": 9.721901893615723, |
|
"eval_rouge1": 0.0056, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0051, |
|
"eval_rougeLsum": 0.0051, |
|
"eval_runtime": 17.3804, |
|
"eval_samples_per_second": 6.617, |
|
"eval_steps_per_second": 1.151, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 24.99, |
|
"eval_gen_len": 17.0522, |
|
"eval_loss": 8.976031303405762, |
|
"eval_rouge1": 0.0029, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0026, |
|
"eval_rougeLsum": 0.0026, |
|
"eval_runtime": 15.7718, |
|
"eval_samples_per_second": 7.291, |
|
"eval_steps_per_second": 1.268, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 25.97, |
|
"grad_norm": 6.211065292358398, |
|
"learning_rate": 1.8252631578947372e-05, |
|
"loss": 16.8471, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 25.97, |
|
"eval_gen_len": 18.0261, |
|
"eval_loss": 8.254261016845703, |
|
"eval_rouge1": 0.001, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.001, |
|
"eval_rougeLsum": 0.001, |
|
"eval_runtime": 18.4696, |
|
"eval_samples_per_second": 6.226, |
|
"eval_steps_per_second": 1.083, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 26.96, |
|
"eval_gen_len": 18.8609, |
|
"eval_loss": 7.542705059051514, |
|
"eval_rouge1": 0.0008, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0009, |
|
"eval_rougeLsum": 0.0008, |
|
"eval_runtime": 14.9383, |
|
"eval_samples_per_second": 7.698, |
|
"eval_steps_per_second": 1.339, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 6.831495761871338, |
|
"eval_rouge1": 0.0007, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0007, |
|
"eval_rougeLsum": 0.0007, |
|
"eval_runtime": 15.5617, |
|
"eval_samples_per_second": 7.39, |
|
"eval_steps_per_second": 1.285, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 28.99, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 6.190303325653076, |
|
"eval_rouge1": 0.0002, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0002, |
|
"eval_rougeLsum": 0.0002, |
|
"eval_runtime": 15.014, |
|
"eval_samples_per_second": 7.66, |
|
"eval_steps_per_second": 1.332, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 29.97, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 5.610296726226807, |
|
"eval_rouge1": 0.0018, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0016, |
|
"eval_rougeLsum": 0.0016, |
|
"eval_runtime": 15.3165, |
|
"eval_samples_per_second": 7.508, |
|
"eval_steps_per_second": 1.306, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 30.96, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 5.068519592285156, |
|
"eval_rouge1": 0.0011, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0011, |
|
"eval_rougeLsum": 0.0011, |
|
"eval_runtime": 16.2303, |
|
"eval_samples_per_second": 7.086, |
|
"eval_steps_per_second": 1.232, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 4.54244327545166, |
|
"eval_rouge1": 0.0009, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0009, |
|
"eval_rougeLsum": 0.0009, |
|
"eval_runtime": 14.4723, |
|
"eval_samples_per_second": 7.946, |
|
"eval_steps_per_second": 1.382, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 32.99, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 4.084940433502197, |
|
"eval_rouge1": 0.0008, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0008, |
|
"eval_rougeLsum": 0.0008, |
|
"eval_runtime": 17.3788, |
|
"eval_samples_per_second": 6.617, |
|
"eval_steps_per_second": 1.151, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 33.97, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 3.7023561000823975, |
|
"eval_rouge1": 0.0014, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0015, |
|
"eval_rougeLsum": 0.0015, |
|
"eval_runtime": 17.4976, |
|
"eval_samples_per_second": 6.572, |
|
"eval_steps_per_second": 1.143, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 34.96, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 3.3644134998321533, |
|
"eval_rouge1": 0.0035, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0035, |
|
"eval_rougeLsum": 0.0035, |
|
"eval_runtime": 15.0993, |
|
"eval_samples_per_second": 7.616, |
|
"eval_steps_per_second": 1.325, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 3.0496110916137695, |
|
"eval_rouge1": 0.0064, |
|
"eval_rouge2": 0.0002, |
|
"eval_rougeL": 0.0063, |
|
"eval_rougeLsum": 0.0064, |
|
"eval_runtime": 26.2608, |
|
"eval_samples_per_second": 4.379, |
|
"eval_steps_per_second": 0.762, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 36.99, |
|
"eval_gen_len": 18.9913, |
|
"eval_loss": 2.7962286472320557, |
|
"eval_rouge1": 0.0073, |
|
"eval_rouge2": 0.0002, |
|
"eval_rougeL": 0.0073, |
|
"eval_rougeLsum": 0.0074, |
|
"eval_runtime": 16.0565, |
|
"eval_samples_per_second": 7.162, |
|
"eval_steps_per_second": 1.246, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 37.97, |
|
"eval_gen_len": 18.8435, |
|
"eval_loss": 2.5821166038513184, |
|
"eval_rouge1": 0.0078, |
|
"eval_rouge2": 0.0002, |
|
"eval_rougeL": 0.0076, |
|
"eval_rougeLsum": 0.0078, |
|
"eval_runtime": 24.2703, |
|
"eval_samples_per_second": 4.738, |
|
"eval_steps_per_second": 0.824, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 38.96, |
|
"eval_gen_len": 16.9043, |
|
"eval_loss": 2.4025700092315674, |
|
"eval_rouge1": 0.0063, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0063, |
|
"eval_rougeLsum": 0.0063, |
|
"eval_runtime": 12.9295, |
|
"eval_samples_per_second": 8.894, |
|
"eval_steps_per_second": 1.547, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_gen_len": 9.6696, |
|
"eval_loss": 2.2464537620544434, |
|
"eval_rouge1": 0.0008, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0008, |
|
"eval_rougeLsum": 0.0008, |
|
"eval_runtime": 14.656, |
|
"eval_samples_per_second": 7.847, |
|
"eval_steps_per_second": 1.365, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 40.99, |
|
"eval_gen_len": 7.4435, |
|
"eval_loss": 2.124486207962036, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 21.0326, |
|
"eval_samples_per_second": 5.468, |
|
"eval_steps_per_second": 0.951, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 41.97, |
|
"eval_gen_len": 6.9478, |
|
"eval_loss": 2.022434949874878, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 12.9888, |
|
"eval_samples_per_second": 8.854, |
|
"eval_steps_per_second": 1.54, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 42.96, |
|
"eval_gen_len": 6.4696, |
|
"eval_loss": 1.9459978342056274, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 10.4716, |
|
"eval_samples_per_second": 10.982, |
|
"eval_steps_per_second": 1.91, |
|
"step": 827 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_gen_len": 6.1304, |
|
"eval_loss": 1.8852447271347046, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 10.5001, |
|
"eval_samples_per_second": 10.952, |
|
"eval_steps_per_second": 1.905, |
|
"step": 847 |
|
}, |
|
{ |
|
"epoch": 44.99, |
|
"eval_gen_len": 5.9391, |
|
"eval_loss": 1.838249921798706, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 10.0487, |
|
"eval_samples_per_second": 11.444, |
|
"eval_steps_per_second": 1.99, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 45.97, |
|
"eval_gen_len": 6.087, |
|
"eval_loss": 1.7976738214492798, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 10.2357, |
|
"eval_samples_per_second": 11.235, |
|
"eval_steps_per_second": 1.954, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 46.96, |
|
"eval_gen_len": 6.2609, |
|
"eval_loss": 1.7594256401062012, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 10.0331, |
|
"eval_samples_per_second": 11.462, |
|
"eval_steps_per_second": 1.993, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_gen_len": 6.3565, |
|
"eval_loss": 1.7259361743927002, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 10.0928, |
|
"eval_samples_per_second": 11.394, |
|
"eval_steps_per_second": 1.982, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 48.99, |
|
"eval_gen_len": 6.0348, |
|
"eval_loss": 1.7035044431686401, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 10.501, |
|
"eval_samples_per_second": 10.951, |
|
"eval_steps_per_second": 1.905, |
|
"step": 943 |
|
}, |
|
{ |
|
"epoch": 49.97, |
|
"eval_gen_len": 6.113, |
|
"eval_loss": 1.681233525276184, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 19.043, |
|
"eval_samples_per_second": 6.039, |
|
"eval_steps_per_second": 1.05, |
|
"step": 962 |
|
}, |
|
{ |
|
"epoch": 50.96, |
|
"eval_gen_len": 5.8696, |
|
"eval_loss": 1.6589038372039795, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 22.3499, |
|
"eval_samples_per_second": 5.145, |
|
"eval_steps_per_second": 0.895, |
|
"step": 981 |
|
}, |
|
{ |
|
"epoch": 51.95, |
|
"grad_norm": 2.3630588054656982, |
|
"learning_rate": 1.650526315789474e-05, |
|
"loss": 4.012, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_gen_len": 5.713, |
|
"eval_loss": 1.639954924583435, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 22.1264, |
|
"eval_samples_per_second": 5.197, |
|
"eval_steps_per_second": 0.904, |
|
"step": 1001 |
|
}, |
|
{ |
|
"epoch": 52.99, |
|
"eval_gen_len": 5.6957, |
|
"eval_loss": 1.6223595142364502, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 19.2896, |
|
"eval_samples_per_second": 5.962, |
|
"eval_steps_per_second": 1.037, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 53.97, |
|
"eval_gen_len": 5.887, |
|
"eval_loss": 1.6063199043273926, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 17.9683, |
|
"eval_samples_per_second": 6.4, |
|
"eval_steps_per_second": 1.113, |
|
"step": 1039 |
|
}, |
|
{ |
|
"epoch": 54.96, |
|
"eval_gen_len": 5.9826, |
|
"eval_loss": 1.5919499397277832, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 20.6085, |
|
"eval_samples_per_second": 5.58, |
|
"eval_steps_per_second": 0.97, |
|
"step": 1058 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_gen_len": 6.0087, |
|
"eval_loss": 1.5780121088027954, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 22.8466, |
|
"eval_samples_per_second": 5.034, |
|
"eval_steps_per_second": 0.875, |
|
"step": 1078 |
|
}, |
|
{ |
|
"epoch": 56.99, |
|
"eval_gen_len": 5.9652, |
|
"eval_loss": 1.5654348134994507, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 16.8326, |
|
"eval_samples_per_second": 6.832, |
|
"eval_steps_per_second": 1.188, |
|
"step": 1097 |
|
}, |
|
{ |
|
"epoch": 57.97, |
|
"eval_gen_len": 6.3304, |
|
"eval_loss": 1.5537272691726685, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 14.7124, |
|
"eval_samples_per_second": 7.817, |
|
"eval_steps_per_second": 1.359, |
|
"step": 1116 |
|
}, |
|
{ |
|
"epoch": 58.96, |
|
"eval_gen_len": 6.8609, |
|
"eval_loss": 1.5426743030548096, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 14.3263, |
|
"eval_samples_per_second": 8.027, |
|
"eval_steps_per_second": 1.396, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_gen_len": 7.2, |
|
"eval_loss": 1.5310094356536865, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 17.746, |
|
"eval_samples_per_second": 6.48, |
|
"eval_steps_per_second": 1.127, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 60.99, |
|
"eval_gen_len": 7.4261, |
|
"eval_loss": 1.519776701927185, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 16.3177, |
|
"eval_samples_per_second": 7.048, |
|
"eval_steps_per_second": 1.226, |
|
"step": 1174 |
|
}, |
|
{ |
|
"epoch": 61.97, |
|
"eval_gen_len": 6.9826, |
|
"eval_loss": 1.5120151042938232, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 15.0793, |
|
"eval_samples_per_second": 7.626, |
|
"eval_steps_per_second": 1.326, |
|
"step": 1193 |
|
}, |
|
{ |
|
"epoch": 62.96, |
|
"eval_gen_len": 6.6957, |
|
"eval_loss": 1.500430941581726, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 16.015, |
|
"eval_samples_per_second": 7.181, |
|
"eval_steps_per_second": 1.249, |
|
"step": 1212 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_gen_len": 6.9565, |
|
"eval_loss": 1.489511489868164, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 10.2674, |
|
"eval_samples_per_second": 11.2, |
|
"eval_steps_per_second": 1.948, |
|
"step": 1232 |
|
}, |
|
{ |
|
"epoch": 64.99, |
|
"eval_gen_len": 7.2348, |
|
"eval_loss": 1.4760735034942627, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 14.5412, |
|
"eval_samples_per_second": 7.909, |
|
"eval_steps_per_second": 1.375, |
|
"step": 1251 |
|
}, |
|
{ |
|
"epoch": 65.97, |
|
"eval_gen_len": 7.5043, |
|
"eval_loss": 1.4650626182556152, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 19.5952, |
|
"eval_samples_per_second": 5.869, |
|
"eval_steps_per_second": 1.021, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 66.96, |
|
"eval_gen_len": 7.4174, |
|
"eval_loss": 1.4578195810317993, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 15.4051, |
|
"eval_samples_per_second": 7.465, |
|
"eval_steps_per_second": 1.298, |
|
"step": 1289 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_gen_len": 7.5304, |
|
"eval_loss": 1.449414610862732, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 21.6421, |
|
"eval_samples_per_second": 5.314, |
|
"eval_steps_per_second": 0.924, |
|
"step": 1309 |
|
}, |
|
{ |
|
"epoch": 68.99, |
|
"eval_gen_len": 7.4261, |
|
"eval_loss": 1.4453145265579224, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 15.2733, |
|
"eval_samples_per_second": 7.529, |
|
"eval_steps_per_second": 1.309, |
|
"step": 1328 |
|
}, |
|
{ |
|
"epoch": 69.97, |
|
"eval_gen_len": 7.5217, |
|
"eval_loss": 1.4360324144363403, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 17.557, |
|
"eval_samples_per_second": 6.55, |
|
"eval_steps_per_second": 1.139, |
|
"step": 1347 |
|
}, |
|
{ |
|
"epoch": 70.96, |
|
"eval_gen_len": 7.513, |
|
"eval_loss": 1.4272183179855347, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 15.8995, |
|
"eval_samples_per_second": 7.233, |
|
"eval_steps_per_second": 1.258, |
|
"step": 1366 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_gen_len": 7.5391, |
|
"eval_loss": 1.420629620552063, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 18.7912, |
|
"eval_samples_per_second": 6.12, |
|
"eval_steps_per_second": 1.064, |
|
"step": 1386 |
|
}, |
|
{ |
|
"epoch": 72.99, |
|
"eval_gen_len": 7.6261, |
|
"eval_loss": 1.4113017320632935, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 17.3516, |
|
"eval_samples_per_second": 6.628, |
|
"eval_steps_per_second": 1.153, |
|
"step": 1405 |
|
}, |
|
{ |
|
"epoch": 73.97, |
|
"eval_gen_len": 7.9478, |
|
"eval_loss": 1.4024852514266968, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 15.9512, |
|
"eval_samples_per_second": 7.21, |
|
"eval_steps_per_second": 1.254, |
|
"step": 1424 |
|
}, |
|
{ |
|
"epoch": 74.96, |
|
"eval_gen_len": 7.687, |
|
"eval_loss": 1.3967483043670654, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 13.9958, |
|
"eval_samples_per_second": 8.217, |
|
"eval_steps_per_second": 1.429, |
|
"step": 1443 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_gen_len": 7.5391, |
|
"eval_loss": 1.390748143196106, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 21.6525, |
|
"eval_samples_per_second": 5.311, |
|
"eval_steps_per_second": 0.924, |
|
"step": 1463 |
|
}, |
|
{ |
|
"epoch": 76.99, |
|
"eval_gen_len": 7.687, |
|
"eval_loss": 1.3812955617904663, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 14.5594, |
|
"eval_samples_per_second": 7.899, |
|
"eval_steps_per_second": 1.374, |
|
"step": 1482 |
|
}, |
|
{ |
|
"epoch": 77.92, |
|
"grad_norm": 4.4105072021484375, |
|
"learning_rate": 1.475438596491228e-05, |
|
"loss": 1.7845, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 77.97, |
|
"eval_gen_len": 7.8174, |
|
"eval_loss": 1.373058557510376, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 13.5779, |
|
"eval_samples_per_second": 8.47, |
|
"eval_steps_per_second": 1.473, |
|
"step": 1501 |
|
}, |
|
{ |
|
"epoch": 78.96, |
|
"eval_gen_len": 8.0435, |
|
"eval_loss": 1.364722728729248, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 13.5052, |
|
"eval_samples_per_second": 8.515, |
|
"eval_steps_per_second": 1.481, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_gen_len": 8.4087, |
|
"eval_loss": 1.3542518615722656, |
|
"eval_rouge1": 0.001, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0007, |
|
"eval_rougeLsum": 0.0007, |
|
"eval_runtime": 16.4169, |
|
"eval_samples_per_second": 7.005, |
|
"eval_steps_per_second": 1.218, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 80.99, |
|
"eval_gen_len": 8.2, |
|
"eval_loss": 1.3473597764968872, |
|
"eval_rouge1": 0.0006, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0003, |
|
"eval_rougeLsum": 0.0003, |
|
"eval_runtime": 16.0157, |
|
"eval_samples_per_second": 7.18, |
|
"eval_steps_per_second": 1.249, |
|
"step": 1559 |
|
}, |
|
{ |
|
"epoch": 81.97, |
|
"eval_gen_len": 7.7739, |
|
"eval_loss": 1.3397005796432495, |
|
"eval_rouge1": 0.0006, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0006, |
|
"eval_rougeLsum": 0.0006, |
|
"eval_runtime": 15.1007, |
|
"eval_samples_per_second": 7.616, |
|
"eval_steps_per_second": 1.324, |
|
"step": 1578 |
|
}, |
|
{ |
|
"epoch": 82.96, |
|
"eval_gen_len": 7.4783, |
|
"eval_loss": 1.3318209648132324, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 15.6414, |
|
"eval_samples_per_second": 7.352, |
|
"eval_steps_per_second": 1.279, |
|
"step": 1597 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"eval_gen_len": 7.7478, |
|
"eval_loss": 1.3251750469207764, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 15.8191, |
|
"eval_samples_per_second": 7.27, |
|
"eval_steps_per_second": 1.264, |
|
"step": 1617 |
|
}, |
|
{ |
|
"epoch": 84.99, |
|
"eval_gen_len": 7.8609, |
|
"eval_loss": 1.3169076442718506, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 14.9535, |
|
"eval_samples_per_second": 7.691, |
|
"eval_steps_per_second": 1.337, |
|
"step": 1636 |
|
}, |
|
{ |
|
"epoch": 85.97, |
|
"eval_gen_len": 8.0609, |
|
"eval_loss": 1.308994174003601, |
|
"eval_rouge1": 0.0011, |
|
"eval_rouge2": 0.0004, |
|
"eval_rougeL": 0.0009, |
|
"eval_rougeLsum": 0.0009, |
|
"eval_runtime": 17.1311, |
|
"eval_samples_per_second": 6.713, |
|
"eval_steps_per_second": 1.167, |
|
"step": 1655 |
|
}, |
|
{ |
|
"epoch": 86.96, |
|
"eval_gen_len": 8.4174, |
|
"eval_loss": 1.3022288084030151, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 18.0371, |
|
"eval_samples_per_second": 6.376, |
|
"eval_steps_per_second": 1.109, |
|
"step": 1674 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"eval_gen_len": 8.6696, |
|
"eval_loss": 1.2966970205307007, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 17.2294, |
|
"eval_samples_per_second": 6.675, |
|
"eval_steps_per_second": 1.161, |
|
"step": 1694 |
|
}, |
|
{ |
|
"epoch": 88.99, |
|
"eval_gen_len": 8.5913, |
|
"eval_loss": 1.2914807796478271, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 16.2714, |
|
"eval_samples_per_second": 7.068, |
|
"eval_steps_per_second": 1.229, |
|
"step": 1713 |
|
}, |
|
{ |
|
"epoch": 89.97, |
|
"eval_gen_len": 8.4609, |
|
"eval_loss": 1.285845398902893, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 16.2006, |
|
"eval_samples_per_second": 7.099, |
|
"eval_steps_per_second": 1.235, |
|
"step": 1732 |
|
}, |
|
{ |
|
"epoch": 90.96, |
|
"eval_gen_len": 8.3304, |
|
"eval_loss": 1.2773631811141968, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 14.3032, |
|
"eval_samples_per_second": 8.04, |
|
"eval_steps_per_second": 1.398, |
|
"step": 1751 |
|
}, |
|
{ |
|
"epoch": 92.0, |
|
"eval_gen_len": 8.4087, |
|
"eval_loss": 1.2694664001464844, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 14.8525, |
|
"eval_samples_per_second": 7.743, |
|
"eval_steps_per_second": 1.347, |
|
"step": 1771 |
|
}, |
|
{ |
|
"epoch": 92.99, |
|
"eval_gen_len": 8.5217, |
|
"eval_loss": 1.2651293277740479, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 14.3351, |
|
"eval_samples_per_second": 8.022, |
|
"eval_steps_per_second": 1.395, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 93.97, |
|
"eval_gen_len": 8.5217, |
|
"eval_loss": 1.2624008655548096, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 14.2422, |
|
"eval_samples_per_second": 8.075, |
|
"eval_steps_per_second": 1.404, |
|
"step": 1809 |
|
}, |
|
{ |
|
"epoch": 94.96, |
|
"eval_gen_len": 8.4783, |
|
"eval_loss": 1.2562423944473267, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 15.5045, |
|
"eval_samples_per_second": 7.417, |
|
"eval_steps_per_second": 1.29, |
|
"step": 1828 |
|
}, |
|
{ |
|
"epoch": 96.0, |
|
"eval_gen_len": 8.287, |
|
"eval_loss": 1.2521991729736328, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 18.0292, |
|
"eval_samples_per_second": 6.379, |
|
"eval_steps_per_second": 1.109, |
|
"step": 1848 |
|
}, |
|
{ |
|
"epoch": 96.99, |
|
"eval_gen_len": 8.2522, |
|
"eval_loss": 1.2463409900665283, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 16.1387, |
|
"eval_samples_per_second": 7.126, |
|
"eval_steps_per_second": 1.239, |
|
"step": 1867 |
|
}, |
|
{ |
|
"epoch": 97.97, |
|
"eval_gen_len": 8.5217, |
|
"eval_loss": 1.2417724132537842, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 24.8398, |
|
"eval_samples_per_second": 4.63, |
|
"eval_steps_per_second": 0.805, |
|
"step": 1886 |
|
}, |
|
{ |
|
"epoch": 98.96, |
|
"eval_gen_len": 8.6609, |
|
"eval_loss": 1.2342702150344849, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 13.8803, |
|
"eval_samples_per_second": 8.285, |
|
"eval_steps_per_second": 1.441, |
|
"step": 1905 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"eval_gen_len": 8.687, |
|
"eval_loss": 1.2301725149154663, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 15.943, |
|
"eval_samples_per_second": 7.213, |
|
"eval_steps_per_second": 1.254, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 100.99, |
|
"eval_gen_len": 8.4609, |
|
"eval_loss": 1.226989507675171, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 16.4066, |
|
"eval_samples_per_second": 7.009, |
|
"eval_steps_per_second": 1.219, |
|
"step": 1944 |
|
}, |
|
{ |
|
"epoch": 101.97, |
|
"eval_gen_len": 8.2957, |
|
"eval_loss": 1.220055103302002, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 17.02, |
|
"eval_samples_per_second": 6.757, |
|
"eval_steps_per_second": 1.175, |
|
"step": 1963 |
|
}, |
|
{ |
|
"epoch": 102.96, |
|
"eval_gen_len": 8.1826, |
|
"eval_loss": 1.215019702911377, |
|
"eval_rouge1": 0.0003, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0003, |
|
"eval_rougeLsum": 0.0003, |
|
"eval_runtime": 17.3994, |
|
"eval_samples_per_second": 6.609, |
|
"eval_steps_per_second": 1.149, |
|
"step": 1982 |
|
}, |
|
{ |
|
"epoch": 103.9, |
|
"grad_norm": 4.967583656311035, |
|
"learning_rate": 1.3000000000000001e-05, |
|
"loss": 1.5128, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 104.0, |
|
"eval_gen_len": 8.2087, |
|
"eval_loss": 1.2050235271453857, |
|
"eval_rouge1": 0.0003, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0003, |
|
"eval_rougeLsum": 0.0003, |
|
"eval_runtime": 14.9921, |
|
"eval_samples_per_second": 7.671, |
|
"eval_steps_per_second": 1.334, |
|
"step": 2002 |
|
}, |
|
{ |
|
"epoch": 104.99, |
|
"eval_gen_len": 8.4696, |
|
"eval_loss": 1.1983749866485596, |
|
"eval_rouge1": 0.0006, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0003, |
|
"eval_rougeLsum": 0.0003, |
|
"eval_runtime": 16.8064, |
|
"eval_samples_per_second": 6.843, |
|
"eval_steps_per_second": 1.19, |
|
"step": 2021 |
|
}, |
|
{ |
|
"epoch": 105.97, |
|
"eval_gen_len": 8.8435, |
|
"eval_loss": 1.1935399770736694, |
|
"eval_rouge1": 0.0003, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0003, |
|
"eval_rougeLsum": 0.0003, |
|
"eval_runtime": 15.6582, |
|
"eval_samples_per_second": 7.344, |
|
"eval_steps_per_second": 1.277, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 106.96, |
|
"eval_gen_len": 8.7739, |
|
"eval_loss": 1.1894173622131348, |
|
"eval_rouge1": 0.0003, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0003, |
|
"eval_rougeLsum": 0.0003, |
|
"eval_runtime": 16.7955, |
|
"eval_samples_per_second": 6.847, |
|
"eval_steps_per_second": 1.191, |
|
"step": 2059 |
|
}, |
|
{ |
|
"epoch": 108.0, |
|
"eval_gen_len": 8.5565, |
|
"eval_loss": 1.1841349601745605, |
|
"eval_rouge1": 0.0003, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0003, |
|
"eval_rougeLsum": 0.0003, |
|
"eval_runtime": 15.4737, |
|
"eval_samples_per_second": 7.432, |
|
"eval_steps_per_second": 1.293, |
|
"step": 2079 |
|
}, |
|
{ |
|
"epoch": 108.99, |
|
"eval_gen_len": 8.6435, |
|
"eval_loss": 1.1762468814849854, |
|
"eval_rouge1": 0.0003, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0003, |
|
"eval_rougeLsum": 0.0003, |
|
"eval_runtime": 15.4231, |
|
"eval_samples_per_second": 7.456, |
|
"eval_steps_per_second": 1.297, |
|
"step": 2098 |
|
}, |
|
{ |
|
"epoch": 109.97, |
|
"eval_gen_len": 8.513, |
|
"eval_loss": 1.1688281297683716, |
|
"eval_rouge1": 0.0003, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0003, |
|
"eval_rougeLsum": 0.0003, |
|
"eval_runtime": 23.2171, |
|
"eval_samples_per_second": 4.953, |
|
"eval_steps_per_second": 0.861, |
|
"step": 2117 |
|
}, |
|
{ |
|
"epoch": 110.96, |
|
"eval_gen_len": 8.4522, |
|
"eval_loss": 1.163394570350647, |
|
"eval_rouge1": 0.0003, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0003, |
|
"eval_rougeLsum": 0.0003, |
|
"eval_runtime": 14.9862, |
|
"eval_samples_per_second": 7.674, |
|
"eval_steps_per_second": 1.335, |
|
"step": 2136 |
|
}, |
|
{ |
|
"epoch": 112.0, |
|
"eval_gen_len": 8.4261, |
|
"eval_loss": 1.1577537059783936, |
|
"eval_rouge1": 0.001, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.001, |
|
"eval_rougeLsum": 0.0009, |
|
"eval_runtime": 18.0754, |
|
"eval_samples_per_second": 6.362, |
|
"eval_steps_per_second": 1.106, |
|
"step": 2156 |
|
}, |
|
{ |
|
"epoch": 112.99, |
|
"eval_gen_len": 8.4087, |
|
"eval_loss": 1.1507985591888428, |
|
"eval_rouge1": 0.0003, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0003, |
|
"eval_rougeLsum": 0.0003, |
|
"eval_runtime": 17.1754, |
|
"eval_samples_per_second": 6.696, |
|
"eval_steps_per_second": 1.164, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 113.97, |
|
"eval_gen_len": 8.6696, |
|
"eval_loss": 1.1435272693634033, |
|
"eval_rouge1": 0.0003, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0003, |
|
"eval_rougeLsum": 0.0003, |
|
"eval_runtime": 15.4446, |
|
"eval_samples_per_second": 7.446, |
|
"eval_steps_per_second": 1.295, |
|
"step": 2194 |
|
}, |
|
{ |
|
"epoch": 114.96, |
|
"eval_gen_len": 8.8087, |
|
"eval_loss": 1.1399484872817993, |
|
"eval_rouge1": 0.0006, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0003, |
|
"eval_rougeLsum": 0.0003, |
|
"eval_runtime": 17.4221, |
|
"eval_samples_per_second": 6.601, |
|
"eval_steps_per_second": 1.148, |
|
"step": 2213 |
|
}, |
|
{ |
|
"epoch": 116.0, |
|
"eval_gen_len": 8.7565, |
|
"eval_loss": 1.1332604885101318, |
|
"eval_rouge1": 0.0006, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0003, |
|
"eval_rougeLsum": 0.0003, |
|
"eval_runtime": 16.1131, |
|
"eval_samples_per_second": 7.137, |
|
"eval_steps_per_second": 1.241, |
|
"step": 2233 |
|
}, |
|
{ |
|
"epoch": 116.99, |
|
"eval_gen_len": 8.7478, |
|
"eval_loss": 1.1271406412124634, |
|
"eval_rouge1": 0.0003, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0003, |
|
"eval_rougeLsum": 0.0003, |
|
"eval_runtime": 16.9778, |
|
"eval_samples_per_second": 6.774, |
|
"eval_steps_per_second": 1.178, |
|
"step": 2252 |
|
}, |
|
{ |
|
"epoch": 117.97, |
|
"eval_gen_len": 8.8609, |
|
"eval_loss": 1.1240047216415405, |
|
"eval_rouge1": 0.0003, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0003, |
|
"eval_rougeLsum": 0.0003, |
|
"eval_runtime": 14.2803, |
|
"eval_samples_per_second": 8.053, |
|
"eval_steps_per_second": 1.401, |
|
"step": 2271 |
|
}, |
|
{ |
|
"epoch": 118.96, |
|
"eval_gen_len": 8.7391, |
|
"eval_loss": 1.1195180416107178, |
|
"eval_rouge1": 0.0005, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0005, |
|
"eval_rougeLsum": 0.0005, |
|
"eval_runtime": 14.6134, |
|
"eval_samples_per_second": 7.869, |
|
"eval_steps_per_second": 1.369, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 120.0, |
|
"eval_gen_len": 8.7043, |
|
"eval_loss": 1.113542079925537, |
|
"eval_rouge1": 0.0005, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0005, |
|
"eval_rougeLsum": 0.0005, |
|
"eval_runtime": 21.0862, |
|
"eval_samples_per_second": 5.454, |
|
"eval_steps_per_second": 0.948, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 120.99, |
|
"eval_gen_len": 8.5043, |
|
"eval_loss": 1.1078674793243408, |
|
"eval_rouge1": 0.0003, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0003, |
|
"eval_rougeLsum": 0.0003, |
|
"eval_runtime": 18.0247, |
|
"eval_samples_per_second": 6.38, |
|
"eval_steps_per_second": 1.11, |
|
"step": 2329 |
|
}, |
|
{ |
|
"epoch": 121.97, |
|
"eval_gen_len": 8.4696, |
|
"eval_loss": 1.0989575386047363, |
|
"eval_rouge1": 0.0003, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0003, |
|
"eval_rougeLsum": 0.0003, |
|
"eval_runtime": 16.1592, |
|
"eval_samples_per_second": 7.117, |
|
"eval_steps_per_second": 1.238, |
|
"step": 2348 |
|
}, |
|
{ |
|
"epoch": 122.96, |
|
"eval_gen_len": 8.8261, |
|
"eval_loss": 1.0940810441970825, |
|
"eval_rouge1": 0.0003, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0003, |
|
"eval_rougeLsum": 0.0003, |
|
"eval_runtime": 29.3501, |
|
"eval_samples_per_second": 3.918, |
|
"eval_steps_per_second": 0.681, |
|
"step": 2367 |
|
}, |
|
{ |
|
"epoch": 124.0, |
|
"eval_gen_len": 8.7826, |
|
"eval_loss": 1.0875351428985596, |
|
"eval_rouge1": 0.0003, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0003, |
|
"eval_rougeLsum": 0.0003, |
|
"eval_runtime": 17.9868, |
|
"eval_samples_per_second": 6.394, |
|
"eval_steps_per_second": 1.112, |
|
"step": 2387 |
|
}, |
|
{ |
|
"epoch": 124.99, |
|
"eval_gen_len": 8.5913, |
|
"eval_loss": 1.083350658416748, |
|
"eval_rouge1": 0.0003, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0003, |
|
"eval_rougeLsum": 0.0003, |
|
"eval_runtime": 17.325, |
|
"eval_samples_per_second": 6.638, |
|
"eval_steps_per_second": 1.154, |
|
"step": 2406 |
|
}, |
|
{ |
|
"epoch": 125.97, |
|
"eval_gen_len": 8.9652, |
|
"eval_loss": 1.0746002197265625, |
|
"eval_rouge1": 0.0003, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0003, |
|
"eval_rougeLsum": 0.0003, |
|
"eval_runtime": 15.8215, |
|
"eval_samples_per_second": 7.269, |
|
"eval_steps_per_second": 1.264, |
|
"step": 2425 |
|
}, |
|
{ |
|
"epoch": 126.96, |
|
"eval_gen_len": 9.0696, |
|
"eval_loss": 1.0692858695983887, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 15.6722, |
|
"eval_samples_per_second": 7.338, |
|
"eval_steps_per_second": 1.276, |
|
"step": 2444 |
|
}, |
|
{ |
|
"epoch": 128.0, |
|
"eval_gen_len": 9.0261, |
|
"eval_loss": 1.0652384757995605, |
|
"eval_rouge1": 0.0, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0, |
|
"eval_rougeLsum": 0.0, |
|
"eval_runtime": 24.7207, |
|
"eval_samples_per_second": 4.652, |
|
"eval_steps_per_second": 0.809, |
|
"step": 2464 |
|
}, |
|
{ |
|
"epoch": 128.99, |
|
"eval_gen_len": 9.0348, |
|
"eval_loss": 1.0582802295684814, |
|
"eval_rouge1": 0.0003, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0003, |
|
"eval_rougeLsum": 0.0003, |
|
"eval_runtime": 17.9675, |
|
"eval_samples_per_second": 6.4, |
|
"eval_steps_per_second": 1.113, |
|
"step": 2483 |
|
}, |
|
{ |
|
"epoch": 129.87, |
|
"grad_norm": 1.7797880172729492, |
|
"learning_rate": 1.124561403508772e-05, |
|
"loss": 1.3193, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 129.97, |
|
"eval_gen_len": 9.1217, |
|
"eval_loss": 1.0517534017562866, |
|
"eval_rouge1": 0.0003, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0003, |
|
"eval_rougeLsum": 0.0003, |
|
"eval_runtime": 15.6173, |
|
"eval_samples_per_second": 7.364, |
|
"eval_steps_per_second": 1.281, |
|
"step": 2502 |
|
}, |
|
{ |
|
"epoch": 130.96, |
|
"eval_gen_len": 8.887, |
|
"eval_loss": 1.0467168092727661, |
|
"eval_rouge1": 0.0003, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0003, |
|
"eval_rougeLsum": 0.0003, |
|
"eval_runtime": 15.1436, |
|
"eval_samples_per_second": 7.594, |
|
"eval_steps_per_second": 1.321, |
|
"step": 2521 |
|
}, |
|
{ |
|
"epoch": 132.0, |
|
"eval_gen_len": 8.8348, |
|
"eval_loss": 1.0417622327804565, |
|
"eval_rouge1": 0.0003, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0003, |
|
"eval_rougeLsum": 0.0003, |
|
"eval_runtime": 14.5373, |
|
"eval_samples_per_second": 7.911, |
|
"eval_steps_per_second": 1.376, |
|
"step": 2541 |
|
}, |
|
{ |
|
"epoch": 132.99, |
|
"eval_gen_len": 8.7826, |
|
"eval_loss": 1.0359249114990234, |
|
"eval_rouge1": 0.0003, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0003, |
|
"eval_rougeLsum": 0.0003, |
|
"eval_runtime": 15.5453, |
|
"eval_samples_per_second": 7.398, |
|
"eval_steps_per_second": 1.287, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 133.97, |
|
"eval_gen_len": 8.7217, |
|
"eval_loss": 1.0301254987716675, |
|
"eval_rouge1": 0.0003, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0003, |
|
"eval_rougeLsum": 0.0003, |
|
"eval_runtime": 17.6306, |
|
"eval_samples_per_second": 6.523, |
|
"eval_steps_per_second": 1.134, |
|
"step": 2579 |
|
}, |
|
{ |
|
"epoch": 134.96, |
|
"eval_gen_len": 8.7739, |
|
"eval_loss": 1.0256870985031128, |
|
"eval_rouge1": 0.0003, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.0003, |
|
"eval_rougeLsum": 0.0003, |
|
"eval_runtime": 15.6433, |
|
"eval_samples_per_second": 7.351, |
|
"eval_steps_per_second": 1.279, |
|
"step": 2598 |
|
}, |
|
{ |
|
"epoch": 136.0, |
|
"eval_gen_len": 9.2348, |
|
"eval_loss": 1.0207563638687134, |
|
"eval_rouge1": 0.0018, |
|
"eval_rouge2": 0.0009, |
|
"eval_rougeL": 0.0021, |
|
"eval_rougeLsum": 0.0018, |
|
"eval_runtime": 16.9915, |
|
"eval_samples_per_second": 6.768, |
|
"eval_steps_per_second": 1.177, |
|
"step": 2618 |
|
}, |
|
{ |
|
"epoch": 136.99, |
|
"eval_gen_len": 9.4783, |
|
"eval_loss": 1.0155842304229736, |
|
"eval_rouge1": 0.0028, |
|
"eval_rouge2": 0.0011, |
|
"eval_rougeL": 0.0028, |
|
"eval_rougeLsum": 0.0028, |
|
"eval_runtime": 15.1617, |
|
"eval_samples_per_second": 7.585, |
|
"eval_steps_per_second": 1.319, |
|
"step": 2637 |
|
}, |
|
{ |
|
"epoch": 137.97, |
|
"eval_gen_len": 9.4609, |
|
"eval_loss": 1.010608434677124, |
|
"eval_rouge1": 0.0018, |
|
"eval_rouge2": 0.0009, |
|
"eval_rougeL": 0.0018, |
|
"eval_rougeLsum": 0.0018, |
|
"eval_runtime": 17.3586, |
|
"eval_samples_per_second": 6.625, |
|
"eval_steps_per_second": 1.152, |
|
"step": 2656 |
|
}, |
|
{ |
|
"epoch": 138.96, |
|
"eval_gen_len": 9.4522, |
|
"eval_loss": 1.006165862083435, |
|
"eval_rouge1": 0.0018, |
|
"eval_rouge2": 0.0009, |
|
"eval_rougeL": 0.0018, |
|
"eval_rougeLsum": 0.0018, |
|
"eval_runtime": 15.6312, |
|
"eval_samples_per_second": 7.357, |
|
"eval_steps_per_second": 1.279, |
|
"step": 2675 |
|
}, |
|
{ |
|
"epoch": 140.0, |
|
"eval_gen_len": 9.4435, |
|
"eval_loss": 1.0015084743499756, |
|
"eval_rouge1": 0.0018, |
|
"eval_rouge2": 0.0009, |
|
"eval_rougeL": 0.0018, |
|
"eval_rougeLsum": 0.0018, |
|
"eval_runtime": 15.1443, |
|
"eval_samples_per_second": 7.594, |
|
"eval_steps_per_second": 1.321, |
|
"step": 2695 |
|
}, |
|
{ |
|
"epoch": 140.99, |
|
"eval_gen_len": 9.5913, |
|
"eval_loss": 0.9966647028923035, |
|
"eval_rouge1": 0.0015, |
|
"eval_rouge2": 0.0003, |
|
"eval_rougeL": 0.0018, |
|
"eval_rougeLsum": 0.0015, |
|
"eval_runtime": 16.3381, |
|
"eval_samples_per_second": 7.039, |
|
"eval_steps_per_second": 1.224, |
|
"step": 2714 |
|
}, |
|
{ |
|
"epoch": 141.97, |
|
"eval_gen_len": 9.6783, |
|
"eval_loss": 0.9923425912857056, |
|
"eval_rouge1": 0.0015, |
|
"eval_rouge2": 0.0003, |
|
"eval_rougeL": 0.0018, |
|
"eval_rougeLsum": 0.0015, |
|
"eval_runtime": 14.5033, |
|
"eval_samples_per_second": 7.929, |
|
"eval_steps_per_second": 1.379, |
|
"step": 2733 |
|
}, |
|
{ |
|
"epoch": 142.96, |
|
"eval_gen_len": 9.8, |
|
"eval_loss": 0.9881101250648499, |
|
"eval_rouge1": 0.0015, |
|
"eval_rouge2": 0.0003, |
|
"eval_rougeL": 0.0015, |
|
"eval_rougeLsum": 0.0015, |
|
"eval_runtime": 17.6181, |
|
"eval_samples_per_second": 6.527, |
|
"eval_steps_per_second": 1.135, |
|
"step": 2752 |
|
}, |
|
{ |
|
"epoch": 144.0, |
|
"eval_gen_len": 9.8435, |
|
"eval_loss": 0.9837466478347778, |
|
"eval_rouge1": 0.0015, |
|
"eval_rouge2": 0.0003, |
|
"eval_rougeL": 0.0015, |
|
"eval_rougeLsum": 0.0015, |
|
"eval_runtime": 15.937, |
|
"eval_samples_per_second": 7.216, |
|
"eval_steps_per_second": 1.255, |
|
"step": 2772 |
|
}, |
|
{ |
|
"epoch": 144.99, |
|
"eval_gen_len": 9.9304, |
|
"eval_loss": 0.9798020720481873, |
|
"eval_rouge1": 0.0015, |
|
"eval_rouge2": 0.0003, |
|
"eval_rougeL": 0.0015, |
|
"eval_rougeLsum": 0.0015, |
|
"eval_runtime": 16.407, |
|
"eval_samples_per_second": 7.009, |
|
"eval_steps_per_second": 1.219, |
|
"step": 2791 |
|
}, |
|
{ |
|
"epoch": 145.97, |
|
"eval_gen_len": 9.9826, |
|
"eval_loss": 0.975723922252655, |
|
"eval_rouge1": 0.0015, |
|
"eval_rouge2": 0.0003, |
|
"eval_rougeL": 0.0015, |
|
"eval_rougeLsum": 0.0015, |
|
"eval_runtime": 15.1268, |
|
"eval_samples_per_second": 7.602, |
|
"eval_steps_per_second": 1.322, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 146.96, |
|
"eval_gen_len": 10.0261, |
|
"eval_loss": 0.9714429378509521, |
|
"eval_rouge1": 0.0015, |
|
"eval_rouge2": 0.0003, |
|
"eval_rougeL": 0.0015, |
|
"eval_rougeLsum": 0.0015, |
|
"eval_runtime": 16.0606, |
|
"eval_samples_per_second": 7.16, |
|
"eval_steps_per_second": 1.245, |
|
"step": 2829 |
|
}, |
|
{ |
|
"epoch": 148.0, |
|
"eval_gen_len": 9.9739, |
|
"eval_loss": 0.9681385159492493, |
|
"eval_rouge1": 0.0015, |
|
"eval_rouge2": 0.0003, |
|
"eval_rougeL": 0.0015, |
|
"eval_rougeLsum": 0.0015, |
|
"eval_runtime": 15.6543, |
|
"eval_samples_per_second": 7.346, |
|
"eval_steps_per_second": 1.278, |
|
"step": 2849 |
|
}, |
|
{ |
|
"epoch": 148.99, |
|
"eval_gen_len": 9.9739, |
|
"eval_loss": 0.9637375473976135, |
|
"eval_rouge1": 0.0015, |
|
"eval_rouge2": 0.0003, |
|
"eval_rougeL": 0.0015, |
|
"eval_rougeLsum": 0.0015, |
|
"eval_runtime": 15.1696, |
|
"eval_samples_per_second": 7.581, |
|
"eval_steps_per_second": 1.318, |
|
"step": 2868 |
|
}, |
|
{ |
|
"epoch": 149.97, |
|
"eval_gen_len": 10.0348, |
|
"eval_loss": 0.9596477746963501, |
|
"eval_rouge1": 0.0015, |
|
"eval_rouge2": 0.0009, |
|
"eval_rougeL": 0.0015, |
|
"eval_rougeLsum": 0.0015, |
|
"eval_runtime": 15.2187, |
|
"eval_samples_per_second": 7.557, |
|
"eval_steps_per_second": 1.314, |
|
"step": 2887 |
|
}, |
|
{ |
|
"epoch": 150.96, |
|
"eval_gen_len": 10.0174, |
|
"eval_loss": 0.9558045268058777, |
|
"eval_rouge1": 0.0017, |
|
"eval_rouge2": 0.0009, |
|
"eval_rougeL": 0.0017, |
|
"eval_rougeLsum": 0.0017, |
|
"eval_runtime": 16.4212, |
|
"eval_samples_per_second": 7.003, |
|
"eval_steps_per_second": 1.218, |
|
"step": 2906 |
|
}, |
|
{ |
|
"epoch": 152.0, |
|
"eval_gen_len": 10.1304, |
|
"eval_loss": 0.9513251185417175, |
|
"eval_rouge1": 0.0021, |
|
"eval_rouge2": 0.0005, |
|
"eval_rougeL": 0.0021, |
|
"eval_rougeLsum": 0.0021, |
|
"eval_runtime": 17.1931, |
|
"eval_samples_per_second": 6.689, |
|
"eval_steps_per_second": 1.163, |
|
"step": 2926 |
|
}, |
|
{ |
|
"epoch": 152.99, |
|
"eval_gen_len": 10.1217, |
|
"eval_loss": 0.947124719619751, |
|
"eval_rouge1": 0.0021, |
|
"eval_rouge2": 0.0014, |
|
"eval_rougeL": 0.0021, |
|
"eval_rougeLsum": 0.0021, |
|
"eval_runtime": 16.7224, |
|
"eval_samples_per_second": 6.877, |
|
"eval_steps_per_second": 1.196, |
|
"step": 2945 |
|
}, |
|
{ |
|
"epoch": 153.97, |
|
"eval_gen_len": 10.2696, |
|
"eval_loss": 0.9428749084472656, |
|
"eval_rouge1": 0.0043, |
|
"eval_rouge2": 0.0014, |
|
"eval_rougeL": 0.0037, |
|
"eval_rougeLsum": 0.0036, |
|
"eval_runtime": 15.3406, |
|
"eval_samples_per_second": 7.496, |
|
"eval_steps_per_second": 1.304, |
|
"step": 2964 |
|
}, |
|
{ |
|
"epoch": 154.96, |
|
"eval_gen_len": 10.1217, |
|
"eval_loss": 0.939849853515625, |
|
"eval_rouge1": 0.0021, |
|
"eval_rouge2": 0.0014, |
|
"eval_rougeL": 0.0021, |
|
"eval_rougeLsum": 0.0021, |
|
"eval_runtime": 15.8431, |
|
"eval_samples_per_second": 7.259, |
|
"eval_steps_per_second": 1.262, |
|
"step": 2983 |
|
}, |
|
{ |
|
"epoch": 155.84, |
|
"grad_norm": 0.8866944313049316, |
|
"learning_rate": 9.49122807017544e-06, |
|
"loss": 1.1379, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 156.0, |
|
"eval_gen_len": 10.0522, |
|
"eval_loss": 0.9357353448867798, |
|
"eval_rouge1": 0.0017, |
|
"eval_rouge2": 0.0011, |
|
"eval_rougeL": 0.0017, |
|
"eval_rougeLsum": 0.0017, |
|
"eval_runtime": 16.7405, |
|
"eval_samples_per_second": 6.87, |
|
"eval_steps_per_second": 1.195, |
|
"step": 3003 |
|
}, |
|
{ |
|
"epoch": 156.99, |
|
"eval_gen_len": 10.1217, |
|
"eval_loss": 0.9312177300453186, |
|
"eval_rouge1": 0.0017, |
|
"eval_rouge2": 0.0011, |
|
"eval_rougeL": 0.0017, |
|
"eval_rougeLsum": 0.0017, |
|
"eval_runtime": 12.164, |
|
"eval_samples_per_second": 9.454, |
|
"eval_steps_per_second": 1.644, |
|
"step": 3022 |
|
}, |
|
{ |
|
"epoch": 157.97, |
|
"eval_gen_len": 10.2609, |
|
"eval_loss": 0.9275165796279907, |
|
"eval_rouge1": 0.0027, |
|
"eval_rouge2": 0.0006, |
|
"eval_rougeL": 0.0022, |
|
"eval_rougeLsum": 0.0022, |
|
"eval_runtime": 15.2713, |
|
"eval_samples_per_second": 7.53, |
|
"eval_steps_per_second": 1.31, |
|
"step": 3041 |
|
}, |
|
{ |
|
"epoch": 158.96, |
|
"eval_gen_len": 10.4435, |
|
"eval_loss": 0.9236345887184143, |
|
"eval_rouge1": 0.0036, |
|
"eval_rouge2": 0.0006, |
|
"eval_rougeL": 0.0028, |
|
"eval_rougeLsum": 0.0029, |
|
"eval_runtime": 17.5627, |
|
"eval_samples_per_second": 6.548, |
|
"eval_steps_per_second": 1.139, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 160.0, |
|
"eval_gen_len": 10.513, |
|
"eval_loss": 0.9195658564567566, |
|
"eval_rouge1": 0.0049, |
|
"eval_rouge2": 0.0012, |
|
"eval_rougeL": 0.0044, |
|
"eval_rougeLsum": 0.0044, |
|
"eval_runtime": 16.5853, |
|
"eval_samples_per_second": 6.934, |
|
"eval_steps_per_second": 1.206, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 160.99, |
|
"eval_gen_len": 10.487, |
|
"eval_loss": 0.9164186120033264, |
|
"eval_rouge1": 0.0046, |
|
"eval_rouge2": 0.0007, |
|
"eval_rougeL": 0.0038, |
|
"eval_rougeLsum": 0.0038, |
|
"eval_runtime": 15.397, |
|
"eval_samples_per_second": 7.469, |
|
"eval_steps_per_second": 1.299, |
|
"step": 3099 |
|
}, |
|
{ |
|
"epoch": 161.97, |
|
"eval_gen_len": 10.4783, |
|
"eval_loss": 0.9130675196647644, |
|
"eval_rouge1": 0.0039, |
|
"eval_rouge2": 0.0007, |
|
"eval_rougeL": 0.0034, |
|
"eval_rougeLsum": 0.0034, |
|
"eval_runtime": 16.4247, |
|
"eval_samples_per_second": 7.002, |
|
"eval_steps_per_second": 1.218, |
|
"step": 3118 |
|
}, |
|
{ |
|
"epoch": 162.96, |
|
"eval_gen_len": 10.6522, |
|
"eval_loss": 0.9092690944671631, |
|
"eval_rouge1": 0.007, |
|
"eval_rouge2": 0.0023, |
|
"eval_rougeL": 0.0066, |
|
"eval_rougeLsum": 0.0065, |
|
"eval_runtime": 14.8696, |
|
"eval_samples_per_second": 7.734, |
|
"eval_steps_per_second": 1.345, |
|
"step": 3137 |
|
}, |
|
{ |
|
"epoch": 164.0, |
|
"eval_gen_len": 10.5739, |
|
"eval_loss": 0.9059688448905945, |
|
"eval_rouge1": 0.005, |
|
"eval_rouge2": 0.001, |
|
"eval_rougeL": 0.0044, |
|
"eval_rougeLsum": 0.0043, |
|
"eval_runtime": 15.7264, |
|
"eval_samples_per_second": 7.313, |
|
"eval_steps_per_second": 1.272, |
|
"step": 3157 |
|
}, |
|
{ |
|
"epoch": 164.99, |
|
"eval_gen_len": 10.7391, |
|
"eval_loss": 0.9024509191513062, |
|
"eval_rouge1": 0.0074, |
|
"eval_rouge2": 0.0023, |
|
"eval_rougeL": 0.0068, |
|
"eval_rougeLsum": 0.0066, |
|
"eval_runtime": 15.813, |
|
"eval_samples_per_second": 7.272, |
|
"eval_steps_per_second": 1.265, |
|
"step": 3176 |
|
}, |
|
{ |
|
"epoch": 165.97, |
|
"eval_gen_len": 10.5652, |
|
"eval_loss": 0.8994614481925964, |
|
"eval_rouge1": 0.0054, |
|
"eval_rouge2": 0.001, |
|
"eval_rougeL": 0.0048, |
|
"eval_rougeLsum": 0.0048, |
|
"eval_runtime": 15.1711, |
|
"eval_samples_per_second": 7.58, |
|
"eval_steps_per_second": 1.318, |
|
"step": 3195 |
|
}, |
|
{ |
|
"epoch": 166.96, |
|
"eval_gen_len": 10.5913, |
|
"eval_loss": 0.8970102667808533, |
|
"eval_rouge1": 0.0061, |
|
"eval_rouge2": 0.0014, |
|
"eval_rougeL": 0.0053, |
|
"eval_rougeLsum": 0.0053, |
|
"eval_runtime": 14.0622, |
|
"eval_samples_per_second": 8.178, |
|
"eval_steps_per_second": 1.422, |
|
"step": 3214 |
|
}, |
|
{ |
|
"epoch": 168.0, |
|
"eval_gen_len": 10.6174, |
|
"eval_loss": 0.894256055355072, |
|
"eval_rouge1": 0.0082, |
|
"eval_rouge2": 0.003, |
|
"eval_rougeL": 0.0077, |
|
"eval_rougeLsum": 0.0075, |
|
"eval_runtime": 16.7533, |
|
"eval_samples_per_second": 6.864, |
|
"eval_steps_per_second": 1.194, |
|
"step": 3234 |
|
}, |
|
{ |
|
"epoch": 168.99, |
|
"eval_gen_len": 10.6348, |
|
"eval_loss": 0.891488790512085, |
|
"eval_rouge1": 0.0092, |
|
"eval_rouge2": 0.0029, |
|
"eval_rougeL": 0.0083, |
|
"eval_rougeLsum": 0.0081, |
|
"eval_runtime": 13.6019, |
|
"eval_samples_per_second": 8.455, |
|
"eval_steps_per_second": 1.47, |
|
"step": 3253 |
|
}, |
|
{ |
|
"epoch": 169.97, |
|
"eval_gen_len": 10.5913, |
|
"eval_loss": 0.8882649540901184, |
|
"eval_rouge1": 0.0073, |
|
"eval_rouge2": 0.0022, |
|
"eval_rougeL": 0.0068, |
|
"eval_rougeLsum": 0.0067, |
|
"eval_runtime": 16.0681, |
|
"eval_samples_per_second": 7.157, |
|
"eval_steps_per_second": 1.245, |
|
"step": 3272 |
|
}, |
|
{ |
|
"epoch": 170.96, |
|
"eval_gen_len": 10.6522, |
|
"eval_loss": 0.8857714533805847, |
|
"eval_rouge1": 0.009, |
|
"eval_rouge2": 0.0025, |
|
"eval_rougeL": 0.0081, |
|
"eval_rougeLsum": 0.008, |
|
"eval_runtime": 19.0222, |
|
"eval_samples_per_second": 6.046, |
|
"eval_steps_per_second": 1.051, |
|
"step": 3291 |
|
}, |
|
{ |
|
"epoch": 172.0, |
|
"eval_gen_len": 10.5826, |
|
"eval_loss": 0.8824735283851624, |
|
"eval_rouge1": 0.0073, |
|
"eval_rouge2": 0.0018, |
|
"eval_rougeL": 0.0068, |
|
"eval_rougeLsum": 0.0068, |
|
"eval_runtime": 17.2556, |
|
"eval_samples_per_second": 6.665, |
|
"eval_steps_per_second": 1.159, |
|
"step": 3311 |
|
}, |
|
{ |
|
"epoch": 172.99, |
|
"eval_gen_len": 10.5913, |
|
"eval_loss": 0.8791074156761169, |
|
"eval_rouge1": 0.0077, |
|
"eval_rouge2": 0.0016, |
|
"eval_rougeL": 0.0066, |
|
"eval_rougeLsum": 0.0066, |
|
"eval_runtime": 15.3622, |
|
"eval_samples_per_second": 7.486, |
|
"eval_steps_per_second": 1.302, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 173.97, |
|
"eval_gen_len": 10.6174, |
|
"eval_loss": 0.8760549426078796, |
|
"eval_rouge1": 0.0078, |
|
"eval_rouge2": 0.0017, |
|
"eval_rougeL": 0.0069, |
|
"eval_rougeLsum": 0.007, |
|
"eval_runtime": 13.7617, |
|
"eval_samples_per_second": 8.357, |
|
"eval_steps_per_second": 1.453, |
|
"step": 3349 |
|
}, |
|
{ |
|
"epoch": 174.96, |
|
"eval_gen_len": 10.8348, |
|
"eval_loss": 0.8735494017601013, |
|
"eval_rouge1": 0.0099, |
|
"eval_rouge2": 0.0031, |
|
"eval_rougeL": 0.0093, |
|
"eval_rougeLsum": 0.0093, |
|
"eval_runtime": 16.3628, |
|
"eval_samples_per_second": 7.028, |
|
"eval_steps_per_second": 1.222, |
|
"step": 3368 |
|
}, |
|
{ |
|
"epoch": 176.0, |
|
"eval_gen_len": 10.8174, |
|
"eval_loss": 0.8713410496711731, |
|
"eval_rouge1": 0.0103, |
|
"eval_rouge2": 0.0031, |
|
"eval_rougeL": 0.0097, |
|
"eval_rougeLsum": 0.0098, |
|
"eval_runtime": 15.0408, |
|
"eval_samples_per_second": 7.646, |
|
"eval_steps_per_second": 1.33, |
|
"step": 3388 |
|
}, |
|
{ |
|
"epoch": 176.99, |
|
"eval_gen_len": 10.687, |
|
"eval_loss": 0.8688496947288513, |
|
"eval_rouge1": 0.0104, |
|
"eval_rouge2": 0.0027, |
|
"eval_rougeL": 0.0087, |
|
"eval_rougeLsum": 0.0087, |
|
"eval_runtime": 13.3269, |
|
"eval_samples_per_second": 8.629, |
|
"eval_steps_per_second": 1.501, |
|
"step": 3407 |
|
}, |
|
{ |
|
"epoch": 177.97, |
|
"eval_gen_len": 10.7304, |
|
"eval_loss": 0.8659321069717407, |
|
"eval_rouge1": 0.0102, |
|
"eval_rouge2": 0.0022, |
|
"eval_rougeL": 0.0085, |
|
"eval_rougeLsum": 0.0083, |
|
"eval_runtime": 15.8407, |
|
"eval_samples_per_second": 7.26, |
|
"eval_steps_per_second": 1.263, |
|
"step": 3426 |
|
}, |
|
{ |
|
"epoch": 178.96, |
|
"eval_gen_len": 10.9217, |
|
"eval_loss": 0.8626890778541565, |
|
"eval_rouge1": 0.0109, |
|
"eval_rouge2": 0.0025, |
|
"eval_rougeL": 0.0086, |
|
"eval_rougeLsum": 0.0085, |
|
"eval_runtime": 15.1338, |
|
"eval_samples_per_second": 7.599, |
|
"eval_steps_per_second": 1.322, |
|
"step": 3445 |
|
}, |
|
{ |
|
"epoch": 180.0, |
|
"eval_gen_len": 11.087, |
|
"eval_loss": 0.8599569201469421, |
|
"eval_rouge1": 0.0124, |
|
"eval_rouge2": 0.0025, |
|
"eval_rougeL": 0.0101, |
|
"eval_rougeLsum": 0.0101, |
|
"eval_runtime": 21.7846, |
|
"eval_samples_per_second": 5.279, |
|
"eval_steps_per_second": 0.918, |
|
"step": 3465 |
|
}, |
|
{ |
|
"epoch": 180.99, |
|
"eval_gen_len": 11.1478, |
|
"eval_loss": 0.8579829931259155, |
|
"eval_rouge1": 0.0132, |
|
"eval_rouge2": 0.0026, |
|
"eval_rougeL": 0.0111, |
|
"eval_rougeLsum": 0.0109, |
|
"eval_runtime": 14.4812, |
|
"eval_samples_per_second": 7.941, |
|
"eval_steps_per_second": 1.381, |
|
"step": 3484 |
|
}, |
|
{ |
|
"epoch": 181.82, |
|
"grad_norm": 0.5403133034706116, |
|
"learning_rate": 7.736842105263158e-06, |
|
"loss": 1.0168, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 181.97, |
|
"eval_gen_len": 10.9739, |
|
"eval_loss": 0.8559067845344543, |
|
"eval_rouge1": 0.011, |
|
"eval_rouge2": 0.0027, |
|
"eval_rougeL": 0.0095, |
|
"eval_rougeLsum": 0.0093, |
|
"eval_runtime": 13.8018, |
|
"eval_samples_per_second": 8.332, |
|
"eval_steps_per_second": 1.449, |
|
"step": 3503 |
|
}, |
|
{ |
|
"epoch": 182.96, |
|
"eval_gen_len": 10.9652, |
|
"eval_loss": 0.8531643152236938, |
|
"eval_rouge1": 0.0122, |
|
"eval_rouge2": 0.0033, |
|
"eval_rougeL": 0.0101, |
|
"eval_rougeLsum": 0.01, |
|
"eval_runtime": 15.9407, |
|
"eval_samples_per_second": 7.214, |
|
"eval_steps_per_second": 1.255, |
|
"step": 3522 |
|
}, |
|
{ |
|
"epoch": 184.0, |
|
"eval_gen_len": 11.0609, |
|
"eval_loss": 0.8499117493629456, |
|
"eval_rouge1": 0.0141, |
|
"eval_rouge2": 0.0034, |
|
"eval_rougeL": 0.0121, |
|
"eval_rougeLsum": 0.012, |
|
"eval_runtime": 13.074, |
|
"eval_samples_per_second": 8.796, |
|
"eval_steps_per_second": 1.53, |
|
"step": 3542 |
|
}, |
|
{ |
|
"epoch": 184.99, |
|
"eval_gen_len": 11.3913, |
|
"eval_loss": 0.8471864461898804, |
|
"eval_rouge1": 0.0178, |
|
"eval_rouge2": 0.0037, |
|
"eval_rougeL": 0.0152, |
|
"eval_rougeLsum": 0.0149, |
|
"eval_runtime": 13.6132, |
|
"eval_samples_per_second": 8.448, |
|
"eval_steps_per_second": 1.469, |
|
"step": 3561 |
|
}, |
|
{ |
|
"epoch": 185.97, |
|
"eval_gen_len": 11.287, |
|
"eval_loss": 0.8454113602638245, |
|
"eval_rouge1": 0.0173, |
|
"eval_rouge2": 0.0036, |
|
"eval_rougeL": 0.0145, |
|
"eval_rougeLsum": 0.0141, |
|
"eval_runtime": 12.8847, |
|
"eval_samples_per_second": 8.925, |
|
"eval_steps_per_second": 1.552, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 186.96, |
|
"eval_gen_len": 11.2261, |
|
"eval_loss": 0.8434880375862122, |
|
"eval_rouge1": 0.017, |
|
"eval_rouge2": 0.0027, |
|
"eval_rougeL": 0.0143, |
|
"eval_rougeLsum": 0.0141, |
|
"eval_runtime": 11.4202, |
|
"eval_samples_per_second": 10.07, |
|
"eval_steps_per_second": 1.751, |
|
"step": 3599 |
|
}, |
|
{ |
|
"epoch": 188.0, |
|
"eval_gen_len": 11.3913, |
|
"eval_loss": 0.840716540813446, |
|
"eval_rouge1": 0.0188, |
|
"eval_rouge2": 0.0032, |
|
"eval_rougeL": 0.0161, |
|
"eval_rougeLsum": 0.0159, |
|
"eval_runtime": 10.5922, |
|
"eval_samples_per_second": 10.857, |
|
"eval_steps_per_second": 1.888, |
|
"step": 3619 |
|
}, |
|
{ |
|
"epoch": 188.99, |
|
"eval_gen_len": 11.2087, |
|
"eval_loss": 0.8385959267616272, |
|
"eval_rouge1": 0.0166, |
|
"eval_rouge2": 0.0033, |
|
"eval_rougeL": 0.0144, |
|
"eval_rougeLsum": 0.0141, |
|
"eval_runtime": 11.9064, |
|
"eval_samples_per_second": 9.659, |
|
"eval_steps_per_second": 1.68, |
|
"step": 3638 |
|
}, |
|
{ |
|
"epoch": 189.97, |
|
"eval_gen_len": 11.2609, |
|
"eval_loss": 0.836624026298523, |
|
"eval_rouge1": 0.0169, |
|
"eval_rouge2": 0.0031, |
|
"eval_rougeL": 0.0147, |
|
"eval_rougeLsum": 0.0144, |
|
"eval_runtime": 10.125, |
|
"eval_samples_per_second": 11.358, |
|
"eval_steps_per_second": 1.975, |
|
"step": 3657 |
|
}, |
|
{ |
|
"epoch": 190.96, |
|
"eval_gen_len": 11.2522, |
|
"eval_loss": 0.834960401058197, |
|
"eval_rouge1": 0.0181, |
|
"eval_rouge2": 0.0038, |
|
"eval_rougeL": 0.0159, |
|
"eval_rougeLsum": 0.0158, |
|
"eval_runtime": 10.4265, |
|
"eval_samples_per_second": 11.03, |
|
"eval_steps_per_second": 1.918, |
|
"step": 3676 |
|
}, |
|
{ |
|
"epoch": 192.0, |
|
"eval_gen_len": 11.6174, |
|
"eval_loss": 0.832145631313324, |
|
"eval_rouge1": 0.0223, |
|
"eval_rouge2": 0.0048, |
|
"eval_rougeL": 0.0198, |
|
"eval_rougeLsum": 0.0197, |
|
"eval_runtime": 14.2788, |
|
"eval_samples_per_second": 8.054, |
|
"eval_steps_per_second": 1.401, |
|
"step": 3696 |
|
}, |
|
{ |
|
"epoch": 192.99, |
|
"eval_gen_len": 11.5913, |
|
"eval_loss": 0.8298683166503906, |
|
"eval_rouge1": 0.0238, |
|
"eval_rouge2": 0.0054, |
|
"eval_rougeL": 0.021, |
|
"eval_rougeLsum": 0.0208, |
|
"eval_runtime": 13.7725, |
|
"eval_samples_per_second": 8.35, |
|
"eval_steps_per_second": 1.452, |
|
"step": 3715 |
|
}, |
|
{ |
|
"epoch": 193.97, |
|
"eval_gen_len": 11.513, |
|
"eval_loss": 0.8281151056289673, |
|
"eval_rouge1": 0.0238, |
|
"eval_rouge2": 0.0057, |
|
"eval_rougeL": 0.0208, |
|
"eval_rougeLsum": 0.0206, |
|
"eval_runtime": 16.5357, |
|
"eval_samples_per_second": 6.955, |
|
"eval_steps_per_second": 1.21, |
|
"step": 3734 |
|
}, |
|
{ |
|
"epoch": 194.96, |
|
"eval_gen_len": 11.4696, |
|
"eval_loss": 0.8263967633247375, |
|
"eval_rouge1": 0.0242, |
|
"eval_rouge2": 0.0054, |
|
"eval_rougeL": 0.0212, |
|
"eval_rougeLsum": 0.0209, |
|
"eval_runtime": 14.9165, |
|
"eval_samples_per_second": 7.71, |
|
"eval_steps_per_second": 1.341, |
|
"step": 3753 |
|
}, |
|
{ |
|
"epoch": 196.0, |
|
"eval_gen_len": 11.513, |
|
"eval_loss": 0.8241834044456482, |
|
"eval_rouge1": 0.0242, |
|
"eval_rouge2": 0.0054, |
|
"eval_rougeL": 0.0203, |
|
"eval_rougeLsum": 0.0202, |
|
"eval_runtime": 13.3344, |
|
"eval_samples_per_second": 8.624, |
|
"eval_steps_per_second": 1.5, |
|
"step": 3773 |
|
}, |
|
{ |
|
"epoch": 196.99, |
|
"eval_gen_len": 11.8348, |
|
"eval_loss": 0.8214186429977417, |
|
"eval_rouge1": 0.0277, |
|
"eval_rouge2": 0.0058, |
|
"eval_rougeL": 0.0246, |
|
"eval_rougeLsum": 0.0242, |
|
"eval_runtime": 15.0895, |
|
"eval_samples_per_second": 7.621, |
|
"eval_steps_per_second": 1.325, |
|
"step": 3792 |
|
}, |
|
{ |
|
"epoch": 197.97, |
|
"eval_gen_len": 11.6435, |
|
"eval_loss": 0.8196175694465637, |
|
"eval_rouge1": 0.0262, |
|
"eval_rouge2": 0.0056, |
|
"eval_rougeL": 0.0226, |
|
"eval_rougeLsum": 0.0227, |
|
"eval_runtime": 14.814, |
|
"eval_samples_per_second": 7.763, |
|
"eval_steps_per_second": 1.35, |
|
"step": 3811 |
|
}, |
|
{ |
|
"epoch": 198.96, |
|
"eval_gen_len": 11.7043, |
|
"eval_loss": 0.8178415894508362, |
|
"eval_rouge1": 0.0293, |
|
"eval_rouge2": 0.006, |
|
"eval_rougeL": 0.025, |
|
"eval_rougeLsum": 0.0248, |
|
"eval_runtime": 15.1097, |
|
"eval_samples_per_second": 7.611, |
|
"eval_steps_per_second": 1.324, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 200.0, |
|
"eval_gen_len": 11.8783, |
|
"eval_loss": 0.815380334854126, |
|
"eval_rouge1": 0.0318, |
|
"eval_rouge2": 0.006, |
|
"eval_rougeL": 0.0273, |
|
"eval_rougeLsum": 0.027, |
|
"eval_runtime": 14.6624, |
|
"eval_samples_per_second": 7.843, |
|
"eval_steps_per_second": 1.364, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 200.99, |
|
"eval_gen_len": 11.7913, |
|
"eval_loss": 0.8136902451515198, |
|
"eval_rouge1": 0.0307, |
|
"eval_rouge2": 0.0058, |
|
"eval_rougeL": 0.0265, |
|
"eval_rougeLsum": 0.0262, |
|
"eval_runtime": 14.2757, |
|
"eval_samples_per_second": 8.056, |
|
"eval_steps_per_second": 1.401, |
|
"step": 3869 |
|
}, |
|
{ |
|
"epoch": 201.97, |
|
"eval_gen_len": 11.8, |
|
"eval_loss": 0.8119075894355774, |
|
"eval_rouge1": 0.032, |
|
"eval_rouge2": 0.0061, |
|
"eval_rougeL": 0.0279, |
|
"eval_rougeLsum": 0.0277, |
|
"eval_runtime": 14.1634, |
|
"eval_samples_per_second": 8.12, |
|
"eval_steps_per_second": 1.412, |
|
"step": 3888 |
|
}, |
|
{ |
|
"epoch": 202.96, |
|
"eval_gen_len": 11.9652, |
|
"eval_loss": 0.8098872900009155, |
|
"eval_rouge1": 0.0334, |
|
"eval_rouge2": 0.0062, |
|
"eval_rougeL": 0.0289, |
|
"eval_rougeLsum": 0.0285, |
|
"eval_runtime": 17.6617, |
|
"eval_samples_per_second": 6.511, |
|
"eval_steps_per_second": 1.132, |
|
"step": 3907 |
|
}, |
|
{ |
|
"epoch": 204.0, |
|
"eval_gen_len": 12.0522, |
|
"eval_loss": 0.8077185750007629, |
|
"eval_rouge1": 0.0339, |
|
"eval_rouge2": 0.0068, |
|
"eval_rougeL": 0.0293, |
|
"eval_rougeLsum": 0.0291, |
|
"eval_runtime": 18.6428, |
|
"eval_samples_per_second": 6.169, |
|
"eval_steps_per_second": 1.073, |
|
"step": 3927 |
|
}, |
|
{ |
|
"epoch": 204.99, |
|
"eval_gen_len": 11.9478, |
|
"eval_loss": 0.8060031533241272, |
|
"eval_rouge1": 0.0331, |
|
"eval_rouge2": 0.0065, |
|
"eval_rougeL": 0.0286, |
|
"eval_rougeLsum": 0.0284, |
|
"eval_runtime": 28.209, |
|
"eval_samples_per_second": 4.077, |
|
"eval_steps_per_second": 0.709, |
|
"step": 3946 |
|
}, |
|
{ |
|
"epoch": 205.97, |
|
"eval_gen_len": 12.2087, |
|
"eval_loss": 0.8041767477989197, |
|
"eval_rouge1": 0.038, |
|
"eval_rouge2": 0.0083, |
|
"eval_rougeL": 0.0331, |
|
"eval_rougeLsum": 0.0329, |
|
"eval_runtime": 13.9196, |
|
"eval_samples_per_second": 8.262, |
|
"eval_steps_per_second": 1.437, |
|
"step": 3965 |
|
}, |
|
{ |
|
"epoch": 206.96, |
|
"eval_gen_len": 12.2348, |
|
"eval_loss": 0.8022732138633728, |
|
"eval_rouge1": 0.04, |
|
"eval_rouge2": 0.0093, |
|
"eval_rougeL": 0.0351, |
|
"eval_rougeLsum": 0.0348, |
|
"eval_runtime": 18.1652, |
|
"eval_samples_per_second": 6.331, |
|
"eval_steps_per_second": 1.101, |
|
"step": 3984 |
|
}, |
|
{ |
|
"epoch": 207.79, |
|
"grad_norm": 0.6825528740882874, |
|
"learning_rate": 5.982456140350877e-06, |
|
"loss": 0.9396, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 208.0, |
|
"eval_gen_len": 11.9913, |
|
"eval_loss": 0.8004079461097717, |
|
"eval_rouge1": 0.0377, |
|
"eval_rouge2": 0.0083, |
|
"eval_rougeL": 0.0326, |
|
"eval_rougeLsum": 0.0324, |
|
"eval_runtime": 15.4261, |
|
"eval_samples_per_second": 7.455, |
|
"eval_steps_per_second": 1.297, |
|
"step": 4004 |
|
}, |
|
{ |
|
"epoch": 208.99, |
|
"eval_gen_len": 12.2435, |
|
"eval_loss": 0.7987371683120728, |
|
"eval_rouge1": 0.0394, |
|
"eval_rouge2": 0.0081, |
|
"eval_rougeL": 0.0329, |
|
"eval_rougeLsum": 0.0326, |
|
"eval_runtime": 15.4914, |
|
"eval_samples_per_second": 7.423, |
|
"eval_steps_per_second": 1.291, |
|
"step": 4023 |
|
}, |
|
{ |
|
"epoch": 209.97, |
|
"eval_gen_len": 12.2174, |
|
"eval_loss": 0.7974857687950134, |
|
"eval_rouge1": 0.0398, |
|
"eval_rouge2": 0.0088, |
|
"eval_rougeL": 0.0348, |
|
"eval_rougeLsum": 0.0344, |
|
"eval_runtime": 15.8068, |
|
"eval_samples_per_second": 7.275, |
|
"eval_steps_per_second": 1.265, |
|
"step": 4042 |
|
}, |
|
{ |
|
"epoch": 210.96, |
|
"eval_gen_len": 12.2696, |
|
"eval_loss": 0.7953728437423706, |
|
"eval_rouge1": 0.0415, |
|
"eval_rouge2": 0.009, |
|
"eval_rougeL": 0.0365, |
|
"eval_rougeLsum": 0.036, |
|
"eval_runtime": 15.5151, |
|
"eval_samples_per_second": 7.412, |
|
"eval_steps_per_second": 1.289, |
|
"step": 4061 |
|
}, |
|
{ |
|
"epoch": 212.0, |
|
"eval_gen_len": 12.1304, |
|
"eval_loss": 0.7937628626823425, |
|
"eval_rouge1": 0.0418, |
|
"eval_rouge2": 0.009, |
|
"eval_rougeL": 0.037, |
|
"eval_rougeLsum": 0.0366, |
|
"eval_runtime": 15.6988, |
|
"eval_samples_per_second": 7.325, |
|
"eval_steps_per_second": 1.274, |
|
"step": 4081 |
|
}, |
|
{ |
|
"epoch": 212.99, |
|
"eval_gen_len": 12.1043, |
|
"eval_loss": 0.7920788526535034, |
|
"eval_rouge1": 0.0416, |
|
"eval_rouge2": 0.009, |
|
"eval_rougeL": 0.0369, |
|
"eval_rougeLsum": 0.0367, |
|
"eval_runtime": 14.6071, |
|
"eval_samples_per_second": 7.873, |
|
"eval_steps_per_second": 1.369, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 213.97, |
|
"eval_gen_len": 11.9652, |
|
"eval_loss": 0.7905020117759705, |
|
"eval_rouge1": 0.041, |
|
"eval_rouge2": 0.0078, |
|
"eval_rougeL": 0.036, |
|
"eval_rougeLsum": 0.0357, |
|
"eval_runtime": 16.4932, |
|
"eval_samples_per_second": 6.973, |
|
"eval_steps_per_second": 1.213, |
|
"step": 4119 |
|
}, |
|
{ |
|
"epoch": 214.96, |
|
"eval_gen_len": 11.9391, |
|
"eval_loss": 0.7891045212745667, |
|
"eval_rouge1": 0.0411, |
|
"eval_rouge2": 0.0078, |
|
"eval_rougeL": 0.0361, |
|
"eval_rougeLsum": 0.0358, |
|
"eval_runtime": 15.0709, |
|
"eval_samples_per_second": 7.631, |
|
"eval_steps_per_second": 1.327, |
|
"step": 4138 |
|
}, |
|
{ |
|
"epoch": 216.0, |
|
"eval_gen_len": 12.1739, |
|
"eval_loss": 0.7874982953071594, |
|
"eval_rouge1": 0.0426, |
|
"eval_rouge2": 0.0081, |
|
"eval_rougeL": 0.0366, |
|
"eval_rougeLsum": 0.0363, |
|
"eval_runtime": 15.3399, |
|
"eval_samples_per_second": 7.497, |
|
"eval_steps_per_second": 1.304, |
|
"step": 4158 |
|
}, |
|
{ |
|
"epoch": 216.99, |
|
"eval_gen_len": 12.3043, |
|
"eval_loss": 0.7856701016426086, |
|
"eval_rouge1": 0.0444, |
|
"eval_rouge2": 0.0092, |
|
"eval_rougeL": 0.0384, |
|
"eval_rougeLsum": 0.0383, |
|
"eval_runtime": 16.5308, |
|
"eval_samples_per_second": 6.957, |
|
"eval_steps_per_second": 1.21, |
|
"step": 4177 |
|
}, |
|
{ |
|
"epoch": 217.97, |
|
"eval_gen_len": 12.2957, |
|
"eval_loss": 0.7841366529464722, |
|
"eval_rouge1": 0.0445, |
|
"eval_rouge2": 0.0092, |
|
"eval_rougeL": 0.039, |
|
"eval_rougeLsum": 0.0388, |
|
"eval_runtime": 13.9248, |
|
"eval_samples_per_second": 8.259, |
|
"eval_steps_per_second": 1.436, |
|
"step": 4196 |
|
}, |
|
{ |
|
"epoch": 218.96, |
|
"eval_gen_len": 12.313, |
|
"eval_loss": 0.7825812101364136, |
|
"eval_rouge1": 0.0443, |
|
"eval_rouge2": 0.0087, |
|
"eval_rougeL": 0.0382, |
|
"eval_rougeLsum": 0.038, |
|
"eval_runtime": 14.6481, |
|
"eval_samples_per_second": 7.851, |
|
"eval_steps_per_second": 1.365, |
|
"step": 4215 |
|
}, |
|
{ |
|
"epoch": 220.0, |
|
"eval_gen_len": 12.1217, |
|
"eval_loss": 0.7813829779624939, |
|
"eval_rouge1": 0.0438, |
|
"eval_rouge2": 0.0085, |
|
"eval_rougeL": 0.0379, |
|
"eval_rougeLsum": 0.0375, |
|
"eval_runtime": 16.4777, |
|
"eval_samples_per_second": 6.979, |
|
"eval_steps_per_second": 1.214, |
|
"step": 4235 |
|
}, |
|
{ |
|
"epoch": 220.99, |
|
"eval_gen_len": 12.0348, |
|
"eval_loss": 0.7796338796615601, |
|
"eval_rouge1": 0.0431, |
|
"eval_rouge2": 0.0085, |
|
"eval_rougeL": 0.0372, |
|
"eval_rougeLsum": 0.0369, |
|
"eval_runtime": 15.2958, |
|
"eval_samples_per_second": 7.518, |
|
"eval_steps_per_second": 1.308, |
|
"step": 4254 |
|
}, |
|
{ |
|
"epoch": 221.97, |
|
"eval_gen_len": 12.1043, |
|
"eval_loss": 0.77826988697052, |
|
"eval_rouge1": 0.0423, |
|
"eval_rouge2": 0.009, |
|
"eval_rougeL": 0.0365, |
|
"eval_rougeLsum": 0.0362, |
|
"eval_runtime": 16.2627, |
|
"eval_samples_per_second": 7.071, |
|
"eval_steps_per_second": 1.23, |
|
"step": 4273 |
|
}, |
|
{ |
|
"epoch": 222.96, |
|
"eval_gen_len": 12.0435, |
|
"eval_loss": 0.7768360376358032, |
|
"eval_rouge1": 0.0426, |
|
"eval_rouge2": 0.009, |
|
"eval_rougeL": 0.0365, |
|
"eval_rougeLsum": 0.0363, |
|
"eval_runtime": 24.5776, |
|
"eval_samples_per_second": 4.679, |
|
"eval_steps_per_second": 0.814, |
|
"step": 4292 |
|
}, |
|
{ |
|
"epoch": 224.0, |
|
"eval_gen_len": 12.0, |
|
"eval_loss": 0.7752098441123962, |
|
"eval_rouge1": 0.0425, |
|
"eval_rouge2": 0.009, |
|
"eval_rougeL": 0.0363, |
|
"eval_rougeLsum": 0.0361, |
|
"eval_runtime": 13.756, |
|
"eval_samples_per_second": 8.36, |
|
"eval_steps_per_second": 1.454, |
|
"step": 4312 |
|
}, |
|
{ |
|
"epoch": 224.99, |
|
"eval_gen_len": 11.9391, |
|
"eval_loss": 0.7739911675453186, |
|
"eval_rouge1": 0.043, |
|
"eval_rouge2": 0.009, |
|
"eval_rougeL": 0.0371, |
|
"eval_rougeLsum": 0.0367, |
|
"eval_runtime": 15.726, |
|
"eval_samples_per_second": 7.313, |
|
"eval_steps_per_second": 1.272, |
|
"step": 4331 |
|
}, |
|
{ |
|
"epoch": 225.97, |
|
"eval_gen_len": 11.8609, |
|
"eval_loss": 0.7723690271377563, |
|
"eval_rouge1": 0.0414, |
|
"eval_rouge2": 0.009, |
|
"eval_rougeL": 0.0357, |
|
"eval_rougeLsum": 0.0355, |
|
"eval_runtime": 16.5662, |
|
"eval_samples_per_second": 6.942, |
|
"eval_steps_per_second": 1.207, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 226.96, |
|
"eval_gen_len": 11.7652, |
|
"eval_loss": 0.7711983919143677, |
|
"eval_rouge1": 0.0429, |
|
"eval_rouge2": 0.0093, |
|
"eval_rougeL": 0.0363, |
|
"eval_rougeLsum": 0.0359, |
|
"eval_runtime": 14.4971, |
|
"eval_samples_per_second": 7.933, |
|
"eval_steps_per_second": 1.38, |
|
"step": 4369 |
|
}, |
|
{ |
|
"epoch": 228.0, |
|
"eval_gen_len": 11.913, |
|
"eval_loss": 0.7694764733314514, |
|
"eval_rouge1": 0.0416, |
|
"eval_rouge2": 0.0093, |
|
"eval_rougeL": 0.0357, |
|
"eval_rougeLsum": 0.0354, |
|
"eval_runtime": 13.7683, |
|
"eval_samples_per_second": 8.353, |
|
"eval_steps_per_second": 1.453, |
|
"step": 4389 |
|
}, |
|
{ |
|
"epoch": 228.99, |
|
"eval_gen_len": 12.0087, |
|
"eval_loss": 0.7683370113372803, |
|
"eval_rouge1": 0.0426, |
|
"eval_rouge2": 0.01, |
|
"eval_rougeL": 0.0369, |
|
"eval_rougeLsum": 0.0364, |
|
"eval_runtime": 14.7051, |
|
"eval_samples_per_second": 7.82, |
|
"eval_steps_per_second": 1.36, |
|
"step": 4408 |
|
}, |
|
{ |
|
"epoch": 229.97, |
|
"eval_gen_len": 12.0696, |
|
"eval_loss": 0.7668902277946472, |
|
"eval_rouge1": 0.0422, |
|
"eval_rouge2": 0.0095, |
|
"eval_rougeL": 0.0364, |
|
"eval_rougeLsum": 0.036, |
|
"eval_runtime": 12.5967, |
|
"eval_samples_per_second": 9.129, |
|
"eval_steps_per_second": 1.588, |
|
"step": 4427 |
|
}, |
|
{ |
|
"epoch": 230.96, |
|
"eval_gen_len": 11.7217, |
|
"eval_loss": 0.7656229734420776, |
|
"eval_rouge1": 0.0396, |
|
"eval_rouge2": 0.0094, |
|
"eval_rougeL": 0.0342, |
|
"eval_rougeLsum": 0.0339, |
|
"eval_runtime": 10.0582, |
|
"eval_samples_per_second": 11.433, |
|
"eval_steps_per_second": 1.988, |
|
"step": 4446 |
|
}, |
|
{ |
|
"epoch": 232.0, |
|
"eval_gen_len": 11.5652, |
|
"eval_loss": 0.7644599676132202, |
|
"eval_rouge1": 0.0411, |
|
"eval_rouge2": 0.0091, |
|
"eval_rougeL": 0.0352, |
|
"eval_rougeLsum": 0.0349, |
|
"eval_runtime": 9.9608, |
|
"eval_samples_per_second": 11.545, |
|
"eval_steps_per_second": 2.008, |
|
"step": 4466 |
|
}, |
|
{ |
|
"epoch": 232.99, |
|
"eval_gen_len": 11.7826, |
|
"eval_loss": 0.7628152370452881, |
|
"eval_rouge1": 0.0421, |
|
"eval_rouge2": 0.0095, |
|
"eval_rougeL": 0.0371, |
|
"eval_rougeLsum": 0.0371, |
|
"eval_runtime": 10.6119, |
|
"eval_samples_per_second": 10.837, |
|
"eval_steps_per_second": 1.885, |
|
"step": 4485 |
|
}, |
|
{ |
|
"epoch": 233.77, |
|
"grad_norm": 0.5715782642364502, |
|
"learning_rate": 4.228070175438596e-06, |
|
"loss": 0.8871, |
|
"step": 4500 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 5700, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 300, |
|
"save_steps": 500, |
|
"total_flos": 1.3097521558270771e+17, |
|
"train_batch_size": 6, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|