|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 15.961630695443645, |
|
"eval_steps": 500, |
|
"global_step": 3328, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_f1": 0.8988, |
|
"eval_gen_len": 29.90909090909091, |
|
"eval_loss": 1.8075228929519653, |
|
"eval_precision": 0.901, |
|
"eval_recall": 0.897, |
|
"eval_rouge1": 0.411, |
|
"eval_rouge2": 0.1689, |
|
"eval_rougeL": 0.3152, |
|
"eval_rougeLsum": 0.3155, |
|
"eval_runtime": 182.3495, |
|
"eval_samples_per_second": 6.032, |
|
"eval_steps_per_second": 0.378, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_f1": 0.904, |
|
"eval_gen_len": 29.907272727272726, |
|
"eval_loss": 1.7311877012252808, |
|
"eval_precision": 0.9059, |
|
"eval_recall": 0.9024, |
|
"eval_rouge1": 0.4379, |
|
"eval_rouge2": 0.1893, |
|
"eval_rougeL": 0.3442, |
|
"eval_rougeLsum": 0.3446, |
|
"eval_runtime": 160.7204, |
|
"eval_samples_per_second": 6.844, |
|
"eval_steps_per_second": 0.429, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 1.699519230769231e-05, |
|
"loss": 2.0112, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_f1": 0.9055, |
|
"eval_gen_len": 30.017272727272726, |
|
"eval_loss": 1.6986640691757202, |
|
"eval_precision": 0.9075, |
|
"eval_recall": 0.9039, |
|
"eval_rouge1": 0.4475, |
|
"eval_rouge2": 0.1978, |
|
"eval_rougeL": 0.352, |
|
"eval_rougeLsum": 0.3525, |
|
"eval_runtime": 161.6717, |
|
"eval_samples_per_second": 6.804, |
|
"eval_steps_per_second": 0.427, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_f1": 0.9063, |
|
"eval_gen_len": 30.061818181818182, |
|
"eval_loss": 1.676792025566101, |
|
"eval_precision": 0.9082, |
|
"eval_recall": 0.9047, |
|
"eval_rouge1": 0.4514, |
|
"eval_rouge2": 0.1981, |
|
"eval_rougeL": 0.357, |
|
"eval_rougeLsum": 0.3573, |
|
"eval_runtime": 160.8885, |
|
"eval_samples_per_second": 6.837, |
|
"eval_steps_per_second": 0.429, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 1.3990384615384615e-05, |
|
"loss": 1.7647, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_f1": 0.9068, |
|
"eval_gen_len": 30.326363636363638, |
|
"eval_loss": 1.661742091178894, |
|
"eval_precision": 0.9084, |
|
"eval_recall": 0.9055, |
|
"eval_rouge1": 0.4537, |
|
"eval_rouge2": 0.2003, |
|
"eval_rougeL": 0.3592, |
|
"eval_rougeLsum": 0.3595, |
|
"eval_runtime": 162.5609, |
|
"eval_samples_per_second": 6.767, |
|
"eval_steps_per_second": 0.424, |
|
"step": 1042 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_f1": 0.9072, |
|
"eval_gen_len": 30.082727272727272, |
|
"eval_loss": 1.6502095460891724, |
|
"eval_precision": 0.9089, |
|
"eval_recall": 0.9057, |
|
"eval_rouge1": 0.4554, |
|
"eval_rouge2": 0.2021, |
|
"eval_rougeL": 0.3607, |
|
"eval_rougeLsum": 0.361, |
|
"eval_runtime": 159.1789, |
|
"eval_samples_per_second": 6.91, |
|
"eval_steps_per_second": 0.433, |
|
"step": 1251 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_f1": 0.908, |
|
"eval_gen_len": 30.021818181818183, |
|
"eval_loss": 1.6416178941726685, |
|
"eval_precision": 0.9099, |
|
"eval_recall": 0.9064, |
|
"eval_rouge1": 0.4592, |
|
"eval_rouge2": 0.2052, |
|
"eval_rougeL": 0.3639, |
|
"eval_rougeLsum": 0.3641, |
|
"eval_runtime": 156.6401, |
|
"eval_samples_per_second": 7.022, |
|
"eval_steps_per_second": 0.441, |
|
"step": 1459 |
|
}, |
|
{ |
|
"epoch": 7.19, |
|
"learning_rate": 1.0985576923076924e-05, |
|
"loss": 1.6948, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_f1": 0.9081, |
|
"eval_gen_len": 30.78272727272727, |
|
"eval_loss": 1.6360372304916382, |
|
"eval_precision": 0.909, |
|
"eval_recall": 0.9074, |
|
"eval_rouge1": 0.4612, |
|
"eval_rouge2": 0.2054, |
|
"eval_rougeL": 0.3649, |
|
"eval_rougeLsum": 0.365, |
|
"eval_runtime": 160.7238, |
|
"eval_samples_per_second": 6.844, |
|
"eval_steps_per_second": 0.429, |
|
"step": 1668 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_f1": 0.9083, |
|
"eval_gen_len": 30.62909090909091, |
|
"eval_loss": 1.6301532983779907, |
|
"eval_precision": 0.9095, |
|
"eval_recall": 0.9074, |
|
"eval_rouge1": 0.4621, |
|
"eval_rouge2": 0.2062, |
|
"eval_rougeL": 0.3645, |
|
"eval_rougeLsum": 0.3647, |
|
"eval_runtime": 159.8662, |
|
"eval_samples_per_second": 6.881, |
|
"eval_steps_per_second": 0.432, |
|
"step": 1876 |
|
}, |
|
{ |
|
"epoch": 9.59, |
|
"learning_rate": 7.980769230769232e-06, |
|
"loss": 1.6501, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_f1": 0.9083, |
|
"eval_gen_len": 30.48181818181818, |
|
"eval_loss": 1.6264721155166626, |
|
"eval_precision": 0.9095, |
|
"eval_recall": 0.9073, |
|
"eval_rouge1": 0.4606, |
|
"eval_rouge2": 0.2051, |
|
"eval_rougeL": 0.3651, |
|
"eval_rougeLsum": 0.3655, |
|
"eval_runtime": 157.2711, |
|
"eval_samples_per_second": 6.994, |
|
"eval_steps_per_second": 0.439, |
|
"step": 2085 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_f1": 0.9087, |
|
"eval_gen_len": 30.806363636363635, |
|
"eval_loss": 1.6229554414749146, |
|
"eval_precision": 0.9097, |
|
"eval_recall": 0.908, |
|
"eval_rouge1": 0.4625, |
|
"eval_rouge2": 0.2073, |
|
"eval_rougeL": 0.3658, |
|
"eval_rougeLsum": 0.366, |
|
"eval_runtime": 157.1263, |
|
"eval_samples_per_second": 7.001, |
|
"eval_steps_per_second": 0.439, |
|
"step": 2293 |
|
}, |
|
{ |
|
"epoch": 11.99, |
|
"learning_rate": 4.975961538461539e-06, |
|
"loss": 1.6222, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_f1": 0.909, |
|
"eval_gen_len": 30.55272727272727, |
|
"eval_loss": 1.6204967498779297, |
|
"eval_precision": 0.9103, |
|
"eval_recall": 0.9081, |
|
"eval_rouge1": 0.4644, |
|
"eval_rouge2": 0.2082, |
|
"eval_rougeL": 0.3674, |
|
"eval_rougeLsum": 0.3679, |
|
"eval_runtime": 155.7494, |
|
"eval_samples_per_second": 7.063, |
|
"eval_steps_per_second": 0.443, |
|
"step": 2502 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_f1": 0.909, |
|
"eval_gen_len": 30.805454545454545, |
|
"eval_loss": 1.618751883506775, |
|
"eval_precision": 0.9101, |
|
"eval_recall": 0.9083, |
|
"eval_rouge1": 0.4648, |
|
"eval_rouge2": 0.2087, |
|
"eval_rougeL": 0.3681, |
|
"eval_rougeLsum": 0.3683, |
|
"eval_runtime": 158.59, |
|
"eval_samples_per_second": 6.936, |
|
"eval_steps_per_second": 0.435, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_f1": 0.9093, |
|
"eval_gen_len": 30.670909090909092, |
|
"eval_loss": 1.6172130107879639, |
|
"eval_precision": 0.9104, |
|
"eval_recall": 0.9084, |
|
"eval_rouge1": 0.4654, |
|
"eval_rouge2": 0.2097, |
|
"eval_rougeL": 0.3685, |
|
"eval_rougeLsum": 0.3689, |
|
"eval_runtime": 159.3516, |
|
"eval_samples_per_second": 6.903, |
|
"eval_steps_per_second": 0.433, |
|
"step": 2919 |
|
}, |
|
{ |
|
"epoch": 14.39, |
|
"learning_rate": 1.971153846153846e-06, |
|
"loss": 1.6048, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_f1": 0.9093, |
|
"eval_gen_len": 30.630909090909093, |
|
"eval_loss": 1.6168792247772217, |
|
"eval_precision": 0.9104, |
|
"eval_recall": 0.9084, |
|
"eval_rouge1": 0.465, |
|
"eval_rouge2": 0.21, |
|
"eval_rougeL": 0.3693, |
|
"eval_rougeLsum": 0.3697, |
|
"eval_runtime": 158.4169, |
|
"eval_samples_per_second": 6.944, |
|
"eval_steps_per_second": 0.436, |
|
"step": 3127 |
|
}, |
|
{ |
|
"epoch": 15.96, |
|
"eval_f1": 0.9091, |
|
"eval_gen_len": 30.619090909090907, |
|
"eval_loss": 1.6166560649871826, |
|
"eval_precision": 0.9102, |
|
"eval_recall": 0.9083, |
|
"eval_rouge1": 0.4649, |
|
"eval_rouge2": 0.2096, |
|
"eval_rougeL": 0.3686, |
|
"eval_rougeLsum": 0.3688, |
|
"eval_runtime": 157.7317, |
|
"eval_samples_per_second": 6.974, |
|
"eval_steps_per_second": 0.437, |
|
"step": 3328 |
|
}, |
|
{ |
|
"epoch": 15.96, |
|
"step": 3328, |
|
"total_flos": 4.6122414295331635e+17, |
|
"train_loss": 1.711962864949153, |
|
"train_runtime": 11907.5247, |
|
"train_samples_per_second": 26.874, |
|
"train_steps_per_second": 0.279 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 3328, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 16, |
|
"save_steps": 500, |
|
"total_flos": 4.6122414295331635e+17, |
|
"train_batch_size": 24, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|