{
  "best_metric": 0.51416015625,
  "best_model_checkpoint": "autotrain-jvq6k-yf3ca/checkpoint-570",
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 570,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.19,
      "grad_norm": 8.074106342959622,
      "learning_rate": 4.210526315789474e-05,
      "loss": 4.7837,
      "step": 37
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.5780648325169624,
      "learning_rate": 0.0001394736842105263,
      "loss": 1.6204,
      "step": 74
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.533813969716365,
      "learning_rate": 0.00023684210526315788,
      "loss": 0.8889,
      "step": 111
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.3405358212713185,
      "learning_rate": 0.00033421052631578944,
      "loss": 0.8323,
      "step": 148
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.6021401005483846,
      "learning_rate": 0.0004315789473684211,
      "loss": 0.7663,
      "step": 185
    },
    {
      "epoch": 1.0,
      "eval_gen_len": 9.6497,
      "eval_loss": 0.5634765625,
      "eval_rouge1": 84.8501,
      "eval_rouge2": 72.9759,
      "eval_rougeL": 83.9381,
      "eval_rougeLsum": 83.9882,
      "eval_runtime": 59.9525,
      "eval_samples_per_second": 25.27,
      "eval_steps_per_second": 0.4,
      "step": 190
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.014072063970188,
      "learning_rate": 0.0004967836257309941,
      "loss": 0.6051,
      "step": 222
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.5322811119510225,
      "learning_rate": 0.0004868421052631579,
      "loss": 0.6124,
      "step": 259
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.684844161543083,
      "learning_rate": 0.00047690058479532164,
      "loss": 0.5759,
      "step": 296
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.830769701886568,
      "learning_rate": 0.00046637426900584796,
      "loss": 0.6172,
      "step": 333
    },
    {
      "epoch": 1.95,
      "grad_norm": 1.482259801017781,
      "learning_rate": 0.00045584795321637427,
      "loss": 0.5658,
      "step": 370
    },
    {
      "epoch": 2.0,
      "eval_gen_len": 9.709,
      "eval_loss": 0.52587890625,
      "eval_rouge1": 86.3194,
      "eval_rouge2": 74.6858,
      "eval_rougeL": 85.4633,
      "eval_rougeLsum": 85.4901,
      "eval_runtime": 58.3784,
      "eval_samples_per_second": 25.951,
      "eval_steps_per_second": 0.411,
      "step": 380
    },
    {
      "epoch": 2.14,
      "grad_norm": 1.4815030414040293,
      "learning_rate": 0.0004450292397660819,
      "loss": 0.4243,
      "step": 407
    },
    {
      "epoch": 2.34,
      "grad_norm": 1.4036378645480725,
      "learning_rate": 0.0004342105263157895,
      "loss": 0.3784,
      "step": 444
    },
    {
      "epoch": 2.53,
      "grad_norm": 1.5836309025847368,
      "learning_rate": 0.0004233918128654971,
      "loss": 0.3789,
      "step": 481
    },
    {
      "epoch": 2.73,
      "grad_norm": 1.6685671897157268,
      "learning_rate": 0.0004125730994152047,
      "loss": 0.4048,
      "step": 518
    },
    {
      "epoch": 2.92,
      "grad_norm": 1.9497904549926495,
      "learning_rate": 0.0004017543859649123,
      "loss": 0.3807,
      "step": 555
    },
    {
      "epoch": 3.0,
      "eval_gen_len": 9.8561,
      "eval_loss": 0.51416015625,
      "eval_rouge1": 87.4319,
      "eval_rouge2": 76.4229,
      "eval_rougeL": 86.4987,
      "eval_rougeLsum": 86.5222,
      "eval_runtime": 59.8804,
      "eval_samples_per_second": 25.3,
      "eval_steps_per_second": 0.401,
      "step": 570
    }
  ],
  "logging_steps": 37,
  "max_steps": 1900,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 500,
  "total_flos": 438831341568.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}