echoctx's picture
Upload folder using huggingface_hub
787e443 verified
{
"best_metric": 0.51416015625,
"best_model_checkpoint": "autotrain-jvq6k-yf3ca/checkpoint-570",
"epoch": 3.0,
"eval_steps": 500,
"global_step": 570,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.19,
"grad_norm": 8.074106342959622,
"learning_rate": 4.210526315789474e-05,
"loss": 4.7837,
"step": 37
},
{
"epoch": 0.39,
"grad_norm": 2.5780648325169624,
"learning_rate": 0.0001394736842105263,
"loss": 1.6204,
"step": 74
},
{
"epoch": 0.58,
"grad_norm": 2.533813969716365,
"learning_rate": 0.00023684210526315788,
"loss": 0.8889,
"step": 111
},
{
"epoch": 0.78,
"grad_norm": 2.3405358212713185,
"learning_rate": 0.00033421052631578944,
"loss": 0.8323,
"step": 148
},
{
"epoch": 0.97,
"grad_norm": 1.6021401005483846,
"learning_rate": 0.0004315789473684211,
"loss": 0.7663,
"step": 185
},
{
"epoch": 1.0,
"eval_gen_len": 9.6497,
"eval_loss": 0.5634765625,
"eval_rouge1": 84.8501,
"eval_rouge2": 72.9759,
"eval_rougeL": 83.9381,
"eval_rougeLsum": 83.9882,
"eval_runtime": 59.9525,
"eval_samples_per_second": 25.27,
"eval_steps_per_second": 0.4,
"step": 190
},
{
"epoch": 1.17,
"grad_norm": 2.014072063970188,
"learning_rate": 0.0004967836257309941,
"loss": 0.6051,
"step": 222
},
{
"epoch": 1.36,
"grad_norm": 2.5322811119510225,
"learning_rate": 0.0004868421052631579,
"loss": 0.6124,
"step": 259
},
{
"epoch": 1.56,
"grad_norm": 1.684844161543083,
"learning_rate": 0.00047690058479532164,
"loss": 0.5759,
"step": 296
},
{
"epoch": 1.75,
"grad_norm": 1.830769701886568,
"learning_rate": 0.00046637426900584796,
"loss": 0.6172,
"step": 333
},
{
"epoch": 1.95,
"grad_norm": 1.482259801017781,
"learning_rate": 0.00045584795321637427,
"loss": 0.5658,
"step": 370
},
{
"epoch": 2.0,
"eval_gen_len": 9.709,
"eval_loss": 0.52587890625,
"eval_rouge1": 86.3194,
"eval_rouge2": 74.6858,
"eval_rougeL": 85.4633,
"eval_rougeLsum": 85.4901,
"eval_runtime": 58.3784,
"eval_samples_per_second": 25.951,
"eval_steps_per_second": 0.411,
"step": 380
},
{
"epoch": 2.14,
"grad_norm": 1.4815030414040293,
"learning_rate": 0.0004450292397660819,
"loss": 0.4243,
"step": 407
},
{
"epoch": 2.34,
"grad_norm": 1.4036378645480725,
"learning_rate": 0.0004342105263157895,
"loss": 0.3784,
"step": 444
},
{
"epoch": 2.53,
"grad_norm": 1.5836309025847368,
"learning_rate": 0.0004233918128654971,
"loss": 0.3789,
"step": 481
},
{
"epoch": 2.73,
"grad_norm": 1.6685671897157268,
"learning_rate": 0.0004125730994152047,
"loss": 0.4048,
"step": 518
},
{
"epoch": 2.92,
"grad_norm": 1.9497904549926495,
"learning_rate": 0.0004017543859649123,
"loss": 0.3807,
"step": 555
},
{
"epoch": 3.0,
"eval_gen_len": 9.8561,
"eval_loss": 0.51416015625,
"eval_rouge1": 87.4319,
"eval_rouge2": 76.4229,
"eval_rougeL": 86.4987,
"eval_rougeLsum": 86.5222,
"eval_runtime": 59.8804,
"eval_samples_per_second": 25.3,
"eval_steps_per_second": 0.401,
"step": 570
}
],
"logging_steps": 37,
"max_steps": 1900,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 500,
"total_flos": 438831341568.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}