|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 15.988480921526278, |
|
"eval_steps": 500, |
|
"global_step": 16656, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.9399615754082615e-05, |
|
"loss": 1.8112, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.879923150816523e-05, |
|
"loss": 1.6328, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_f1": 0.9029, |
|
"eval_gen_len": 19.87818181818182, |
|
"eval_loss": 1.4800708293914795, |
|
"eval_precision": 0.9134, |
|
"eval_recall": 0.893, |
|
"eval_rouge1": 0.448, |
|
"eval_rouge2": 0.2243, |
|
"eval_rougeL": 0.385, |
|
"eval_rougeLsum": 0.385, |
|
"eval_runtime": 603.3554, |
|
"eval_samples_per_second": 9.116, |
|
"eval_steps_per_second": 0.57, |
|
"step": 1041 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 1.819884726224784e-05, |
|
"loss": 1.4991, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 1.7598463016330453e-05, |
|
"loss": 1.4598, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_f1": 0.9022, |
|
"eval_gen_len": 19.934363636363635, |
|
"eval_loss": 1.405110478401184, |
|
"eval_precision": 0.9147, |
|
"eval_recall": 0.8903, |
|
"eval_rouge1": 0.4428, |
|
"eval_rouge2": 0.2273, |
|
"eval_rougeL": 0.3851, |
|
"eval_rougeLsum": 0.385, |
|
"eval_runtime": 669.8531, |
|
"eval_samples_per_second": 8.211, |
|
"eval_steps_per_second": 0.514, |
|
"step": 2083 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 1.6998078770413066e-05, |
|
"loss": 1.3652, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 1.6397694524495677e-05, |
|
"loss": 1.3402, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_f1": 0.9034, |
|
"eval_gen_len": 19.95, |
|
"eval_loss": 1.3839877843856812, |
|
"eval_precision": 0.9158, |
|
"eval_recall": 0.8918, |
|
"eval_rouge1": 0.4498, |
|
"eval_rouge2": 0.2318, |
|
"eval_rougeL": 0.3921, |
|
"eval_rougeLsum": 0.392, |
|
"eval_runtime": 670.3562, |
|
"eval_samples_per_second": 8.205, |
|
"eval_steps_per_second": 0.513, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"learning_rate": 1.579731027857829e-05, |
|
"loss": 1.2679, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"learning_rate": 1.5196926032660904e-05, |
|
"loss": 1.2446, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_f1": 0.9054, |
|
"eval_gen_len": 19.884, |
|
"eval_loss": 1.3682185411453247, |
|
"eval_precision": 0.9169, |
|
"eval_recall": 0.8944, |
|
"eval_rouge1": 0.4604, |
|
"eval_rouge2": 0.2405, |
|
"eval_rougeL": 0.4014, |
|
"eval_rougeLsum": 0.4014, |
|
"eval_runtime": 577.6339, |
|
"eval_samples_per_second": 9.522, |
|
"eval_steps_per_second": 0.596, |
|
"step": 4167 |
|
}, |
|
{ |
|
"epoch": 4.32, |
|
"learning_rate": 1.4596541786743516e-05, |
|
"loss": 1.1877, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 1.399615754082613e-05, |
|
"loss": 1.1651, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_f1": 0.9055, |
|
"eval_gen_len": 19.894, |
|
"eval_loss": 1.3695330619812012, |
|
"eval_precision": 0.9173, |
|
"eval_recall": 0.8942, |
|
"eval_rouge1": 0.4594, |
|
"eval_rouge2": 0.2401, |
|
"eval_rougeL": 0.3995, |
|
"eval_rougeLsum": 0.3995, |
|
"eval_runtime": 669.362, |
|
"eval_samples_per_second": 8.217, |
|
"eval_steps_per_second": 0.514, |
|
"step": 5208 |
|
}, |
|
{ |
|
"epoch": 5.28, |
|
"learning_rate": 1.3395773294908743e-05, |
|
"loss": 1.1201, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 5.76, |
|
"learning_rate": 1.2795389048991355e-05, |
|
"loss": 1.1002, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_f1": 0.9053, |
|
"eval_gen_len": 19.91181818181818, |
|
"eval_loss": 1.3782570362091064, |
|
"eval_precision": 0.9166, |
|
"eval_recall": 0.8945, |
|
"eval_rouge1": 0.4607, |
|
"eval_rouge2": 0.2423, |
|
"eval_rougeL": 0.4014, |
|
"eval_rougeLsum": 0.4014, |
|
"eval_runtime": 671.1543, |
|
"eval_samples_per_second": 8.195, |
|
"eval_steps_per_second": 0.513, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 6.24, |
|
"learning_rate": 1.2195004803073969e-05, |
|
"loss": 1.0653, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 6.72, |
|
"learning_rate": 1.1594620557156582e-05, |
|
"loss": 1.0427, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_f1": 0.9056, |
|
"eval_gen_len": 19.907454545454545, |
|
"eval_loss": 1.3850913047790527, |
|
"eval_precision": 0.9172, |
|
"eval_recall": 0.8946, |
|
"eval_rouge1": 0.462, |
|
"eval_rouge2": 0.2432, |
|
"eval_rougeL": 0.4028, |
|
"eval_rougeLsum": 0.4028, |
|
"eval_runtime": 669.8936, |
|
"eval_samples_per_second": 8.21, |
|
"eval_steps_per_second": 0.514, |
|
"step": 7292 |
|
}, |
|
{ |
|
"epoch": 7.2, |
|
"learning_rate": 1.0994236311239194e-05, |
|
"loss": 1.0163, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 7.68, |
|
"learning_rate": 1.0393852065321808e-05, |
|
"loss": 0.9881, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_f1": 0.9059, |
|
"eval_gen_len": 19.907090909090908, |
|
"eval_loss": 1.3910883665084839, |
|
"eval_precision": 0.9177, |
|
"eval_recall": 0.8947, |
|
"eval_rouge1": 0.4635, |
|
"eval_rouge2": 0.2442, |
|
"eval_rougeL": 0.4038, |
|
"eval_rougeLsum": 0.4037, |
|
"eval_runtime": 573.3321, |
|
"eval_samples_per_second": 9.593, |
|
"eval_steps_per_second": 0.6, |
|
"step": 8334 |
|
}, |
|
{ |
|
"epoch": 8.16, |
|
"learning_rate": 9.79346781940442e-06, |
|
"loss": 0.9742, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 8.64, |
|
"learning_rate": 9.193083573487034e-06, |
|
"loss": 0.9435, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_f1": 0.9067, |
|
"eval_gen_len": 19.880545454545455, |
|
"eval_loss": 1.4075220823287964, |
|
"eval_precision": 0.918, |
|
"eval_recall": 0.8959, |
|
"eval_rouge1": 0.468, |
|
"eval_rouge2": 0.2471, |
|
"eval_rougeL": 0.4085, |
|
"eval_rougeLsum": 0.4084, |
|
"eval_runtime": 599.9366, |
|
"eval_samples_per_second": 9.168, |
|
"eval_steps_per_second": 0.573, |
|
"step": 9375 |
|
}, |
|
{ |
|
"epoch": 9.12, |
|
"learning_rate": 8.592699327569645e-06, |
|
"loss": 0.9362, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"learning_rate": 7.992315081652257e-06, |
|
"loss": 0.9035, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_f1": 0.9064, |
|
"eval_gen_len": 19.881090909090908, |
|
"eval_loss": 1.412468671798706, |
|
"eval_precision": 0.9178, |
|
"eval_recall": 0.8957, |
|
"eval_rouge1": 0.4675, |
|
"eval_rouge2": 0.248, |
|
"eval_rougeL": 0.4085, |
|
"eval_rougeLsum": 0.4086, |
|
"eval_runtime": 566.1377, |
|
"eval_samples_per_second": 9.715, |
|
"eval_steps_per_second": 0.608, |
|
"step": 10417 |
|
}, |
|
{ |
|
"epoch": 10.08, |
|
"learning_rate": 7.391930835734871e-06, |
|
"loss": 0.9014, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 10.56, |
|
"learning_rate": 6.791546589817484e-06, |
|
"loss": 0.8702, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_f1": 0.9063, |
|
"eval_gen_len": 19.894727272727273, |
|
"eval_loss": 1.4218909740447998, |
|
"eval_precision": 0.9181, |
|
"eval_recall": 0.895, |
|
"eval_rouge1": 0.4646, |
|
"eval_rouge2": 0.2455, |
|
"eval_rougeL": 0.405, |
|
"eval_rougeLsum": 0.4051, |
|
"eval_runtime": 670.3799, |
|
"eval_samples_per_second": 8.204, |
|
"eval_steps_per_second": 0.513, |
|
"step": 11459 |
|
}, |
|
{ |
|
"epoch": 11.04, |
|
"learning_rate": 6.191162343900097e-06, |
|
"loss": 0.8741, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 11.52, |
|
"learning_rate": 5.590778097982709e-06, |
|
"loss": 0.8395, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 4.990393852065322e-06, |
|
"loss": 0.8458, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_f1": 0.9061, |
|
"eval_gen_len": 19.898545454545456, |
|
"eval_loss": 1.4338867664337158, |
|
"eval_precision": 0.9177, |
|
"eval_recall": 0.8952, |
|
"eval_rouge1": 0.4643, |
|
"eval_rouge2": 0.2447, |
|
"eval_rougeL": 0.4055, |
|
"eval_rougeLsum": 0.4055, |
|
"eval_runtime": 670.6829, |
|
"eval_samples_per_second": 8.201, |
|
"eval_steps_per_second": 0.513, |
|
"step": 12501 |
|
}, |
|
{ |
|
"epoch": 12.48, |
|
"learning_rate": 4.390009606147935e-06, |
|
"loss": 0.8172, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 12.96, |
|
"learning_rate": 3.7896253602305477e-06, |
|
"loss": 0.8207, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_f1": 0.9064, |
|
"eval_gen_len": 19.905272727272727, |
|
"eval_loss": 1.44303560256958, |
|
"eval_precision": 0.9182, |
|
"eval_recall": 0.8952, |
|
"eval_rouge1": 0.4671, |
|
"eval_rouge2": 0.2463, |
|
"eval_rougeL": 0.4068, |
|
"eval_rougeLsum": 0.4069, |
|
"eval_runtime": 650.7057, |
|
"eval_samples_per_second": 8.452, |
|
"eval_steps_per_second": 0.529, |
|
"step": 13542 |
|
}, |
|
{ |
|
"epoch": 13.44, |
|
"learning_rate": 3.189241114313161e-06, |
|
"loss": 0.8006, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 13.92, |
|
"learning_rate": 2.5888568683957737e-06, |
|
"loss": 0.7987, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_f1": 0.9059, |
|
"eval_gen_len": 19.918, |
|
"eval_loss": 1.449475646018982, |
|
"eval_precision": 0.9179, |
|
"eval_recall": 0.8944, |
|
"eval_rouge1": 0.4633, |
|
"eval_rouge2": 0.2455, |
|
"eval_rougeL": 0.4046, |
|
"eval_rougeLsum": 0.4047, |
|
"eval_runtime": 661.0314, |
|
"eval_samples_per_second": 8.32, |
|
"eval_steps_per_second": 0.52, |
|
"step": 14584 |
|
}, |
|
{ |
|
"epoch": 14.4, |
|
"learning_rate": 1.988472622478386e-06, |
|
"loss": 0.7843, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 14.88, |
|
"learning_rate": 1.3880883765609993e-06, |
|
"loss": 0.787, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_f1": 0.9064, |
|
"eval_gen_len": 19.895636363636363, |
|
"eval_loss": 1.4560260772705078, |
|
"eval_precision": 0.9182, |
|
"eval_recall": 0.8953, |
|
"eval_rouge1": 0.4666, |
|
"eval_rouge2": 0.2471, |
|
"eval_rougeL": 0.407, |
|
"eval_rougeLsum": 0.4072, |
|
"eval_runtime": 670.9962, |
|
"eval_samples_per_second": 8.197, |
|
"eval_steps_per_second": 0.513, |
|
"step": 15626 |
|
}, |
|
{ |
|
"epoch": 15.36, |
|
"learning_rate": 7.87704130643612e-07, |
|
"loss": 0.7775, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 15.84, |
|
"learning_rate": 1.8731988472622478e-07, |
|
"loss": 0.772, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 15.99, |
|
"eval_f1": 0.9068, |
|
"eval_gen_len": 19.881636363636364, |
|
"eval_loss": 1.4622657299041748, |
|
"eval_precision": 0.9185, |
|
"eval_recall": 0.8957, |
|
"eval_rouge1": 0.4678, |
|
"eval_rouge2": 0.2472, |
|
"eval_rougeL": 0.4081, |
|
"eval_rougeLsum": 0.4082, |
|
"eval_runtime": 669.6134, |
|
"eval_samples_per_second": 8.214, |
|
"eval_steps_per_second": 0.514, |
|
"step": 16656 |
|
}, |
|
{ |
|
"epoch": 15.99, |
|
"step": 16656, |
|
"total_flos": 3.421567656204632e+18, |
|
"train_loss": 1.050457198154915, |
|
"train_runtime": 71670.9422, |
|
"train_samples_per_second": 22.324, |
|
"train_steps_per_second": 0.232 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 16656, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 16, |
|
"save_steps": 500, |
|
"total_flos": 3.421567656204632e+18, |
|
"train_batch_size": 24, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|