{ "best_metric": null, "best_model_checkpoint": null, "epoch": 15.961630695443645, "eval_steps": 500, "global_step": 3328, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_f1": 0.8988, "eval_gen_len": 29.90909090909091, "eval_loss": 1.8075228929519653, "eval_precision": 0.901, "eval_recall": 0.897, "eval_rouge1": 0.411, "eval_rouge2": 0.1689, "eval_rougeL": 0.3152, "eval_rougeLsum": 0.3155, "eval_runtime": 182.3495, "eval_samples_per_second": 6.032, "eval_steps_per_second": 0.378, "step": 208 }, { "epoch": 2.0, "eval_f1": 0.904, "eval_gen_len": 29.907272727272726, "eval_loss": 1.7311877012252808, "eval_precision": 0.9059, "eval_recall": 0.9024, "eval_rouge1": 0.4379, "eval_rouge2": 0.1893, "eval_rougeL": 0.3442, "eval_rougeLsum": 0.3446, "eval_runtime": 160.7204, "eval_samples_per_second": 6.844, "eval_steps_per_second": 0.429, "step": 417 }, { "epoch": 2.4, "learning_rate": 1.699519230769231e-05, "loss": 2.0112, "step": 500 }, { "epoch": 3.0, "eval_f1": 0.9055, "eval_gen_len": 30.017272727272726, "eval_loss": 1.6986640691757202, "eval_precision": 0.9075, "eval_recall": 0.9039, "eval_rouge1": 0.4475, "eval_rouge2": 0.1978, "eval_rougeL": 0.352, "eval_rougeLsum": 0.3525, "eval_runtime": 161.6717, "eval_samples_per_second": 6.804, "eval_steps_per_second": 0.427, "step": 625 }, { "epoch": 4.0, "eval_f1": 0.9063, "eval_gen_len": 30.061818181818182, "eval_loss": 1.676792025566101, "eval_precision": 0.9082, "eval_recall": 0.9047, "eval_rouge1": 0.4514, "eval_rouge2": 0.1981, "eval_rougeL": 0.357, "eval_rougeLsum": 0.3573, "eval_runtime": 160.8885, "eval_samples_per_second": 6.837, "eval_steps_per_second": 0.429, "step": 834 }, { "epoch": 4.8, "learning_rate": 1.3990384615384615e-05, "loss": 1.7647, "step": 1000 }, { "epoch": 5.0, "eval_f1": 0.9068, "eval_gen_len": 30.326363636363638, "eval_loss": 1.661742091178894, "eval_precision": 0.9084, "eval_recall": 0.9055, "eval_rouge1": 0.4537, "eval_rouge2": 0.2003, "eval_rougeL": 0.3592, "eval_rougeLsum": 0.3595, "eval_runtime": 162.5609, "eval_samples_per_second": 6.767, "eval_steps_per_second": 0.424, "step": 1042 }, { "epoch": 6.0, "eval_f1": 0.9072, "eval_gen_len": 30.082727272727272, "eval_loss": 1.6502095460891724, "eval_precision": 0.9089, "eval_recall": 0.9057, "eval_rouge1": 0.4554, "eval_rouge2": 0.2021, "eval_rougeL": 0.3607, "eval_rougeLsum": 0.361, "eval_runtime": 159.1789, "eval_samples_per_second": 6.91, "eval_steps_per_second": 0.433, "step": 1251 }, { "epoch": 7.0, "eval_f1": 0.908, "eval_gen_len": 30.021818181818183, "eval_loss": 1.6416178941726685, "eval_precision": 0.9099, "eval_recall": 0.9064, "eval_rouge1": 0.4592, "eval_rouge2": 0.2052, "eval_rougeL": 0.3639, "eval_rougeLsum": 0.3641, "eval_runtime": 156.6401, "eval_samples_per_second": 7.022, "eval_steps_per_second": 0.441, "step": 1459 }, { "epoch": 7.19, "learning_rate": 1.0985576923076924e-05, "loss": 1.6948, "step": 1500 }, { "epoch": 8.0, "eval_f1": 0.9081, "eval_gen_len": 30.78272727272727, "eval_loss": 1.6360372304916382, "eval_precision": 0.909, "eval_recall": 0.9074, "eval_rouge1": 0.4612, "eval_rouge2": 0.2054, "eval_rougeL": 0.3649, "eval_rougeLsum": 0.365, "eval_runtime": 160.7238, "eval_samples_per_second": 6.844, "eval_steps_per_second": 0.429, "step": 1668 }, { "epoch": 9.0, "eval_f1": 0.9083, "eval_gen_len": 30.62909090909091, "eval_loss": 1.6301532983779907, "eval_precision": 0.9095, "eval_recall": 0.9074, "eval_rouge1": 0.4621, "eval_rouge2": 0.2062, "eval_rougeL": 0.3645, "eval_rougeLsum": 0.3647, "eval_runtime": 159.8662, "eval_samples_per_second": 6.881, "eval_steps_per_second": 0.432, "step": 1876 }, { "epoch": 9.59, "learning_rate": 7.980769230769232e-06, "loss": 1.6501, "step": 2000 }, { "epoch": 10.0, "eval_f1": 0.9083, "eval_gen_len": 30.48181818181818, "eval_loss": 1.6264721155166626, "eval_precision": 0.9095, "eval_recall": 0.9073, "eval_rouge1": 0.4606, "eval_rouge2": 0.2051, "eval_rougeL": 0.3651, "eval_rougeLsum": 0.3655, "eval_runtime": 157.2711, "eval_samples_per_second": 6.994, "eval_steps_per_second": 0.439, "step": 2085 }, { "epoch": 11.0, "eval_f1": 0.9087, "eval_gen_len": 30.806363636363635, "eval_loss": 1.6229554414749146, "eval_precision": 0.9097, "eval_recall": 0.908, "eval_rouge1": 0.4625, "eval_rouge2": 0.2073, "eval_rougeL": 0.3658, "eval_rougeLsum": 0.366, "eval_runtime": 157.1263, "eval_samples_per_second": 7.001, "eval_steps_per_second": 0.439, "step": 2293 }, { "epoch": 11.99, "learning_rate": 4.975961538461539e-06, "loss": 1.6222, "step": 2500 }, { "epoch": 12.0, "eval_f1": 0.909, "eval_gen_len": 30.55272727272727, "eval_loss": 1.6204967498779297, "eval_precision": 0.9103, "eval_recall": 0.9081, "eval_rouge1": 0.4644, "eval_rouge2": 0.2082, "eval_rougeL": 0.3674, "eval_rougeLsum": 0.3679, "eval_runtime": 155.7494, "eval_samples_per_second": 7.063, "eval_steps_per_second": 0.443, "step": 2502 }, { "epoch": 13.0, "eval_f1": 0.909, "eval_gen_len": 30.805454545454545, "eval_loss": 1.618751883506775, "eval_precision": 0.9101, "eval_recall": 0.9083, "eval_rouge1": 0.4648, "eval_rouge2": 0.2087, "eval_rougeL": 0.3681, "eval_rougeLsum": 0.3683, "eval_runtime": 158.59, "eval_samples_per_second": 6.936, "eval_steps_per_second": 0.435, "step": 2710 }, { "epoch": 14.0, "eval_f1": 0.9093, "eval_gen_len": 30.670909090909092, "eval_loss": 1.6172130107879639, "eval_precision": 0.9104, "eval_recall": 0.9084, "eval_rouge1": 0.4654, "eval_rouge2": 0.2097, "eval_rougeL": 0.3685, "eval_rougeLsum": 0.3689, "eval_runtime": 159.3516, "eval_samples_per_second": 6.903, "eval_steps_per_second": 0.433, "step": 2919 }, { "epoch": 14.39, "learning_rate": 1.971153846153846e-06, "loss": 1.6048, "step": 3000 }, { "epoch": 15.0, "eval_f1": 0.9093, "eval_gen_len": 30.630909090909093, "eval_loss": 1.6168792247772217, "eval_precision": 0.9104, "eval_recall": 0.9084, "eval_rouge1": 0.465, "eval_rouge2": 0.21, "eval_rougeL": 0.3693, "eval_rougeLsum": 0.3697, "eval_runtime": 158.4169, "eval_samples_per_second": 6.944, "eval_steps_per_second": 0.436, "step": 3127 }, { "epoch": 15.96, "eval_f1": 0.9091, "eval_gen_len": 30.619090909090907, "eval_loss": 1.6166560649871826, "eval_precision": 0.9102, "eval_recall": 0.9083, "eval_rouge1": 0.4649, "eval_rouge2": 0.2096, "eval_rougeL": 0.3686, "eval_rougeLsum": 0.3688, "eval_runtime": 157.7317, "eval_samples_per_second": 6.974, "eval_steps_per_second": 0.437, "step": 3328 }, { "epoch": 15.96, "step": 3328, "total_flos": 4.6122414295331635e+17, "train_loss": 1.711962864949153, "train_runtime": 11907.5247, "train_samples_per_second": 26.874, "train_steps_per_second": 0.279 } ], "logging_steps": 500, "max_steps": 3328, "num_input_tokens_seen": 0, "num_train_epochs": 16, "save_steps": 500, "total_flos": 4.6122414295331635e+17, "train_batch_size": 24, "trial_name": null, "trial_params": null }