{ "best_metric": 0.06633109599351883, "best_model_checkpoint": "flan-t5-base-v3-edos_labelled_aggregated/checkpoint-3500", "epoch": 5.0, "global_step": 8750, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.29, "learning_rate": 4.714285714285714e-05, "loss": 0.1362, "step": 500 }, { "epoch": 0.57, "learning_rate": 4.428571428571428e-05, "loss": 0.09, "step": 1000 }, { "epoch": 0.86, "learning_rate": 4.1428571428571437e-05, "loss": 0.0822, "step": 1500 }, { "epoch": 1.0, "eval_gen_len": 4.805, "eval_loss": 0.06878727674484253, "eval_rouge1": 95.8375, "eval_rouge2": 71.9, "eval_rougeL": 95.8417, "eval_rougeLsum": 95.8333, "eval_runtime": 126.0351, "eval_samples_per_second": 31.737, "eval_steps_per_second": 3.967, "step": 1750 }, { "epoch": 1.14, "learning_rate": 3.857142857142858e-05, "loss": 0.0751, "step": 2000 }, { "epoch": 1.43, "learning_rate": 3.571428571428572e-05, "loss": 0.068, "step": 2500 }, { "epoch": 1.71, "learning_rate": 3.285714285714286e-05, "loss": 0.073, "step": 3000 }, { "epoch": 2.0, "learning_rate": 3e-05, "loss": 0.0663, "step": 3500 }, { "epoch": 2.0, "eval_gen_len": 4.79025, "eval_loss": 0.06633109599351883, "eval_rouge1": 95.8792, "eval_rouge2": 71.25, "eval_rougeL": 95.875, "eval_rougeLsum": 95.8792, "eval_runtime": 124.8268, "eval_samples_per_second": 32.044, "eval_steps_per_second": 4.006, "step": 3500 }, { "epoch": 2.29, "learning_rate": 2.714285714285714e-05, "loss": 0.0571, "step": 4000 }, { "epoch": 2.57, "learning_rate": 2.4285714285714288e-05, "loss": 0.058, "step": 4500 }, { "epoch": 2.86, "learning_rate": 2.1428571428571428e-05, "loss": 0.0536, "step": 5000 }, { "epoch": 3.0, "eval_gen_len": 4.758, "eval_loss": 0.07638958841562271, "eval_rouge1": 95.65, "eval_rouge2": 69.3, "eval_rougeL": 95.65, "eval_rougeLsum": 95.6667, "eval_runtime": 124.7009, "eval_samples_per_second": 32.077, "eval_steps_per_second": 4.01, "step": 5250 }, { "epoch": 3.14, "learning_rate": 1.8571428571428572e-05, "loss": 0.0552, "step": 5500 }, { "epoch": 3.43, "learning_rate": 1.5714285714285715e-05, "loss": 0.049, "step": 6000 }, { "epoch": 3.71, "learning_rate": 1.2857142857142857e-05, "loss": 0.0468, "step": 6500 }, { "epoch": 4.0, "learning_rate": 1e-05, "loss": 0.0486, "step": 7000 }, { "epoch": 4.0, "eval_gen_len": 4.76575, "eval_loss": 0.08493922650814056, "eval_rouge1": 95.8333, "eval_rouge2": 69.925, "eval_rougeL": 95.8167, "eval_rougeLsum": 95.8333, "eval_runtime": 124.2728, "eval_samples_per_second": 32.187, "eval_steps_per_second": 4.023, "step": 7000 }, { "epoch": 4.29, "learning_rate": 7.142857142857143e-06, "loss": 0.0429, "step": 7500 }, { "epoch": 4.57, "learning_rate": 4.285714285714286e-06, "loss": 0.043, "step": 8000 }, { "epoch": 4.86, "learning_rate": 1.4285714285714286e-06, "loss": 0.0467, "step": 8500 }, { "epoch": 5.0, "eval_gen_len": 4.7635, "eval_loss": 0.09458264708518982, "eval_rouge1": 95.825, "eval_rouge2": 69.8, "eval_rougeL": 95.8083, "eval_rougeLsum": 95.825, "eval_runtime": 124.9433, "eval_samples_per_second": 32.015, "eval_steps_per_second": 4.002, "step": 8750 } ], "max_steps": 8750, "num_train_epochs": 5, "total_flos": 1.198325366784e+16, "trial_name": null, "trial_params": null }