{ "best_metric": 0.5348, "best_model_checkpoint": "outputs/checkpoint-29586", "epoch": 8.0, "global_step": 236688, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 0.0004469415766030179, "loss": 3.6106, "step": 29586 }, { "epoch": 1.0, "eval_gen_len": 15.296, "eval_loss": 2.7799501419067383, "eval_runtime": 368.7401, "eval_sacrebleu": 0.5348, "eval_samples_per_second": 6.175, "eval_steps_per_second": 3.089, "step": 29586 }, { "epoch": 2.0, "learning_rate": 0.00038309277994544387, "loss": 2.7653, "step": 59172 }, { "epoch": 2.0, "eval_gen_len": 14.7356, "eval_loss": 2.742591619491577, "eval_runtime": 368.9941, "eval_sacrebleu": 0.4271, "eval_samples_per_second": 6.171, "eval_steps_per_second": 3.087, "step": 59172 }, { "epoch": 3.0, "learning_rate": 0.0003192439832878699, "loss": 2.7302, "step": 88758 }, { "epoch": 3.0, "eval_gen_len": 14.7444, "eval_loss": 2.7364578247070312, "eval_runtime": 357.3278, "eval_sacrebleu": 0.4211, "eval_samples_per_second": 6.372, "eval_steps_per_second": 3.188, "step": 88758 }, { "epoch": 4.0, "learning_rate": 0.0002553951866302959, "loss": 2.705, "step": 118344 }, { "epoch": 4.0, "eval_gen_len": 14.8669, "eval_loss": 2.7070209980010986, "eval_runtime": 365.3611, "eval_sacrebleu": 0.4635, "eval_samples_per_second": 6.232, "eval_steps_per_second": 3.117, "step": 118344 }, { "epoch": 5.0, "learning_rate": 0.00019154638997272193, "loss": 2.6613, "step": 147930 }, { "epoch": 5.0, "eval_gen_len": 13.9183, "eval_loss": 2.6642842292785645, "eval_runtime": 363.1965, "eval_sacrebleu": 0.3438, "eval_samples_per_second": 6.269, "eval_steps_per_second": 3.136, "step": 147930 }, { "epoch": 6.0, "learning_rate": 0.00012769759331514796, "loss": 2.6045, "step": 177516 }, { "epoch": 6.0, "eval_gen_len": 14.9087, "eval_loss": 2.614426374435425, "eval_runtime": 359.5255, "eval_sacrebleu": 0.4651, "eval_samples_per_second": 6.333, "eval_steps_per_second": 3.168, "step": 177516 }, { "epoch": 7.0, "learning_rate": 6.384879665757398e-05, "loss": 2.5271, "step": 207102 }, { "epoch": 7.0, "eval_gen_len": 14.6271, "eval_loss": 2.5494837760925293, "eval_runtime": 358.1093, "eval_sacrebleu": 0.468, "eval_samples_per_second": 6.358, "eval_steps_per_second": 3.181, "step": 207102 }, { "epoch": 8.0, "learning_rate": 0.0, "loss": 2.4325, "step": 236688 }, { "epoch": 8.0, "eval_gen_len": 14.7071, "eval_loss": 2.4964401721954346, "eval_runtime": 362.2324, "eval_sacrebleu": 0.484, "eval_samples_per_second": 6.286, "eval_steps_per_second": 3.144, "step": 236688 }, { "epoch": 8.0, "step": 236688, "total_flos": 3.211579566884874e+18, "train_loss": 2.7545470263067835, "train_runtime": 376582.9978, "train_samples_per_second": 20.112, "train_steps_per_second": 0.629 } ], "max_steps": 236688, "num_train_epochs": 8, "total_flos": 3.211579566884874e+18, "trial_name": null, "trial_params": null }