{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.41841004184100417, "eval_steps": 300, "global_step": 1200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.017433751743375175, "grad_norm": 11.72121810913086, "learning_rate": 9.966527196652718e-07, "loss": 10.429, "step": 50 }, { "epoch": 0.03486750348675035, "grad_norm": 7.926635265350342, "learning_rate": 9.933054393305438e-07, "loss": 9.5621, "step": 100 }, { "epoch": 0.05230125523012552, "grad_norm": 14.6965970993042, "learning_rate": 9.898884239888422e-07, "loss": 8.9643, "step": 150 }, { "epoch": 0.0697350069735007, "grad_norm": 19.375049591064453, "learning_rate": 9.864714086471408e-07, "loss": 8.4059, "step": 200 }, { "epoch": 0.08716875871687588, "grad_norm": 18.03301429748535, "learning_rate": 9.833333333333332e-07, "loss": 7.612, "step": 250 }, { "epoch": 0.10460251046025104, "grad_norm": 10.96057415008545, "learning_rate": 9.798465829846583e-07, "loss": 6.8137, "step": 300 }, { "epoch": 0.10460251046025104, "eval_gen_len": 10.4739, "eval_loss": 6.225385665893555, "eval_rouge1": 6.5635, "eval_rouge2": 1.1205, "eval_rougeL": 5.1919, "eval_rougeLsum": 5.8048, "eval_runtime": 493.488, "eval_samples_per_second": 11.036, "eval_steps_per_second": 0.087, "step": 300 }, { "epoch": 0.12203626220362622, "grad_norm": 13.111268997192383, "learning_rate": 9.763598326359831e-07, "loss": 6.0284, "step": 350 }, { "epoch": 0.1394700139470014, "grad_norm": 9.499017715454102, "learning_rate": 9.730822873082288e-07, "loss": 5.3582, "step": 400 }, { "epoch": 0.15690376569037656, "grad_norm": 5.717628002166748, "learning_rate": 9.696652719665272e-07, "loss": 4.8628, "step": 450 }, { "epoch": 0.17433751743375175, "grad_norm": 3.644038438796997, "learning_rate": 9.662482566248256e-07, "loss": 4.6036, "step": 500 }, { "epoch": 0.19177126917712692, "grad_norm": 4.016868591308594, "learning_rate": 9.62831241283124e-07, "loss": 4.4095, "step": 550 }, { "epoch": 0.20920502092050208, "grad_norm": 1.7940410375595093, "learning_rate": 9.59344490934449e-07, "loss": 4.3409, "step": 600 }, { "epoch": 0.20920502092050208, "eval_gen_len": 21.2969, "eval_loss": 4.094496250152588, "eval_rouge1": 7.0701, "eval_rouge2": 1.1979, "eval_rougeL": 5.5075, "eval_rougeLsum": 6.2106, "eval_runtime": 464.764, "eval_samples_per_second": 11.718, "eval_steps_per_second": 0.093, "step": 600 }, { "epoch": 0.22663877266387727, "grad_norm": 1.70875084400177, "learning_rate": 9.558577405857741e-07, "loss": 4.3321, "step": 650 }, { "epoch": 0.24407252440725244, "grad_norm": 1.239980936050415, "learning_rate": 9.52510460251046e-07, "loss": 4.2697, "step": 700 }, { "epoch": 0.2615062761506276, "grad_norm": 1.91974675655365, "learning_rate": 9.49163179916318e-07, "loss": 4.2407, "step": 750 }, { "epoch": 0.2789400278940028, "grad_norm": 2.3624725341796875, "learning_rate": 9.4581589958159e-07, "loss": 4.2552, "step": 800 }, { "epoch": 0.296373779637378, "grad_norm": 3.15625, "learning_rate": 9.424686192468618e-07, "loss": 4.2662, "step": 850 }, { "epoch": 0.3138075313807531, "grad_norm": 3.3393449783325195, "learning_rate": 9.390516039051604e-07, "loss": 4.2447, "step": 900 }, { "epoch": 0.3138075313807531, "eval_gen_len": 40.1348, "eval_loss": 4.038403034210205, "eval_rouge1": 8.106, "eval_rouge2": 1.3148, "eval_rougeL": 6.3905, "eval_rougeLsum": 7.0688, "eval_runtime": 806.4671, "eval_samples_per_second": 6.753, "eval_steps_per_second": 0.053, "step": 900 }, { "epoch": 0.3312412831241283, "grad_norm": 4.088630199432373, "learning_rate": 9.357043235704322e-07, "loss": 4.2266, "step": 950 }, { "epoch": 0.3486750348675035, "grad_norm": 0.0, "learning_rate": 9.324965132496513e-07, "loss": 4.2561, "step": 1000 }, { "epoch": 0.36610878661087864, "grad_norm": 0.0, "learning_rate": 9.290794979079497e-07, "loss": 4.2302, "step": 1050 }, { "epoch": 0.38354253835425384, "grad_norm": 0.0, "learning_rate": 9.256624825662482e-07, "loss": 4.2562, "step": 1100 }, { "epoch": 0.40097629009762903, "grad_norm": 0.0, "learning_rate": 9.223152022315202e-07, "loss": 4.2719, "step": 1150 }, { "epoch": 0.41841004184100417, "grad_norm": 0.0, "learning_rate": 9.188981868898187e-07, "loss": 4.2771, "step": 1200 }, { "epoch": 0.41841004184100417, "eval_gen_len": 40.3153, "eval_loss": 4.038360118865967, "eval_rouge1": 8.0991, "eval_rouge2": 1.3122, "eval_rougeL": 6.386, "eval_rougeLsum": 7.0615, "eval_runtime": 808.1023, "eval_samples_per_second": 6.739, "eval_steps_per_second": 0.053, "step": 1200 } ], "logging_steps": 50, "max_steps": 14340, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 300, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.3383982997504e+16, "train_batch_size": 32, "trial_name": null, "trial_params": null }