{ "best_metric": 1.821385145187378, "best_model_checkpoint": "autotrain-egbhl-hdsiz/checkpoint-80", "epoch": 2.0, "eval_steps": 500, "global_step": 80, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "grad_norm": Infinity, "learning_rate": 0.0, "loss": 4.5433, "step": 2 }, { "epoch": 0.1, "grad_norm": 26.819583892822266, "learning_rate": 4.166666666666667e-06, "loss": 2.9431, "step": 4 }, { "epoch": 0.15, "grad_norm": 24.515037536621094, "learning_rate": 1.25e-05, "loss": 3.5482, "step": 6 }, { "epoch": 0.2, "grad_norm": 12.873324394226074, "learning_rate": 2.0833333333333336e-05, "loss": 2.4086, "step": 8 }, { "epoch": 0.25, "grad_norm": 15.799155235290527, "learning_rate": 2.916666666666667e-05, "loss": 2.8477, "step": 10 }, { "epoch": 0.3, "grad_norm": 23.742237091064453, "learning_rate": 3.7500000000000003e-05, "loss": 2.297, "step": 12 }, { "epoch": 0.35, "grad_norm": 14.426471710205078, "learning_rate": 4.5833333333333334e-05, "loss": 2.1671, "step": 14 }, { "epoch": 0.4, "grad_norm": 14.933609008789062, "learning_rate": 4.9537037037037035e-05, "loss": 1.8833, "step": 16 }, { "epoch": 0.45, "grad_norm": 19.533145904541016, "learning_rate": 4.8611111111111115e-05, "loss": 2.1878, "step": 18 }, { "epoch": 0.5, "grad_norm": 35.50374984741211, "learning_rate": 4.768518518518519e-05, "loss": 1.8188, "step": 20 }, { "epoch": 0.55, "grad_norm": 17.651580810546875, "learning_rate": 4.675925925925926e-05, "loss": 2.0148, "step": 22 }, { "epoch": 0.6, "grad_norm": 14.465134620666504, "learning_rate": 4.62962962962963e-05, "loss": 2.0613, "step": 24 }, { "epoch": 0.65, "grad_norm": 13.421053886413574, "learning_rate": 4.5370370370370374e-05, "loss": 2.5974, "step": 26 }, { "epoch": 0.7, "grad_norm": 17.738975524902344, "learning_rate": 4.4444444444444447e-05, "loss": 2.1862, "step": 28 }, { "epoch": 0.75, "grad_norm": 10.630866050720215, "learning_rate": 4.351851851851852e-05, "loss": 1.9854, "step": 30 }, { "epoch": 0.8, "grad_norm": 13.5612211227417, "learning_rate": 4.259259259259259e-05, "loss": 2.3207, "step": 32 }, { "epoch": 0.85, "grad_norm": 12.230231285095215, "learning_rate": 4.166666666666667e-05, "loss": 2.1946, "step": 34 }, { "epoch": 0.9, "grad_norm": 10.226851463317871, "learning_rate": 4.074074074074074e-05, "loss": 2.0726, "step": 36 }, { "epoch": 0.95, "grad_norm": 17.564428329467773, "learning_rate": 3.981481481481482e-05, "loss": 1.8402, "step": 38 }, { "epoch": 1.0, "grad_norm": 16.211181640625, "learning_rate": 3.888888888888889e-05, "loss": 1.7316, "step": 40 }, { "epoch": 1.0, "eval_gen_len": 19.05, "eval_loss": 1.8881380558013916, "eval_rouge1": 55.9005, "eval_rouge2": 39.684, "eval_rougeL": 53.7239, "eval_rougeLsum": 53.5879, "eval_runtime": 2.3238, "eval_samples_per_second": 8.606, "eval_steps_per_second": 2.152, "step": 40 }, { "epoch": 1.05, "grad_norm": 11.123723030090332, "learning_rate": 3.7962962962962964e-05, "loss": 1.8779, "step": 42 }, { "epoch": 1.1, "grad_norm": 17.29766845703125, "learning_rate": 3.7037037037037037e-05, "loss": 1.2122, "step": 44 }, { "epoch": 1.15, "grad_norm": 11.228723526000977, "learning_rate": 3.611111111111111e-05, "loss": 1.1288, "step": 46 }, { "epoch": 1.2, "grad_norm": 16.350183486938477, "learning_rate": 3.518518518518519e-05, "loss": 1.6717, "step": 48 }, { "epoch": 1.25, "grad_norm": 73.40056610107422, "learning_rate": 3.425925925925926e-05, "loss": 1.6587, "step": 50 }, { "epoch": 1.3, "grad_norm": 23.507516860961914, "learning_rate": 3.3333333333333335e-05, "loss": 1.1566, "step": 52 }, { "epoch": 1.35, "grad_norm": 13.975730895996094, "learning_rate": 3.240740740740741e-05, "loss": 1.1459, "step": 54 }, { "epoch": 1.4, "grad_norm": 12.104181289672852, "learning_rate": 3.148148148148148e-05, "loss": 1.1564, "step": 56 }, { "epoch": 1.45, "grad_norm": 9.906718254089355, "learning_rate": 3.055555555555556e-05, "loss": 1.564, "step": 58 }, { "epoch": 1.5, "grad_norm": 11.82476806640625, "learning_rate": 2.962962962962963e-05, "loss": 1.8214, "step": 60 }, { "epoch": 1.55, "grad_norm": 11.349201202392578, "learning_rate": 2.8703703703703706e-05, "loss": 1.8952, "step": 62 }, { "epoch": 1.6, "grad_norm": 7.890576362609863, "learning_rate": 2.777777777777778e-05, "loss": 1.0706, "step": 64 }, { "epoch": 1.65, "grad_norm": 19.47081184387207, "learning_rate": 2.6851851851851855e-05, "loss": 1.5111, "step": 66 }, { "epoch": 1.7, "grad_norm": 10.134727478027344, "learning_rate": 2.5925925925925925e-05, "loss": 1.4847, "step": 68 }, { "epoch": 1.75, "grad_norm": 10.425029754638672, "learning_rate": 2.5e-05, "loss": 1.0191, "step": 70 }, { "epoch": 1.8, "grad_norm": 10.284099578857422, "learning_rate": 2.4074074074074074e-05, "loss": 1.8213, "step": 72 }, { "epoch": 1.85, "grad_norm": 13.63291072845459, "learning_rate": 2.314814814814815e-05, "loss": 1.0306, "step": 74 }, { "epoch": 1.9, "grad_norm": 12.470438003540039, "learning_rate": 2.2222222222222223e-05, "loss": 1.7619, "step": 76 }, { "epoch": 1.95, "grad_norm": 17.98394012451172, "learning_rate": 2.1296296296296296e-05, "loss": 1.2194, "step": 78 }, { "epoch": 2.0, "grad_norm": 9.503134727478027, "learning_rate": 2.037037037037037e-05, "loss": 1.1788, "step": 80 }, { "epoch": 2.0, "eval_gen_len": 19.15, "eval_loss": 1.821385145187378, "eval_rouge1": 55.4476, "eval_rouge2": 40.4173, "eval_rougeL": 53.9637, "eval_rougeLsum": 53.9177, "eval_runtime": 2.3273, "eval_samples_per_second": 8.594, "eval_steps_per_second": 2.148, "step": 80 } ], "logging_steps": 2, "max_steps": 120, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.01 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 42923062050816.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }