{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9993211133740665, "eval_steps": 500, "global_step": 184, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05431093007467753, "grad_norm": 1.5025734901428223, "learning_rate": 0.00019904804439875633, "loss": 3.2617, "step": 10 }, { "epoch": 0.10862186014935506, "grad_norm": 1.5902663469314575, "learning_rate": 0.00019521176659107142, "loss": 1.7882, "step": 20 }, { "epoch": 0.1629327902240326, "grad_norm": 0.7277324795722961, "learning_rate": 0.000188545602565321, "loss": 1.1819, "step": 30 }, { "epoch": 0.2172437202987101, "grad_norm": 0.6814110279083252, "learning_rate": 0.00017924768419510904, "loss": 1.0805, "step": 40 }, { "epoch": 0.27155465037338766, "grad_norm": 0.6621173620223999, "learning_rate": 0.00016759436441447545, "loss": 1.0806, "step": 50 }, { "epoch": 0.3258655804480652, "grad_norm": 0.717393696308136, "learning_rate": 0.00015393200344991995, "loss": 1.0157, "step": 60 }, { "epoch": 0.3801765105227427, "grad_norm": 0.6353682279586792, "learning_rate": 0.0001386666742941419, "loss": 1.0388, "step": 70 }, { "epoch": 0.4344874405974202, "grad_norm": 0.7220941185951233, "learning_rate": 0.00012225209339563145, "loss": 0.9547, "step": 80 }, { "epoch": 0.48879837067209775, "grad_norm": 0.7532466650009155, "learning_rate": 0.00010517613528842097, "loss": 1.0116, "step": 90 }, { "epoch": 0.5431093007467753, "grad_norm": 0.8198474645614624, "learning_rate": 8.79463319744677e-05, "loss": 0.9763, "step": 100 }, { "epoch": 0.5974202308214528, "grad_norm": 0.79359370470047, "learning_rate": 7.107478804634325e-05, "loss": 1.0034, "step": 110 }, { "epoch": 0.6517311608961304, "grad_norm": 1.1620718240737915, "learning_rate": 5.506295990328385e-05, "loss": 0.9125, "step": 120 }, { "epoch": 0.7060420909708078, "grad_norm": 1.5092897415161133, "learning_rate": 4.038675145307747e-05, "loss": 0.8748, "step": 130 }, { "epoch": 0.7603530210454854, "grad_norm": 1.0216153860092163, "learning_rate": 2.7482369285662378e-05, "loss": 0.9131, "step": 140 }, { "epoch": 0.814663951120163, "grad_norm": 1.5323857069015503, "learning_rate": 1.6733357731279377e-05, "loss": 0.8702, "step": 150 }, { "epoch": 0.8689748811948405, "grad_norm": 0.962648332118988, "learning_rate": 8.45919914746337e-06, "loss": 0.8396, "step": 160 }, { "epoch": 0.923285811269518, "grad_norm": 0.7791914939880371, "learning_rate": 2.905818257394799e-06, "loss": 0.8633, "step": 170 }, { "epoch": 0.9775967413441955, "grad_norm": 0.8390964865684509, "learning_rate": 2.382727698752474e-07, "loss": 0.8817, "step": 180 } ], "logging_steps": 10, "max_steps": 184, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1445464568266752.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }