{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9985869053226566, "eval_steps": 14, "global_step": 265, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0527555346208196, "grad_norm": 0.18791379034519196, "learning_rate": 0.0013333333333333333, "loss": 1.1165, "step": 14 }, { "epoch": 0.1055110692416392, "grad_norm": 1.2923468351364136, "learning_rate": 0.001044465935734187, "loss": 1.5197, "step": 28 }, { "epoch": 0.15826660386245878, "grad_norm": 0.27302563190460205, "learning_rate": 0.0006928203230275508, "loss": 0.9163, "step": 42 }, { "epoch": 0.2110221384832784, "grad_norm": 0.252996027469635, "learning_rate": 0.0005547001962252292, "loss": 0.8046, "step": 56 }, { "epoch": 0.263777673104098, "grad_norm": 0.25708699226379395, "learning_rate": 0.00047583095143088644, "loss": 0.7624, "step": 70 }, { "epoch": 0.31653320772491755, "grad_norm": 0.2488645762205124, "learning_rate": 0.000423207369515159, "loss": 0.7358, "step": 84 }, { "epoch": 0.36928874234573716, "grad_norm": 0.2357274889945984, "learning_rate": 0.00038490017945975053, "loss": 0.7009, "step": 98 }, { "epoch": 0.4220442769665568, "grad_norm": 0.25910037755966187, "learning_rate": 0.00035540932665545545, "loss": 0.6576, "step": 112 }, { "epoch": 0.47479981158737633, "grad_norm": 0.24194850027561188, "learning_rate": 0.00033180075816559865, "loss": 0.6456, "step": 126 }, { "epoch": 0.527555346208196, "grad_norm": 0.31878945231437683, "learning_rate": 0.0003123475237772121, "loss": 0.6035, "step": 140 }, { "epoch": 0.5803108808290155, "grad_norm": 0.23705270886421204, "learning_rate": 0.00029595817420019407, "loss": 0.6138, "step": 154 }, { "epoch": 0.6330664154498351, "grad_norm": 0.32103270292282104, "learning_rate": 0.0002819045914409638, "loss": 0.5941, "step": 168 }, { "epoch": 0.6858219500706547, "grad_norm": 0.2693799138069153, "learning_rate": 0.00026967994498529687, "loss": 0.5686, "step": 182 }, { "epoch": 0.7385774846914743, "grad_norm": 0.2850986123085022, "learning_rate": 0.0002589191112012619, "loss": 0.5854, "step": 196 }, { "epoch": 0.7913330193122939, "grad_norm": 0.2964475154876709, "learning_rate": 0.00024935149047701483, "loss": 0.5482, "step": 210 }, { "epoch": 0.8440885539331136, "grad_norm": 0.27225586771965027, "learning_rate": 0.0002407717061715384, "loss": 0.5447, "step": 224 }, { "epoch": 0.8968440885539332, "grad_norm": 0.24302007257938385, "learning_rate": 0.00023302069121418522, "loss": 0.5245, "step": 238 }, { "epoch": 0.9495996231747527, "grad_norm": 0.3157835900783539, "learning_rate": 0.00022597307314641284, "loss": 0.5328, "step": 252 } ], "logging_steps": 14, "max_steps": 265, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 14, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.691462902673572e+18, "train_batch_size": 2, "trial_name": null, "trial_params": null }