{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.000421474105684632, "eval_steps": 3, "global_step": 10, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 4.21474105684632e-05, "grad_norm": 0.541431188583374, "learning_rate": 2e-05, "loss": 1.7169, "step": 1 }, { "epoch": 4.21474105684632e-05, "eval_loss": 1.6580084562301636, "eval_runtime": 553.6364, "eval_samples_per_second": 18.044, "eval_steps_per_second": 9.022, "step": 1 }, { "epoch": 8.42948211369264e-05, "grad_norm": 0.48165619373321533, "learning_rate": 4e-05, "loss": 1.4538, "step": 2 }, { "epoch": 0.0001264422317053896, "grad_norm": 0.5114144086837769, "learning_rate": 6e-05, "loss": 1.7415, "step": 3 }, { "epoch": 0.0001264422317053896, "eval_loss": 1.6541515588760376, "eval_runtime": 553.1847, "eval_samples_per_second": 18.059, "eval_steps_per_second": 9.03, "step": 3 }, { "epoch": 0.0001685896422738528, "grad_norm": 0.5347130298614502, "learning_rate": 8e-05, "loss": 1.7013, "step": 4 }, { "epoch": 0.000210737052842316, "grad_norm": 0.7098687291145325, "learning_rate": 0.0001, "loss": 1.7653, "step": 5 }, { "epoch": 0.0002528844634107792, "grad_norm": 0.6764225959777832, "learning_rate": 0.00012, "loss": 1.6061, "step": 6 }, { "epoch": 0.0002528844634107792, "eval_loss": 1.6010996103286743, "eval_runtime": 553.2531, "eval_samples_per_second": 18.057, "eval_steps_per_second": 9.028, "step": 6 }, { "epoch": 0.0002950318739792424, "grad_norm": 0.7717322707176208, "learning_rate": 0.00014, "loss": 1.6238, "step": 7 }, { "epoch": 0.0003371792845477056, "grad_norm": 0.7759086489677429, "learning_rate": 0.00016, "loss": 1.6759, "step": 8 }, { "epoch": 0.0003793266951161688, "grad_norm": 0.6566513776779175, "learning_rate": 0.00018, "loss": 1.6666, "step": 9 }, { "epoch": 0.0003793266951161688, "eval_loss": 1.5033568143844604, "eval_runtime": 553.3876, "eval_samples_per_second": 18.052, "eval_steps_per_second": 9.026, "step": 9 }, { "epoch": 0.000421474105684632, "grad_norm": 0.6918085813522339, "learning_rate": 0.0002, "loss": 1.5941, "step": 10 } ], "logging_steps": 1, "max_steps": 10, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 3, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3560411177680896.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }