{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.0023181686467690526, "eval_steps": 3, "global_step": 10, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00023181686467690525, "grad_norm": 11.30616283416748, "learning_rate": 2e-05, "loss": 6.5095, "step": 1 }, { "epoch": 0.00023181686467690525, "eval_loss": 1.599483847618103, "eval_runtime": 65.7503, "eval_samples_per_second": 27.635, "eval_steps_per_second": 13.825, "step": 1 }, { "epoch": 0.0004636337293538105, "grad_norm": 7.352132797241211, "learning_rate": 4e-05, "loss": 6.1124, "step": 2 }, { "epoch": 0.0006954505940307157, "grad_norm": 6.156064033508301, "learning_rate": 6e-05, "loss": 5.6913, "step": 3 }, { "epoch": 0.0006954505940307157, "eval_loss": 1.5879477262496948, "eval_runtime": 65.5595, "eval_samples_per_second": 27.715, "eval_steps_per_second": 13.865, "step": 3 }, { "epoch": 0.000927267458707621, "grad_norm": 9.128706932067871, "learning_rate": 8e-05, "loss": 6.6768, "step": 4 }, { "epoch": 0.0011590843233845263, "grad_norm": 9.580340385437012, "learning_rate": 0.0001, "loss": 4.9169, "step": 5 }, { "epoch": 0.0013909011880614314, "grad_norm": 5.978302478790283, "learning_rate": 0.00012, "loss": 6.9236, "step": 6 }, { "epoch": 0.0013909011880614314, "eval_loss": 1.5002448558807373, "eval_runtime": 65.2697, "eval_samples_per_second": 27.838, "eval_steps_per_second": 13.927, "step": 6 }, { "epoch": 0.0016227180527383367, "grad_norm": 6.212968349456787, "learning_rate": 0.00014, "loss": 4.9658, "step": 7 }, { "epoch": 0.001854534917415242, "grad_norm": 11.32003402709961, "learning_rate": 0.00016, "loss": 6.3171, "step": 8 }, { "epoch": 0.0020863517820921473, "grad_norm": 6.1606831550598145, "learning_rate": 0.00018, "loss": 5.7145, "step": 9 }, { "epoch": 0.0020863517820921473, "eval_loss": 1.2604190111160278, "eval_runtime": 65.6497, "eval_samples_per_second": 27.677, "eval_steps_per_second": 13.846, "step": 9 }, { "epoch": 0.0023181686467690526, "grad_norm": 10.335721969604492, "learning_rate": 0.0002, "loss": 4.6105, "step": 10 } ], "logging_steps": 1, "max_steps": 10, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 3, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1628733135912960.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }