{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.05644402634054563, "eval_steps": 5, "global_step": 30, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0018814675446848542, "eval_loss": 11.94036865234375, "eval_runtime": 3.3357, "eval_samples_per_second": 67.153, "eval_steps_per_second": 33.576, "step": 1 }, { "epoch": 0.005644402634054563, "grad_norm": 0.2863217890262604, "learning_rate": 3e-05, "loss": 11.9358, "step": 3 }, { "epoch": 0.00940733772342427, "eval_loss": 11.93970775604248, "eval_runtime": 3.277, "eval_samples_per_second": 68.356, "eval_steps_per_second": 34.178, "step": 5 }, { "epoch": 0.011288805268109126, "grad_norm": 0.2686111032962799, "learning_rate": 6e-05, "loss": 11.9345, "step": 6 }, { "epoch": 0.016933207902163686, "grad_norm": 0.20651701092720032, "learning_rate": 9e-05, "loss": 11.9359, "step": 9 }, { "epoch": 0.01881467544684854, "eval_loss": 11.937393188476562, "eval_runtime": 3.3512, "eval_samples_per_second": 66.842, "eval_steps_per_second": 33.421, "step": 10 }, { "epoch": 0.022577610536218252, "grad_norm": 0.25992536544799805, "learning_rate": 9.755282581475769e-05, "loss": 11.9336, "step": 12 }, { "epoch": 0.028222013170272814, "grad_norm": 0.2817200720310211, "learning_rate": 8.535533905932738e-05, "loss": 11.9332, "step": 15 }, { "epoch": 0.028222013170272814, "eval_loss": 11.933990478515625, "eval_runtime": 3.3961, "eval_samples_per_second": 65.958, "eval_steps_per_second": 32.979, "step": 15 }, { "epoch": 0.03386641580432737, "grad_norm": 0.2507167160511017, "learning_rate": 6.545084971874738e-05, "loss": 11.9318, "step": 18 }, { "epoch": 0.03762935089369708, "eval_loss": 11.93133544921875, "eval_runtime": 3.3024, "eval_samples_per_second": 67.83, "eval_steps_per_second": 33.915, "step": 20 }, { "epoch": 0.03951081843838194, "grad_norm": 0.22199992835521698, "learning_rate": 4.2178276747988446e-05, "loss": 11.9307, "step": 21 }, { "epoch": 0.045155221072436504, "grad_norm": 0.26525187492370605, "learning_rate": 2.061073738537635e-05, "loss": 11.9266, "step": 24 }, { "epoch": 0.047036688617121354, "eval_loss": 11.929987907409668, "eval_runtime": 3.369, "eval_samples_per_second": 66.489, "eval_steps_per_second": 33.245, "step": 25 }, { "epoch": 0.05079962370649106, "grad_norm": 0.2282623052597046, "learning_rate": 5.449673790581611e-06, "loss": 11.9327, "step": 27 }, { "epoch": 0.05644402634054563, "grad_norm": 0.2645018398761749, "learning_rate": 0.0, "loss": 11.9229, "step": 30 }, { "epoch": 0.05644402634054563, "eval_loss": 11.929728507995605, "eval_runtime": 3.3494, "eval_samples_per_second": 66.877, "eval_steps_per_second": 33.439, "step": 30 } ], "logging_steps": 3, "max_steps": 30, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 10, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7617081507840.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }