{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.18570102135561745, "eval_steps": 10, "global_step": 50, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.003714020427112349, "eval_loss": 2.625425100326538, "eval_runtime": 11.9889, "eval_samples_per_second": 9.509, "eval_steps_per_second": 4.754, "step": 1 }, { "epoch": 0.018570102135561744, "grad_norm": 0.3816036283969879, "learning_rate": 5e-05, "loss": 2.7497, "step": 5 }, { "epoch": 0.03714020427112349, "grad_norm": 0.574531614780426, "learning_rate": 0.0001, "loss": 2.6056, "step": 10 }, { "epoch": 0.03714020427112349, "eval_loss": 2.5287303924560547, "eval_runtime": 12.1391, "eval_samples_per_second": 9.391, "eval_steps_per_second": 4.696, "step": 10 }, { "epoch": 0.055710306406685235, "grad_norm": 0.32838571071624756, "learning_rate": 9.619397662556435e-05, "loss": 2.487, "step": 15 }, { "epoch": 0.07428040854224698, "grad_norm": 0.2884520888328552, "learning_rate": 8.535533905932738e-05, "loss": 2.434, "step": 20 }, { "epoch": 0.07428040854224698, "eval_loss": 2.443124294281006, "eval_runtime": 12.2351, "eval_samples_per_second": 9.317, "eval_steps_per_second": 4.659, "step": 20 }, { "epoch": 0.09285051067780872, "grad_norm": 0.27789685130119324, "learning_rate": 6.91341716182545e-05, "loss": 2.511, "step": 25 }, { "epoch": 0.11142061281337047, "grad_norm": 0.2861422896385193, "learning_rate": 5e-05, "loss": 2.4991, "step": 30 }, { "epoch": 0.11142061281337047, "eval_loss": 2.4089643955230713, "eval_runtime": 12.3034, "eval_samples_per_second": 9.266, "eval_steps_per_second": 4.633, "step": 30 }, { "epoch": 0.12999071494893222, "grad_norm": 0.26488766074180603, "learning_rate": 3.086582838174551e-05, "loss": 2.4306, "step": 35 }, { "epoch": 0.14856081708449395, "grad_norm": 0.2455853372812271, "learning_rate": 1.4644660940672627e-05, "loss": 2.4061, "step": 40 }, { "epoch": 0.14856081708449395, "eval_loss": 2.3940765857696533, "eval_runtime": 12.3681, "eval_samples_per_second": 9.217, "eval_steps_per_second": 4.609, "step": 40 }, { "epoch": 0.1671309192200557, "grad_norm": 0.2584855258464813, "learning_rate": 3.8060233744356633e-06, "loss": 2.4314, "step": 45 }, { "epoch": 0.18570102135561745, "grad_norm": 0.24273546040058136, "learning_rate": 0.0, "loss": 2.3304, "step": 50 }, { "epoch": 0.18570102135561745, "eval_loss": 2.3918755054473877, "eval_runtime": 12.2602, "eval_samples_per_second": 9.298, "eval_steps_per_second": 4.649, "step": 50 } ], "logging_steps": 5, "max_steps": 50, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 13, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.6246613030731776e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }