{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.1813236627379873, "eval_steps": 5, "global_step": 50, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.003626473254759746, "eval_loss": 10.378419876098633, "eval_runtime": 3.4259, "eval_samples_per_second": 286.053, "eval_steps_per_second": 35.903, "step": 1 }, { "epoch": 0.010879419764279238, "grad_norm": 0.028440434485673904, "learning_rate": 3e-05, "loss": 10.3783, "step": 3 }, { "epoch": 0.01813236627379873, "eval_loss": 10.378182411193848, "eval_runtime": 3.4267, "eval_samples_per_second": 285.988, "eval_steps_per_second": 35.894, "step": 5 }, { "epoch": 0.021758839528558477, "grad_norm": 0.029924781993031502, "learning_rate": 6e-05, "loss": 10.3786, "step": 6 }, { "epoch": 0.032638259292837715, "grad_norm": 0.032576046884059906, "learning_rate": 9e-05, "loss": 10.3787, "step": 9 }, { "epoch": 0.03626473254759746, "eval_loss": 10.377327919006348, "eval_runtime": 3.4705, "eval_samples_per_second": 282.383, "eval_steps_per_second": 35.442, "step": 10 }, { "epoch": 0.043517679057116954, "grad_norm": 0.033237360417842865, "learning_rate": 0.00012, "loss": 10.3777, "step": 12 }, { "epoch": 0.05439709882139619, "grad_norm": 0.037652939558029175, "learning_rate": 0.00015000000000000001, "loss": 10.3763, "step": 15 }, { "epoch": 0.05439709882139619, "eval_loss": 10.37564754486084, "eval_runtime": 3.4616, "eval_samples_per_second": 283.109, "eval_steps_per_second": 35.533, "step": 15 }, { "epoch": 0.06527651858567543, "grad_norm": 0.04887213185429573, "learning_rate": 0.00018, "loss": 10.3747, "step": 18 }, { "epoch": 0.07252946509519492, "eval_loss": 10.372203826904297, "eval_runtime": 3.4694, "eval_samples_per_second": 282.472, "eval_steps_per_second": 35.453, "step": 20 }, { "epoch": 0.07615593834995467, "grad_norm": 0.07486271858215332, "learning_rate": 0.00019945218953682734, "loss": 10.3732, "step": 21 }, { "epoch": 0.08703535811423391, "grad_norm": 0.10773558914661407, "learning_rate": 0.0001913545457642601, "loss": 10.3699, "step": 24 }, { "epoch": 0.09066183136899365, "eval_loss": 10.365472793579102, "eval_runtime": 3.4645, "eval_samples_per_second": 282.866, "eval_steps_per_second": 35.503, "step": 25 }, { "epoch": 0.09791477787851315, "grad_norm": 0.10963417589664459, "learning_rate": 0.00017431448254773944, "loss": 10.3655, "step": 27 }, { "epoch": 0.10879419764279238, "grad_norm": 0.09174247831106186, "learning_rate": 0.00015000000000000001, "loss": 10.3617, "step": 30 }, { "epoch": 0.10879419764279238, "eval_loss": 10.359057426452637, "eval_runtime": 3.4524, "eval_samples_per_second": 283.857, "eval_steps_per_second": 35.627, "step": 30 }, { "epoch": 0.11967361740707162, "grad_norm": 0.06661375612020493, "learning_rate": 0.00012079116908177593, "loss": 10.3586, "step": 33 }, { "epoch": 0.12692656391659113, "eval_loss": 10.356401443481445, "eval_runtime": 3.4528, "eval_samples_per_second": 283.831, "eval_steps_per_second": 35.624, "step": 35 }, { "epoch": 0.13055303717135086, "grad_norm": 0.06876756995916367, "learning_rate": 8.954715367323468e-05, "loss": 10.357, "step": 36 }, { "epoch": 0.1414324569356301, "grad_norm": 0.051274798810482025, "learning_rate": 5.9326335692419995e-05, "loss": 10.356, "step": 39 }, { "epoch": 0.14505893019038985, "eval_loss": 10.355536460876465, "eval_runtime": 3.4523, "eval_samples_per_second": 283.867, "eval_steps_per_second": 35.628, "step": 40 }, { "epoch": 0.15231187669990934, "grad_norm": 0.03581469506025314, "learning_rate": 3.308693936411421e-05, "loss": 10.3554, "step": 42 }, { "epoch": 0.1631912964641886, "grad_norm": 0.038194689899683, "learning_rate": 1.339745962155613e-05, "loss": 10.3555, "step": 45 }, { "epoch": 0.1631912964641886, "eval_loss": 10.355265617370605, "eval_runtime": 3.4538, "eval_samples_per_second": 283.743, "eval_steps_per_second": 35.613, "step": 45 }, { "epoch": 0.17407071622846781, "grad_norm": 0.037530913949012756, "learning_rate": 2.1852399266194314e-06, "loss": 10.3563, "step": 48 }, { "epoch": 0.1813236627379873, "eval_loss": 10.355225563049316, "eval_runtime": 3.4386, "eval_samples_per_second": 285.001, "eval_steps_per_second": 35.77, "step": 50 } ], "logging_steps": 3, "max_steps": 50, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 5, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 11155066060800.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }