{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.28293545534924847, "eval_steps": 5, "global_step": 20, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.014146772767462422, "grad_norm": 4.968000411987305, "learning_rate": 1e-05, "loss": 8.3533, "step": 1 }, { "epoch": 0.014146772767462422, "eval_loss": 7.851544380187988, "eval_runtime": 85.5599, "eval_samples_per_second": 1.403, "eval_steps_per_second": 0.701, "step": 1 }, { "epoch": 0.028293545534924844, "grad_norm": 4.63974142074585, "learning_rate": 2e-05, "loss": 7.7908, "step": 2 }, { "epoch": 0.042440318302387266, "grad_norm": 5.394748687744141, "learning_rate": 3e-05, "loss": 7.5542, "step": 3 }, { "epoch": 0.05658709106984969, "grad_norm": 4.698472023010254, "learning_rate": 4e-05, "loss": 7.7776, "step": 4 }, { "epoch": 0.07073386383731212, "grad_norm": 5.1544108390808105, "learning_rate": 5e-05, "loss": 7.3847, "step": 5 }, { "epoch": 0.07073386383731212, "eval_loss": 7.290081977844238, "eval_runtime": 5.8165, "eval_samples_per_second": 20.631, "eval_steps_per_second": 10.316, "step": 5 }, { "epoch": 0.08488063660477453, "grad_norm": 5.7256669998168945, "learning_rate": 6e-05, "loss": 7.4766, "step": 6 }, { "epoch": 0.09902740937223696, "grad_norm": 5.811617374420166, "learning_rate": 7e-05, "loss": 7.0652, "step": 7 }, { "epoch": 0.11317418213969938, "grad_norm": 6.549645900726318, "learning_rate": 8e-05, "loss": 6.8741, "step": 8 }, { "epoch": 0.1273209549071618, "grad_norm": 7.107234477996826, "learning_rate": 9e-05, "loss": 6.21, "step": 9 }, { "epoch": 0.14146772767462423, "grad_norm": 6.856642246246338, "learning_rate": 0.0001, "loss": 5.349, "step": 10 }, { "epoch": 0.14146772767462423, "eval_loss": 4.707101345062256, "eval_runtime": 5.8383, "eval_samples_per_second": 20.554, "eval_steps_per_second": 10.277, "step": 10 }, { "epoch": 0.15561450044208666, "grad_norm": 7.615804195404053, "learning_rate": 9.755282581475769e-05, "loss": 4.8278, "step": 11 }, { "epoch": 0.16976127320954906, "grad_norm": 7.701565742492676, "learning_rate": 9.045084971874738e-05, "loss": 3.9847, "step": 12 }, { "epoch": 0.1839080459770115, "grad_norm": 6.278581619262695, "learning_rate": 7.938926261462366e-05, "loss": 3.133, "step": 13 }, { "epoch": 0.19805481874447392, "grad_norm": 6.357863903045654, "learning_rate": 6.545084971874738e-05, "loss": 2.2536, "step": 14 }, { "epoch": 0.21220159151193635, "grad_norm": 4.74558687210083, "learning_rate": 5e-05, "loss": 1.9522, "step": 15 }, { "epoch": 0.21220159151193635, "eval_loss": 1.695694088935852, "eval_runtime": 5.7854, "eval_samples_per_second": 20.742, "eval_steps_per_second": 10.371, "step": 15 }, { "epoch": 0.22634836427939875, "grad_norm": 4.90279483795166, "learning_rate": 3.4549150281252636e-05, "loss": 1.6686, "step": 16 }, { "epoch": 0.24049513704686118, "grad_norm": 4.2148027420043945, "learning_rate": 2.061073738537635e-05, "loss": 1.4045, "step": 17 }, { "epoch": 0.2546419098143236, "grad_norm": 4.100856304168701, "learning_rate": 9.549150281252633e-06, "loss": 1.3224, "step": 18 }, { "epoch": 0.268788682581786, "grad_norm": 2.986417531967163, "learning_rate": 2.4471741852423237e-06, "loss": 1.1764, "step": 19 }, { "epoch": 0.28293545534924847, "grad_norm": 4.208008766174316, "learning_rate": 0.0, "loss": 1.7821, "step": 20 }, { "epoch": 0.28293545534924847, "eval_loss": 1.1980129480361938, "eval_runtime": 5.8173, "eval_samples_per_second": 20.628, "eval_steps_per_second": 10.314, "step": 20 } ], "logging_steps": 1, "max_steps": 20, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 5, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.3388561252352e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }