{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9991993594875901, "eval_steps": 4, "global_step": 78, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.051240992794235385, "grad_norm": 0.0, "learning_rate": 0.0, "loss": 1.1101, "step": 4 }, { "epoch": 0.10248198558847077, "grad_norm": 0.0, "learning_rate": 0.0, "loss": 1.085, "step": 8 }, { "epoch": 0.15372297838270615, "grad_norm": 0.19277238845825195, "learning_rate": 0.002, "loss": 1.1055, "step": 12 }, { "epoch": 0.20496397117694154, "grad_norm": 0.2435273677110672, "learning_rate": 0.0011547005383792518, "loss": 1.0199, "step": 16 }, { "epoch": 0.2562049639711769, "grad_norm": 1.447454810142517, "learning_rate": 0.0008944271909999159, "loss": 1.1178, "step": 20 }, { "epoch": 0.3074459567654123, "grad_norm": 0.34243044257164, "learning_rate": 0.0006666666666666666, "loss": 0.8637, "step": 24 }, { "epoch": 0.3586869495596477, "grad_norm": 0.18111398816108704, "learning_rate": 0.0005547001962252292, "loss": 0.8076, "step": 28 }, { "epoch": 0.4099279423538831, "grad_norm": 0.16578800976276398, "learning_rate": 0.0004850712500726659, "loss": 0.7863, "step": 32 }, { "epoch": 0.4611689351481185, "grad_norm": 0.18639568984508514, "learning_rate": 0.0004364357804719848, "loss": 0.7413, "step": 36 }, { "epoch": 0.5124099279423538, "grad_norm": 0.29591241478919983, "learning_rate": 0.0004, "loss": 0.7666, "step": 40 }, { "epoch": 0.5636509207365893, "grad_norm": 0.20059671998023987, "learning_rate": 0.0003713906763541037, "loss": 0.8008, "step": 44 }, { "epoch": 0.6148919135308246, "grad_norm": 0.16686993837356567, "learning_rate": 0.0003481553119113957, "loss": 0.7543, "step": 48 }, { "epoch": 0.6661329063250601, "grad_norm": 0.16397202014923096, "learning_rate": 0.0003287979746107146, "loss": 0.7338, "step": 52 }, { "epoch": 0.7173738991192954, "grad_norm": 0.1775909960269928, "learning_rate": 0.0003123475237772121, "loss": 0.7323, "step": 56 }, { "epoch": 0.7686148919135308, "grad_norm": 0.17287716269493103, "learning_rate": 0.00029814239699997195, "loss": 0.7414, "step": 60 }, { "epoch": 0.8198558847077662, "grad_norm": 0.19447582960128784, "learning_rate": 0.0002857142857142857, "loss": 0.7489, "step": 64 }, { "epoch": 0.8710968775020016, "grad_norm": 0.15788140892982483, "learning_rate": 0.0002747211278973781, "loss": 0.7467, "step": 68 }, { "epoch": 0.922337870296237, "grad_norm": 0.18027225136756897, "learning_rate": 0.00026490647141300875, "loss": 0.734, "step": 72 }, { "epoch": 0.9735788630904724, "grad_norm": 0.1561153680086136, "learning_rate": 0.000256073759865792, "loss": 0.7291, "step": 76 }, { "epoch": 0.9991993594875901, "step": 78, "total_flos": 7.922041751265608e+17, "train_loss": 0.8468386912957216, "train_runtime": 778.0462, "train_samples_per_second": 12.84, "train_steps_per_second": 0.1 } ], "logging_steps": 4, "max_steps": 78, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 4, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7.922041751265608e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null }