{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.37894736842105264, "eval_steps": 63, "global_step": 126, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0030075187969924814, "eval_loss": 10.374281883239746, "eval_runtime": 0.3586, "eval_samples_per_second": 390.36, "eval_steps_per_second": 195.18, "step": 1 }, { "epoch": 0.03007518796992481, "grad_norm": 0.060663752257823944, "learning_rate": 0.00019979453927503364, "loss": 10.3728, "step": 10 }, { "epoch": 0.06015037593984962, "grad_norm": 0.05574655905365944, "learning_rate": 0.00019815591569910654, "loss": 10.3704, "step": 20 }, { "epoch": 0.09022556390977443, "grad_norm": 0.07972452044487, "learning_rate": 0.00019490557470106686, "loss": 10.3695, "step": 30 }, { "epoch": 0.12030075187969924, "grad_norm": 0.14503557980060577, "learning_rate": 0.0001900968867902419, "loss": 10.3621, "step": 40 }, { "epoch": 0.15037593984962405, "grad_norm": 0.13525503873825073, "learning_rate": 0.00018380881048918405, "loss": 10.3626, "step": 50 }, { "epoch": 0.18045112781954886, "grad_norm": 0.09932214021682739, "learning_rate": 0.00017614459583691346, "loss": 10.3584, "step": 60 }, { "epoch": 0.18947368421052632, "eval_loss": 10.356450080871582, "eval_runtime": 0.3534, "eval_samples_per_second": 396.183, "eval_steps_per_second": 198.091, "step": 63 }, { "epoch": 0.21052631578947367, "grad_norm": 0.1384136825799942, "learning_rate": 0.0001672300890261317, "loss": 10.3572, "step": 70 }, { "epoch": 0.24060150375939848, "grad_norm": 0.1283988505601883, "learning_rate": 0.00015721166601221698, "loss": 10.353, "step": 80 }, { "epoch": 0.2706766917293233, "grad_norm": 0.11646091192960739, "learning_rate": 0.00014625382902408356, "loss": 10.351, "step": 90 }, { "epoch": 0.3007518796992481, "grad_norm": 0.1207568570971489, "learning_rate": 0.00013453650544213076, "loss": 10.3498, "step": 100 }, { "epoch": 0.3308270676691729, "grad_norm": 0.09924621134996414, "learning_rate": 0.00012225209339563145, "loss": 10.3497, "step": 110 }, { "epoch": 0.3609022556390977, "grad_norm": 0.08627820760011673, "learning_rate": 0.00010960230259076818, "loss": 10.3494, "step": 120 }, { "epoch": 0.37894736842105264, "eval_loss": 10.348790168762207, "eval_runtime": 0.3677, "eval_samples_per_second": 380.716, "eval_steps_per_second": 190.358, "step": 126 } ], "logging_steps": 10, "max_steps": 250, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 63, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1629294428160.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }