{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.057927961381359076, "eval_steps": 13, "global_step": 39, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0014853323431117712, "eval_loss": 0.44206228852272034, "eval_runtime": 6.4914, "eval_samples_per_second": 174.692, "eval_steps_per_second": 21.875, "step": 1 }, { "epoch": 0.007426661715558856, "grad_norm": 1.8116068840026855, "learning_rate": 5e-05, "loss": 0.3882, "step": 5 }, { "epoch": 0.014853323431117713, "grad_norm": 1.2223440408706665, "learning_rate": 0.0001, "loss": 0.288, "step": 10 }, { "epoch": 0.019309320460453028, "eval_loss": 0.030824489891529083, "eval_runtime": 6.4833, "eval_samples_per_second": 174.911, "eval_steps_per_second": 21.902, "step": 13 }, { "epoch": 0.02227998514667657, "grad_norm": 0.8314740657806396, "learning_rate": 9.619397662556435e-05, "loss": 0.0695, "step": 15 }, { "epoch": 0.029706646862235425, "grad_norm": 0.18348777294158936, "learning_rate": 8.535533905932738e-05, "loss": 0.0089, "step": 20 }, { "epoch": 0.03713330857779428, "grad_norm": 0.10115423798561096, "learning_rate": 6.91341716182545e-05, "loss": 0.0029, "step": 25 }, { "epoch": 0.038618640920906055, "eval_loss": 0.0019636948127299547, "eval_runtime": 6.4581, "eval_samples_per_second": 175.594, "eval_steps_per_second": 21.988, "step": 26 }, { "epoch": 0.04455997029335314, "grad_norm": 0.027285493910312653, "learning_rate": 5e-05, "loss": 0.0019, "step": 30 }, { "epoch": 0.051986632008911994, "grad_norm": 0.12763981521129608, "learning_rate": 3.086582838174551e-05, "loss": 0.002, "step": 35 }, { "epoch": 0.057927961381359076, "eval_loss": 0.0011094427900388837, "eval_runtime": 6.4923, "eval_samples_per_second": 174.67, "eval_steps_per_second": 21.872, "step": 39 } ], "logging_steps": 5, "max_steps": 50, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 13, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1388995338567680.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }