{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.0653061224489796, "eval_steps": 5, "global_step": 20, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0032653061224489797, "grad_norm": 0.3841452896595001, "learning_rate": 1e-05, "loss": 10.8294, "step": 1 }, { "epoch": 0.0032653061224489797, "eval_loss": 10.841578483581543, "eval_runtime": 8.2506, "eval_samples_per_second": 62.541, "eval_steps_per_second": 31.271, "step": 1 }, { "epoch": 0.006530612244897959, "grad_norm": 0.3547123670578003, "learning_rate": 2e-05, "loss": 10.8332, "step": 2 }, { "epoch": 0.009795918367346938, "grad_norm": 0.37293732166290283, "learning_rate": 3e-05, "loss": 10.8352, "step": 3 }, { "epoch": 0.013061224489795919, "grad_norm": 0.35608258843421936, "learning_rate": 4e-05, "loss": 10.8397, "step": 4 }, { "epoch": 0.0163265306122449, "grad_norm": 0.3527994751930237, "learning_rate": 5e-05, "loss": 10.8465, "step": 5 }, { "epoch": 0.0163265306122449, "eval_loss": 10.840397834777832, "eval_runtime": 0.7152, "eval_samples_per_second": 721.472, "eval_steps_per_second": 360.736, "step": 5 }, { "epoch": 0.019591836734693877, "grad_norm": 0.33146461844444275, "learning_rate": 6e-05, "loss": 10.8378, "step": 6 }, { "epoch": 0.022857142857142857, "grad_norm": 0.3458758294582367, "learning_rate": 7e-05, "loss": 10.8361, "step": 7 }, { "epoch": 0.026122448979591838, "grad_norm": 0.3253280520439148, "learning_rate": 8e-05, "loss": 10.8374, "step": 8 }, { "epoch": 0.029387755102040815, "grad_norm": 0.3079422116279602, "learning_rate": 9e-05, "loss": 10.8323, "step": 9 }, { "epoch": 0.0326530612244898, "grad_norm": 0.308852881193161, "learning_rate": 0.0001, "loss": 10.8422, "step": 10 }, { "epoch": 0.0326530612244898, "eval_loss": 10.835927963256836, "eval_runtime": 0.7246, "eval_samples_per_second": 712.072, "eval_steps_per_second": 356.036, "step": 10 }, { "epoch": 0.035918367346938776, "grad_norm": 0.30548781156539917, "learning_rate": 9.755282581475769e-05, "loss": 10.8319, "step": 11 }, { "epoch": 0.03918367346938775, "grad_norm": 0.363088458776474, "learning_rate": 9.045084971874738e-05, "loss": 10.8294, "step": 12 }, { "epoch": 0.04244897959183674, "grad_norm": 0.324341744184494, "learning_rate": 7.938926261462366e-05, "loss": 10.8268, "step": 13 }, { "epoch": 0.045714285714285714, "grad_norm": 0.35678061842918396, "learning_rate": 6.545084971874738e-05, "loss": 10.8294, "step": 14 }, { "epoch": 0.04897959183673469, "grad_norm": 0.3367568552494049, "learning_rate": 5e-05, "loss": 10.8316, "step": 15 }, { "epoch": 0.04897959183673469, "eval_loss": 10.830291748046875, "eval_runtime": 0.7722, "eval_samples_per_second": 668.196, "eval_steps_per_second": 334.098, "step": 15 }, { "epoch": 0.052244897959183675, "grad_norm": 0.3325510323047638, "learning_rate": 3.4549150281252636e-05, "loss": 10.8348, "step": 16 }, { "epoch": 0.05551020408163265, "grad_norm": 0.32736244797706604, "learning_rate": 2.061073738537635e-05, "loss": 10.8271, "step": 17 }, { "epoch": 0.05877551020408163, "grad_norm": 0.33501970767974854, "learning_rate": 9.549150281252633e-06, "loss": 10.828, "step": 18 }, { "epoch": 0.062040816326530614, "grad_norm": 0.38715001940727234, "learning_rate": 2.4471741852423237e-06, "loss": 10.8287, "step": 19 }, { "epoch": 0.0653061224489796, "grad_norm": 0.3634048402309418, "learning_rate": 0.0, "loss": 10.8176, "step": 20 }, { "epoch": 0.0653061224489796, "eval_loss": 10.828740119934082, "eval_runtime": 0.7528, 
"eval_samples_per_second": 685.416, "eval_steps_per_second": 342.708, "step": 20 } ], "logging_steps": 1, "max_steps": 20, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 5, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6562366095360.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }