{ "best_metric": 1.1136090755462646, "best_model_checkpoint": "./outputs/checkpoint-1400", "epoch": 1.8666666666666667, "eval_steps": 100, "global_step": 1400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.13, "learning_rate": 0.0002, "loss": 1.1585, "step": 100 }, { "epoch": 0.13, "eval_loss": 1.2455021142959595, "eval_runtime": 180.387, "eval_samples_per_second": 10.699, "eval_steps_per_second": 1.342, "step": 100 }, { "epoch": 0.27, "learning_rate": 0.0002, "loss": 1.0213, "step": 200 }, { "epoch": 0.27, "eval_loss": 1.2179733514785767, "eval_runtime": 177.6597, "eval_samples_per_second": 10.863, "eval_steps_per_second": 1.362, "step": 200 }, { "epoch": 0.4, "learning_rate": 0.0002, "loss": 1.0046, "step": 300 }, { "epoch": 0.4, "eval_loss": 1.202022671699524, "eval_runtime": 178.5482, "eval_samples_per_second": 10.809, "eval_steps_per_second": 1.355, "step": 300 }, { "epoch": 0.53, "learning_rate": 0.0002, "loss": 0.9918, "step": 400 }, { "epoch": 0.53, "eval_loss": 1.1907917261123657, "eval_runtime": 177.5912, "eval_samples_per_second": 10.868, "eval_steps_per_second": 1.363, "step": 400 }, { "epoch": 0.67, "learning_rate": 0.0002, "loss": 0.9766, "step": 500 }, { "epoch": 0.67, "eval_loss": 1.177116870880127, "eval_runtime": 177.7523, "eval_samples_per_second": 10.858, "eval_steps_per_second": 1.361, "step": 500 }, { "epoch": 0.8, "learning_rate": 0.0002, "loss": 0.9721, "step": 600 }, { "epoch": 0.8, "eval_loss": 1.1658653020858765, "eval_runtime": 177.5002, "eval_samples_per_second": 10.873, "eval_steps_per_second": 1.363, "step": 600 }, { "epoch": 0.93, "learning_rate": 0.0002, "loss": 0.9654, "step": 700 }, { "epoch": 0.93, "eval_loss": 1.1600923538208008, "eval_runtime": 177.776, "eval_samples_per_second": 10.856, "eval_steps_per_second": 1.361, "step": 700 }, { "epoch": 1.07, "learning_rate": 0.0002, "loss": 0.9506, "step": 800 }, { "epoch": 1.07, "eval_loss": 1.1476994752883911, "eval_runtime": 181.2478, "eval_samples_per_second": 10.648, "eval_steps_per_second": 1.335, "step": 800 }, { "epoch": 1.2, "learning_rate": 0.0002, "loss": 0.9393, "step": 900 }, { "epoch": 1.2, "eval_loss": 1.1420533657073975, "eval_runtime": 179.6022, "eval_samples_per_second": 10.746, "eval_steps_per_second": 1.347, "step": 900 }, { "epoch": 1.33, "learning_rate": 0.0002, "loss": 0.9312, "step": 1000 }, { "epoch": 1.33, "eval_loss": 1.1350293159484863, "eval_runtime": 178.8474, "eval_samples_per_second": 10.791, "eval_steps_per_second": 1.353, "step": 1000 }, { "epoch": 1.47, "learning_rate": 0.0002, "loss": 0.9391, "step": 1100 }, { "epoch": 1.47, "eval_loss": 1.1300640106201172, "eval_runtime": 177.6952, "eval_samples_per_second": 10.861, "eval_steps_per_second": 1.362, "step": 1100 }, { "epoch": 1.6, "learning_rate": 0.0002, "loss": 0.9278, "step": 1200 }, { "epoch": 1.6, "eval_loss": 1.1249330043792725, "eval_runtime": 177.6317, "eval_samples_per_second": 10.865, "eval_steps_per_second": 1.362, "step": 1200 }, { "epoch": 1.73, "learning_rate": 0.0002, "loss": 0.9172, "step": 1300 }, { "epoch": 1.73, "eval_loss": 1.116977572441101, "eval_runtime": 177.8374, "eval_samples_per_second": 10.853, "eval_steps_per_second": 1.361, "step": 1300 }, { "epoch": 1.87, "learning_rate": 0.0002, "loss": 0.9175, "step": 1400 }, { "epoch": 1.87, "eval_loss": 1.1136090755462646, "eval_runtime": 177.225, "eval_samples_per_second": 10.89, "eval_steps_per_second": 1.365, "step": 1400 } ], "logging_steps": 100, "max_steps": 2250, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 100, "total_flos": 1.0464609713209344e+17, "trial_name": null, "trial_params": null }