{ "best_metric": 1.9613432884216309, "best_model_checkpoint": "./outputs/checkpoint-2100", "epoch": 2.9829545454545454, "eval_steps": 100, "global_step": 2100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.14, "learning_rate": 0.0002, "loss": 2.5435, "step": 100 }, { "epoch": 0.14, "eval_loss": 2.2340614795684814, "eval_runtime": 54.1619, "eval_samples_per_second": 27.547, "eval_steps_per_second": 3.453, "step": 100 }, { "epoch": 0.28, "learning_rate": 0.0002, "loss": 2.3658, "step": 200 }, { "epoch": 0.28, "eval_loss": 2.193166971206665, "eval_runtime": 51.9149, "eval_samples_per_second": 28.739, "eval_steps_per_second": 3.602, "step": 200 }, { "epoch": 0.43, "learning_rate": 0.0002, "loss": 2.335, "step": 300 }, { "epoch": 0.43, "eval_loss": 2.1651222705841064, "eval_runtime": 51.9178, "eval_samples_per_second": 28.738, "eval_steps_per_second": 3.602, "step": 300 }, { "epoch": 0.57, "learning_rate": 0.0002, "loss": 2.3016, "step": 400 }, { "epoch": 0.57, "eval_loss": 2.1437063217163086, "eval_runtime": 52.0427, "eval_samples_per_second": 28.669, "eval_steps_per_second": 3.593, "step": 400 }, { "epoch": 0.71, "learning_rate": 0.0002, "loss": 2.2801, "step": 500 }, { "epoch": 0.71, "eval_loss": 2.1198127269744873, "eval_runtime": 51.8718, "eval_samples_per_second": 28.763, "eval_steps_per_second": 3.605, "step": 500 }, { "epoch": 0.85, "learning_rate": 0.0002, "loss": 2.2632, "step": 600 }, { "epoch": 0.85, "eval_loss": 2.100426197052002, "eval_runtime": 51.8813, "eval_samples_per_second": 28.758, "eval_steps_per_second": 3.604, "step": 600 }, { "epoch": 0.99, "learning_rate": 0.0002, "loss": 2.2421, "step": 700 }, { "epoch": 0.99, "eval_loss": 2.0882084369659424, "eval_runtime": 51.8914, "eval_samples_per_second": 28.752, "eval_steps_per_second": 3.604, "step": 700 }, { "epoch": 1.14, "learning_rate": 0.0002, "loss": 2.207, "step": 800 }, { "epoch": 1.14, "eval_loss": 2.07395076751709, "eval_runtime": 51.9153, "eval_samples_per_second": 28.739, "eval_steps_per_second": 3.602, "step": 800 }, { "epoch": 1.28, "learning_rate": 0.0002, "loss": 2.1805, "step": 900 }, { "epoch": 1.28, "eval_loss": 2.0585548877716064, "eval_runtime": 51.8541, "eval_samples_per_second": 28.773, "eval_steps_per_second": 3.606, "step": 900 }, { "epoch": 1.42, "learning_rate": 0.0002, "loss": 2.1797, "step": 1000 }, { "epoch": 1.42, "eval_loss": 2.0468482971191406, "eval_runtime": 51.913, "eval_samples_per_second": 28.74, "eval_steps_per_second": 3.602, "step": 1000 }, { "epoch": 1.56, "learning_rate": 0.0002, "loss": 2.1872, "step": 1100 }, { "epoch": 1.56, "eval_loss": 2.037107467651367, "eval_runtime": 52.0873, "eval_samples_per_second": 28.644, "eval_steps_per_second": 3.59, "step": 1100 }, { "epoch": 1.7, "learning_rate": 0.0002, "loss": 2.1651, "step": 1200 }, { "epoch": 1.7, "eval_loss": 2.0301010608673096, "eval_runtime": 51.8485, "eval_samples_per_second": 28.776, "eval_steps_per_second": 3.607, "step": 1200 }, { "epoch": 1.85, "learning_rate": 0.0002, "loss": 2.1465, "step": 1300 }, { "epoch": 1.85, "eval_loss": 2.0145328044891357, "eval_runtime": 51.9711, "eval_samples_per_second": 28.708, "eval_steps_per_second": 3.598, "step": 1300 }, { "epoch": 1.99, "learning_rate": 0.0002, "loss": 2.1501, "step": 1400 }, { "epoch": 1.99, "eval_loss": 2.0102152824401855, "eval_runtime": 51.9656, "eval_samples_per_second": 28.711, "eval_steps_per_second": 3.599, "step": 1400 }, { "epoch": 2.13, "learning_rate": 0.0002, "loss": 2.0982, "step": 1500 }, { "epoch": 2.13, "eval_loss": 2.0040762424468994, "eval_runtime": 51.9149, "eval_samples_per_second": 28.739, "eval_steps_per_second": 3.602, "step": 1500 }, { "epoch": 2.27, "learning_rate": 0.0002, "loss": 2.1027, "step": 1600 }, { "epoch": 2.27, "eval_loss": 1.9931957721710205, "eval_runtime": 51.964, "eval_samples_per_second": 28.712, "eval_steps_per_second": 3.599, "step": 1600 }, { "epoch": 2.41, "learning_rate": 0.0002, "loss": 2.0865, "step": 1700 }, { "epoch": 2.41, "eval_loss": 1.989241123199463, "eval_runtime": 51.9483, "eval_samples_per_second": 28.721, "eval_steps_per_second": 3.6, "step": 1700 }, { "epoch": 2.56, "learning_rate": 0.0002, "loss": 2.0789, "step": 1800 }, { "epoch": 2.56, "eval_loss": 1.9806030988693237, "eval_runtime": 52.181, "eval_samples_per_second": 28.593, "eval_steps_per_second": 3.584, "step": 1800 }, { "epoch": 2.7, "learning_rate": 0.0002, "loss": 2.0924, "step": 1900 }, { "epoch": 2.7, "eval_loss": 1.9745317697525024, "eval_runtime": 52.0357, "eval_samples_per_second": 28.673, "eval_steps_per_second": 3.594, "step": 1900 }, { "epoch": 2.84, "learning_rate": 0.0002, "loss": 2.0839, "step": 2000 }, { "epoch": 2.84, "eval_loss": 1.9676258563995361, "eval_runtime": 52.0774, "eval_samples_per_second": 28.65, "eval_steps_per_second": 3.591, "step": 2000 }, { "epoch": 2.98, "learning_rate": 0.0002, "loss": 2.081, "step": 2100 }, { "epoch": 2.98, "eval_loss": 1.9613432884216309, "eval_runtime": 52.027, "eval_samples_per_second": 28.677, "eval_steps_per_second": 3.594, "step": 2100 } ], "logging_steps": 100, "max_steps": 2112, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 100, "total_flos": 6.092531163629568e+16, "trial_name": null, "trial_params": null }