{ "best_metric": 0.9542251825332642, "best_model_checkpoint": "./outputs/checkpoint-2100", "epoch": 2.9829545454545454, "eval_steps": 100, "global_step": 2100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.14, "learning_rate": 0.0002, "loss": 1.5836, "step": 100 }, { "epoch": 0.14, "eval_loss": 1.0926024913787842, "eval_runtime": 92.4288, "eval_samples_per_second": 16.142, "eval_steps_per_second": 2.023, "step": 100 }, { "epoch": 0.28, "learning_rate": 0.0002, "loss": 1.2272, "step": 200 }, { "epoch": 0.28, "eval_loss": 1.071556806564331, "eval_runtime": 89.865, "eval_samples_per_second": 16.603, "eval_steps_per_second": 2.081, "step": 200 }, { "epoch": 0.43, "learning_rate": 0.0002, "loss": 1.2134, "step": 300 }, { "epoch": 0.43, "eval_loss": 1.0566277503967285, "eval_runtime": 89.873, "eval_samples_per_second": 16.601, "eval_steps_per_second": 2.081, "step": 300 }, { "epoch": 0.57, "learning_rate": 0.0002, "loss": 1.1959, "step": 400 }, { "epoch": 0.57, "eval_loss": 1.0465874671936035, "eval_runtime": 89.8186, "eval_samples_per_second": 16.611, "eval_steps_per_second": 2.082, "step": 400 }, { "epoch": 0.71, "learning_rate": 0.0002, "loss": 1.1861, "step": 500 }, { "epoch": 0.71, "eval_loss": 1.0348858833312988, "eval_runtime": 89.7272, "eval_samples_per_second": 16.628, "eval_steps_per_second": 2.084, "step": 500 }, { "epoch": 0.85, "learning_rate": 0.0002, "loss": 1.1735, "step": 600 }, { "epoch": 0.85, "eval_loss": 1.0253515243530273, "eval_runtime": 89.7076, "eval_samples_per_second": 16.632, "eval_steps_per_second": 2.085, "step": 600 }, { "epoch": 0.99, "learning_rate": 0.0002, "loss": 1.1615, "step": 700 }, { "epoch": 0.99, "eval_loss": 1.0192995071411133, "eval_runtime": 89.8909, "eval_samples_per_second": 16.598, "eval_steps_per_second": 2.08, "step": 700 }, { "epoch": 1.14, "learning_rate": 0.0002, "loss": 1.1509, "step": 800 }, { "epoch": 1.14, "eval_loss": 1.0122530460357666, "eval_runtime": 89.7843, "eval_samples_per_second": 16.618, "eval_steps_per_second": 2.083, "step": 800 }, { "epoch": 1.28, "learning_rate": 0.0002, "loss": 1.1265, "step": 900 }, { "epoch": 1.28, "eval_loss": 1.0055540800094604, "eval_runtime": 89.7773, "eval_samples_per_second": 16.619, "eval_steps_per_second": 2.083, "step": 900 }, { "epoch": 1.42, "learning_rate": 0.0002, "loss": 1.1298, "step": 1000 }, { "epoch": 1.42, "eval_loss": 1.0025967359542847, "eval_runtime": 89.9303, "eval_samples_per_second": 16.591, "eval_steps_per_second": 2.079, "step": 1000 }, { "epoch": 1.56, "learning_rate": 0.0002, "loss": 1.1323, "step": 1100 }, { "epoch": 1.56, "eval_loss": 0.9946721792221069, "eval_runtime": 90.1252, "eval_samples_per_second": 16.555, "eval_steps_per_second": 2.075, "step": 1100 }, { "epoch": 1.7, "learning_rate": 0.0002, "loss": 1.1193, "step": 1200 }, { "epoch": 1.7, "eval_loss": 0.9902428984642029, "eval_runtime": 89.9449, "eval_samples_per_second": 16.588, "eval_steps_per_second": 2.079, "step": 1200 }, { "epoch": 1.85, "learning_rate": 0.0002, "loss": 1.1111, "step": 1300 }, { "epoch": 1.85, "eval_loss": 0.9833947420120239, "eval_runtime": 90.3396, "eval_samples_per_second": 16.515, "eval_steps_per_second": 2.07, "step": 1300 }, { "epoch": 1.99, "learning_rate": 0.0002, "loss": 1.1141, "step": 1400 }, { "epoch": 1.99, "eval_loss": 0.9805576205253601, "eval_runtime": 89.7687, "eval_samples_per_second": 16.62, "eval_steps_per_second": 2.083, "step": 1400 }, { "epoch": 2.13, "learning_rate": 0.0002, "loss": 1.0911, "step": 1500 }, { "epoch": 2.13, "eval_loss": 0.9768579602241516, "eval_runtime": 89.855, "eval_samples_per_second": 16.605, "eval_steps_per_second": 2.081, "step": 1500 }, { "epoch": 2.27, "learning_rate": 0.0002, "loss": 1.0919, "step": 1600 }, { "epoch": 2.27, "eval_loss": 0.9718981981277466, "eval_runtime": 89.8866, "eval_samples_per_second": 16.599, "eval_steps_per_second": 2.08, "step": 1600 }, { "epoch": 2.41, "learning_rate": 0.0002, "loss": 1.073, "step": 1700 }, { "epoch": 2.41, "eval_loss": 0.9693555235862732, "eval_runtime": 89.9595, "eval_samples_per_second": 16.585, "eval_steps_per_second": 2.079, "step": 1700 }, { "epoch": 2.56, "learning_rate": 0.0002, "loss": 1.0759, "step": 1800 }, { "epoch": 2.56, "eval_loss": 0.9647061228752136, "eval_runtime": 90.1576, "eval_samples_per_second": 16.549, "eval_steps_per_second": 2.074, "step": 1800 }, { "epoch": 2.7, "learning_rate": 0.0002, "loss": 1.0784, "step": 1900 }, { "epoch": 2.7, "eval_loss": 0.961346447467804, "eval_runtime": 89.8749, "eval_samples_per_second": 16.601, "eval_steps_per_second": 2.081, "step": 1900 }, { "epoch": 2.84, "learning_rate": 0.0002, "loss": 1.0755, "step": 2000 }, { "epoch": 2.84, "eval_loss": 0.9575291275978088, "eval_runtime": 89.8309, "eval_samples_per_second": 16.609, "eval_steps_per_second": 2.082, "step": 2000 }, { "epoch": 2.98, "learning_rate": 0.0002, "loss": 1.0721, "step": 2100 }, { "epoch": 2.98, "eval_loss": 0.9542251825332642, "eval_runtime": 89.9742, "eval_samples_per_second": 16.583, "eval_steps_per_second": 2.078, "step": 2100 } ], "logging_steps": 100, "max_steps": 2112, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 100, "total_flos": 1.1673223042295808e+17, "trial_name": null, "trial_params": null }