{ "best_metric": 0.9102813601493835, "best_model_checkpoint": "./outputs/checkpoint-2800", "epoch": 3.7333333333333334, "eval_steps": 100, "global_step": 2800, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.13, "learning_rate": 0.0002, "loss": 1.001, "step": 100 }, { "epoch": 0.13, "eval_loss": 1.0910990238189697, "eval_runtime": 728.6528, "eval_samples_per_second": 2.649, "eval_steps_per_second": 0.332, "step": 100 }, { "epoch": 0.27, "learning_rate": 0.0002, "loss": 0.8971, "step": 200 }, { "epoch": 0.27, "eval_loss": 1.0677164793014526, "eval_runtime": 727.2702, "eval_samples_per_second": 2.654, "eval_steps_per_second": 0.333, "step": 200 }, { "epoch": 0.4, "learning_rate": 0.0002, "loss": 0.8815, "step": 300 }, { "epoch": 0.4, "eval_loss": 1.052659273147583, "eval_runtime": 730.164, "eval_samples_per_second": 2.643, "eval_steps_per_second": 0.331, "step": 300 }, { "epoch": 0.53, "learning_rate": 0.0002, "loss": 0.8695, "step": 400 }, { "epoch": 0.53, "eval_loss": 1.0382879972457886, "eval_runtime": 732.8796, "eval_samples_per_second": 2.633, "eval_steps_per_second": 0.33, "step": 400 }, { "epoch": 0.67, "learning_rate": 0.0002, "loss": 0.8548, "step": 500 }, { "epoch": 0.67, "eval_loss": 1.0260452032089233, "eval_runtime": 730.1971, "eval_samples_per_second": 2.643, "eval_steps_per_second": 0.331, "step": 500 }, { "epoch": 0.8, "learning_rate": 0.0002, "loss": 0.8496, "step": 600 }, { "epoch": 0.8, "eval_loss": 1.0152662992477417, "eval_runtime": 732.4802, "eval_samples_per_second": 2.635, "eval_steps_per_second": 0.33, "step": 600 }, { "epoch": 0.93, "learning_rate": 0.0002, "loss": 0.8445, "step": 700 }, { "epoch": 0.93, "eval_loss": 1.0105178356170654, "eval_runtime": 802.0691, "eval_samples_per_second": 2.406, "eval_steps_per_second": 0.302, "step": 700 }, { "epoch": 1.07, "learning_rate": 0.0002, "loss": 0.8346, "step": 800 }, { "epoch": 1.07, "eval_loss": 1.0066964626312256, "eval_runtime": 789.7964, "eval_samples_per_second": 2.444, "eval_steps_per_second": 0.306, "step": 800 }, { "epoch": 1.2, "learning_rate": 0.0002, "loss": 0.8199, "step": 900 }, { "epoch": 1.2, "eval_loss": 0.9989904165267944, "eval_runtime": 786.9058, "eval_samples_per_second": 2.453, "eval_steps_per_second": 0.308, "step": 900 }, { "epoch": 1.33, "learning_rate": 0.0002, "loss": 0.8133, "step": 1000 }, { "epoch": 1.33, "eval_loss": 0.9895688891410828, "eval_runtime": 782.3169, "eval_samples_per_second": 2.467, "eval_steps_per_second": 0.309, "step": 1000 }, { "epoch": 1.47, "learning_rate": 0.0002, "loss": 0.8193, "step": 1100 }, { "epoch": 1.47, "eval_loss": 0.9853964447975159, "eval_runtime": 777.984, "eval_samples_per_second": 2.481, "eval_steps_per_second": 0.311, "step": 1100 }, { "epoch": 1.6, "learning_rate": 0.0002, "loss": 0.8091, "step": 1200 }, { "epoch": 1.6, "eval_loss": 0.9784607887268066, "eval_runtime": 780.7522, "eval_samples_per_second": 2.472, "eval_steps_per_second": 0.31, "step": 1200 }, { "epoch": 1.73, "learning_rate": 0.0002, "loss": 0.7983, "step": 1300 }, { "epoch": 1.73, "eval_loss": 0.9710213541984558, "eval_runtime": 787.7479, "eval_samples_per_second": 2.45, "eval_steps_per_second": 0.307, "step": 1300 }, { "epoch": 1.87, "learning_rate": 0.0002, "loss": 0.7971, "step": 1400 }, { "epoch": 1.87, "eval_loss": 0.9653750658035278, "eval_runtime": 777.1907, "eval_samples_per_second": 2.483, "eval_steps_per_second": 0.311, "step": 1400 }, { "epoch": 2.0, "learning_rate": 0.0002, "loss": 0.7899, "step": 1500 }, { "epoch": 2.0, "eval_loss": 0.9598689675331116, "eval_runtime": 788.3937, "eval_samples_per_second": 2.448, "eval_steps_per_second": 0.307, "step": 1500 }, { "epoch": 2.13, "learning_rate": 0.0002, "loss": 0.7689, "step": 1600 }, { "epoch": 2.13, "eval_loss": 0.9570510983467102, "eval_runtime": 782.2868, "eval_samples_per_second": 2.467, "eval_steps_per_second": 0.309, "step": 1600 }, { "epoch": 2.27, "learning_rate": 0.0002, "loss": 0.7676, "step": 1700 }, { "epoch": 2.27, "eval_loss": 0.9513885378837585, "eval_runtime": 768.2363, "eval_samples_per_second": 2.512, "eval_steps_per_second": 0.315, "step": 1700 }, { "epoch": 2.4, "learning_rate": 0.0002, "loss": 0.766, "step": 1800 }, { "epoch": 2.4, "eval_loss": 0.9479925036430359, "eval_runtime": 792.9294, "eval_samples_per_second": 2.434, "eval_steps_per_second": 0.305, "step": 1800 }, { "epoch": 2.53, "learning_rate": 0.0002, "loss": 0.7587, "step": 1900 }, { "epoch": 2.53, "eval_loss": 0.9420929551124573, "eval_runtime": 787.6767, "eval_samples_per_second": 2.45, "eval_steps_per_second": 0.307, "step": 1900 }, { "epoch": 2.67, "learning_rate": 0.0002, "loss": 0.7546, "step": 2000 }, { "epoch": 2.67, "eval_loss": 0.9371617436408997, "eval_runtime": 785.7561, "eval_samples_per_second": 2.456, "eval_steps_per_second": 0.308, "step": 2000 }, { "epoch": 2.8, "learning_rate": 0.0002, "loss": 0.7635, "step": 2100 }, { "epoch": 2.8, "eval_loss": 0.9326320290565491, "eval_runtime": 789.0027, "eval_samples_per_second": 2.446, "eval_steps_per_second": 0.307, "step": 2100 }, { "epoch": 2.93, "learning_rate": 0.0002, "loss": 0.7632, "step": 2200 }, { "epoch": 2.93, "eval_loss": 0.9287375211715698, "eval_runtime": 785.4978, "eval_samples_per_second": 2.457, "eval_steps_per_second": 0.308, "step": 2200 }, { "epoch": 3.07, "learning_rate": 0.0002, "loss": 0.7409, "step": 2300 }, { "epoch": 3.07, "eval_loss": 0.9274052381515503, "eval_runtime": 779.2439, "eval_samples_per_second": 2.477, "eval_steps_per_second": 0.311, "step": 2300 }, { "epoch": 3.2, "learning_rate": 0.0002, "loss": 0.7247, "step": 2400 }, { "epoch": 3.2, "eval_loss": 0.9260075688362122, "eval_runtime": 775.8037, "eval_samples_per_second": 2.488, "eval_steps_per_second": 0.312, "step": 2400 }, { "epoch": 3.33, "learning_rate": 0.0002, "loss": 0.7277, "step": 2500 }, { "epoch": 3.33, "eval_loss": 0.921540379524231, "eval_runtime": 774.2681, "eval_samples_per_second": 2.493, "eval_steps_per_second": 0.313, "step": 2500 }, { "epoch": 3.47, "learning_rate": 0.0002, "loss": 0.7242, "step": 2600 }, { "epoch": 3.47, "eval_loss": 0.9189460277557373, "eval_runtime": 777.6576, "eval_samples_per_second": 2.482, "eval_steps_per_second": 0.311, "step": 2600 }, { "epoch": 3.6, "learning_rate": 0.0002, "loss": 0.7246, "step": 2700 }, { "epoch": 3.6, "eval_loss": 0.9128983020782471, "eval_runtime": 776.5971, "eval_samples_per_second": 2.485, "eval_steps_per_second": 0.312, "step": 2700 }, { "epoch": 3.73, "learning_rate": 0.0002, "loss": 0.7224, "step": 2800 }, { "epoch": 3.73, "eval_loss": 0.9102813601493835, "eval_runtime": 777.5262, "eval_samples_per_second": 2.482, "eval_steps_per_second": 0.311, "step": 2800 } ], "logging_steps": 100, "max_steps": 3750, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 100, "total_flos": 7.253666659583693e+17, "trial_name": null, "trial_params": null }