{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.922374429223744, "eval_steps": 500, "global_step": 16000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.09, "learning_rate": 1.9391171993911722e-05, "loss": 2.9417, "step": 500 }, { "epoch": 0.18, "learning_rate": 1.8782343987823442e-05, "loss": 1.7699, "step": 1000 }, { "epoch": 0.27, "learning_rate": 1.8173515981735163e-05, "loss": 1.5303, "step": 1500 }, { "epoch": 0.37, "learning_rate": 1.756468797564688e-05, "loss": 1.46, "step": 2000 }, { "epoch": 0.46, "learning_rate": 1.69558599695586e-05, "loss": 1.393, "step": 2500 }, { "epoch": 0.55, "learning_rate": 1.634703196347032e-05, "loss": 1.3692, "step": 3000 }, { "epoch": 0.64, "learning_rate": 1.573820395738204e-05, "loss": 1.3134, "step": 3500 }, { "epoch": 0.73, "learning_rate": 1.5129375951293761e-05, "loss": 1.2416, "step": 4000 }, { "epoch": 0.82, "learning_rate": 1.4520547945205482e-05, "loss": 1.2574, "step": 4500 }, { "epoch": 0.91, "learning_rate": 1.39117199391172e-05, "loss": 1.2039, "step": 5000 }, { "epoch": 1.0, "eval_loss": 1.1474684476852417, "eval_runtime": 59.0937, "eval_samples_per_second": 178.868, "eval_steps_per_second": 11.186, "step": 5475 }, { "epoch": 1.0, "learning_rate": 1.330289193302892e-05, "loss": 1.1717, "step": 5500 }, { "epoch": 1.1, "learning_rate": 1.2694063926940641e-05, "loss": 0.9552, "step": 6000 }, { "epoch": 1.19, "learning_rate": 1.2085235920852361e-05, "loss": 0.9877, "step": 6500 }, { "epoch": 1.28, "learning_rate": 1.147640791476408e-05, "loss": 0.9877, "step": 7000 }, { "epoch": 1.37, "learning_rate": 1.08675799086758e-05, "loss": 0.9727, "step": 7500 }, { "epoch": 1.46, "learning_rate": 1.025875190258752e-05, "loss": 0.9713, "step": 8000 }, { "epoch": 1.55, "learning_rate": 9.64992389649924e-06, "loss": 0.9441, "step": 8500 }, { "epoch": 1.64, "learning_rate": 9.04109589041096e-06, "loss": 0.9527, "step": 9000 }, { "epoch": 1.74, "learning_rate": 8.432267884322679e-06, "loss": 0.9311, "step": 9500 }, { "epoch": 1.83, "learning_rate": 7.823439878234399e-06, "loss": 0.9318, "step": 10000 }, { "epoch": 1.92, "learning_rate": 7.214611872146119e-06, "loss": 0.9639, "step": 10500 }, { "epoch": 2.0, "eval_loss": 1.0952799320220947, "eval_runtime": 59.2147, "eval_samples_per_second": 178.503, "eval_steps_per_second": 11.163, "step": 10950 }, { "epoch": 2.01, "learning_rate": 6.605783866057839e-06, "loss": 0.9442, "step": 11000 }, { "epoch": 2.1, "learning_rate": 5.996955859969558e-06, "loss": 0.7817, "step": 11500 }, { "epoch": 2.19, "learning_rate": 5.388127853881279e-06, "loss": 0.7787, "step": 12000 }, { "epoch": 2.28, "learning_rate": 4.779299847792998e-06, "loss": 0.7367, "step": 12500 }, { "epoch": 2.37, "learning_rate": 4.170471841704719e-06, "loss": 0.7513, "step": 13000 }, { "epoch": 2.47, "learning_rate": 3.5616438356164386e-06, "loss": 0.7739, "step": 13500 }, { "epoch": 2.56, "learning_rate": 2.9528158295281586e-06, "loss": 0.7591, "step": 14000 }, { "epoch": 2.65, "learning_rate": 2.343987823439878e-06, "loss": 0.7491, "step": 14500 }, { "epoch": 2.74, "learning_rate": 1.7351598173515982e-06, "loss": 0.7394, "step": 15000 }, { "epoch": 2.83, "learning_rate": 1.1263318112633182e-06, "loss": 0.7548, "step": 15500 }, { "epoch": 2.92, "learning_rate": 5.17503805175038e-07, "loss": 0.7728, "step": 16000 } ], "logging_steps": 500, "max_steps": 16425, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "total_flos": 2.508519922649395e+16, "train_batch_size": 16, "trial_name": null, "trial_params": null }