{ "best_metric": 0.06640350073575974, "best_model_checkpoint": "clock_output_cvt/checkpoint-475", "epoch": 24.675324675324674, "eval_steps": 500, "global_step": 475, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.52, "learning_rate": 1.0416666666666668e-05, "loss": 2.4077, "step": 10 }, { "epoch": 0.99, "eval_accuracy": 0.03625954198473282, "eval_loss": 2.3564352989196777, "eval_runtime": 0.6407, "eval_samples_per_second": 817.896, "eval_steps_per_second": 51.509, "step": 19 }, { "epoch": 1.04, "learning_rate": 2.0833333333333336e-05, "loss": 2.388, "step": 20 }, { "epoch": 1.56, "learning_rate": 3.125e-05, "loss": 2.3477, "step": 30 }, { "epoch": 1.97, "eval_accuracy": 0.4122137404580153, "eval_loss": 2.189662218093872, "eval_runtime": 0.6085, "eval_samples_per_second": 861.15, "eval_steps_per_second": 54.233, "step": 38 }, { "epoch": 2.08, "learning_rate": 4.166666666666667e-05, "loss": 2.2977, "step": 40 }, { "epoch": 2.6, "learning_rate": 4.9765807962529274e-05, "loss": 2.1449, "step": 50 }, { "epoch": 2.96, "eval_accuracy": 0.816793893129771, "eval_loss": 1.8227535486221313, "eval_runtime": 0.613, "eval_samples_per_second": 854.76, "eval_steps_per_second": 53.83, "step": 57 }, { "epoch": 3.12, "learning_rate": 4.8594847775175645e-05, "loss": 2.0451, "step": 60 }, { "epoch": 3.64, "learning_rate": 4.742388758782202e-05, "loss": 1.7728, "step": 70 }, { "epoch": 4.0, "eval_accuracy": 0.8549618320610687, "eval_loss": 1.2455334663391113, "eval_runtime": 0.6145, "eval_samples_per_second": 852.784, "eval_steps_per_second": 53.706, "step": 77 }, { "epoch": 4.16, "learning_rate": 4.625292740046839e-05, "loss": 1.5092, "step": 80 }, { "epoch": 4.68, "learning_rate": 4.508196721311476e-05, "loss": 1.3611, "step": 90 }, { "epoch": 4.99, "eval_accuracy": 0.9408396946564885, "eval_loss": 0.8229649662971497, "eval_runtime": 0.7007, "eval_samples_per_second": 747.777, "eval_steps_per_second": 47.093, "step": 96 }, { "epoch": 5.19, "learning_rate": 4.3911007025761124e-05, "loss": 1.254, "step": 100 }, { "epoch": 5.71, "learning_rate": 4.2740046838407495e-05, "loss": 1.1696, "step": 110 }, { "epoch": 5.97, "eval_accuracy": 0.9580152671755725, "eval_loss": 0.6116180419921875, "eval_runtime": 0.6501, "eval_samples_per_second": 806.052, "eval_steps_per_second": 50.763, "step": 115 }, { "epoch": 6.23, "learning_rate": 4.156908665105387e-05, "loss": 1.0323, "step": 120 }, { "epoch": 6.75, "learning_rate": 4.039812646370024e-05, "loss": 1.0261, "step": 130 }, { "epoch": 6.96, "eval_accuracy": 0.9866412213740458, "eval_loss": 0.4360557198524475, "eval_runtime": 0.6176, "eval_samples_per_second": 848.435, "eval_steps_per_second": 53.432, "step": 134 }, { "epoch": 7.27, "learning_rate": 3.92271662763466e-05, "loss": 0.7872, "step": 140 }, { "epoch": 7.79, "learning_rate": 3.8056206088992974e-05, "loss": 0.8913, "step": 150 }, { "epoch": 8.0, "eval_accuracy": 0.9790076335877863, "eval_loss": 0.3085819184780121, "eval_runtime": 0.6116, "eval_samples_per_second": 856.814, "eval_steps_per_second": 53.96, "step": 154 }, { "epoch": 8.31, "learning_rate": 3.6885245901639346e-05, "loss": 0.7895, "step": 160 }, { "epoch": 8.83, "learning_rate": 3.571428571428572e-05, "loss": 0.6873, "step": 170 }, { "epoch": 8.99, "eval_accuracy": 0.9904580152671756, "eval_loss": 0.21129438281059265, "eval_runtime": 0.6415, "eval_samples_per_second": 816.831, "eval_steps_per_second": 51.442, "step": 173 }, { "epoch": 9.35, "learning_rate": 3.454332552693209e-05, "loss": 0.6743, "step": 180 }, { "epoch": 9.87, "learning_rate": 3.337236533957845e-05, "loss": 0.7179, "step": 190 }, { "epoch": 9.97, "eval_accuracy": 0.9904580152671756, "eval_loss": 0.18043160438537598, "eval_runtime": 0.6319, "eval_samples_per_second": 829.26, "eval_steps_per_second": 52.224, "step": 192 }, { "epoch": 10.39, "learning_rate": 3.220140515222483e-05, "loss": 0.7141, "step": 200 }, { "epoch": 10.91, "learning_rate": 3.1030444964871196e-05, "loss": 0.6876, "step": 210 }, { "epoch": 10.96, "eval_accuracy": 0.9923664122137404, "eval_loss": 0.14928863942623138, "eval_runtime": 0.6689, "eval_samples_per_second": 783.382, "eval_steps_per_second": 49.335, "step": 211 }, { "epoch": 11.43, "learning_rate": 2.9859484777517567e-05, "loss": 0.5847, "step": 220 }, { "epoch": 11.95, "learning_rate": 2.8688524590163935e-05, "loss": 0.5725, "step": 230 }, { "epoch": 12.0, "eval_accuracy": 0.9923664122137404, "eval_loss": 0.12330139428377151, "eval_runtime": 0.6335, "eval_samples_per_second": 827.089, "eval_steps_per_second": 52.088, "step": 231 }, { "epoch": 12.47, "learning_rate": 2.7517564402810303e-05, "loss": 0.6264, "step": 240 }, { "epoch": 12.99, "learning_rate": 2.6346604215456678e-05, "loss": 0.6068, "step": 250 }, { "epoch": 12.99, "eval_accuracy": 0.9923664122137404, "eval_loss": 0.10889697074890137, "eval_runtime": 0.6123, "eval_samples_per_second": 855.795, "eval_steps_per_second": 53.895, "step": 250 }, { "epoch": 13.51, "learning_rate": 2.5175644028103046e-05, "loss": 0.7263, "step": 260 }, { "epoch": 13.97, "eval_accuracy": 0.9923664122137404, "eval_loss": 0.09741852432489395, "eval_runtime": 0.6166, "eval_samples_per_second": 849.786, "eval_steps_per_second": 53.517, "step": 269 }, { "epoch": 14.03, "learning_rate": 2.4004683840749414e-05, "loss": 0.5785, "step": 270 }, { "epoch": 14.55, "learning_rate": 2.2833723653395785e-05, "loss": 0.5596, "step": 280 }, { "epoch": 14.96, "eval_accuracy": 0.9923664122137404, "eval_loss": 0.08803129196166992, "eval_runtime": 0.6127, "eval_samples_per_second": 855.286, "eval_steps_per_second": 53.863, "step": 288 }, { "epoch": 15.06, "learning_rate": 2.1662763466042153e-05, "loss": 0.604, "step": 290 }, { "epoch": 15.58, "learning_rate": 2.0491803278688525e-05, "loss": 0.5638, "step": 300 }, { "epoch": 16.0, "eval_accuracy": 0.9923664122137404, "eval_loss": 0.08413369953632355, "eval_runtime": 0.6464, "eval_samples_per_second": 810.624, "eval_steps_per_second": 51.051, "step": 308 }, { "epoch": 16.1, "learning_rate": 1.9320843091334896e-05, "loss": 0.5579, "step": 310 }, { "epoch": 16.62, "learning_rate": 1.8149882903981267e-05, "loss": 0.5601, "step": 320 }, { "epoch": 16.99, "eval_accuracy": 0.9923664122137404, "eval_loss": 0.07752922177314758, "eval_runtime": 0.6764, "eval_samples_per_second": 774.698, "eval_steps_per_second": 48.788, "step": 327 }, { "epoch": 17.14, "learning_rate": 1.6978922716627635e-05, "loss": 0.3986, "step": 330 }, { "epoch": 17.66, "learning_rate": 1.5807962529274007e-05, "loss": 0.606, "step": 340 }, { "epoch": 17.97, "eval_accuracy": 0.9923664122137404, "eval_loss": 0.07416040450334549, "eval_runtime": 0.6307, "eval_samples_per_second": 830.823, "eval_steps_per_second": 52.323, "step": 346 }, { "epoch": 18.18, "learning_rate": 1.4637002341920375e-05, "loss": 0.6279, "step": 350 }, { "epoch": 18.7, "learning_rate": 1.3466042154566746e-05, "loss": 0.5748, "step": 360 }, { "epoch": 18.96, "eval_accuracy": 0.9923664122137404, "eval_loss": 0.07282942533493042, "eval_runtime": 0.8001, "eval_samples_per_second": 654.911, "eval_steps_per_second": 41.244, "step": 365 }, { "epoch": 19.22, "learning_rate": 1.2295081967213116e-05, "loss": 0.5834, "step": 370 }, { "epoch": 19.74, "learning_rate": 1.1124121779859485e-05, "loss": 0.4852, "step": 380 }, { "epoch": 20.0, "eval_accuracy": 0.9923664122137404, "eval_loss": 0.07014743238687515, "eval_runtime": 0.7282, "eval_samples_per_second": 719.555, "eval_steps_per_second": 45.315, "step": 385 }, { "epoch": 20.26, "learning_rate": 9.953161592505855e-06, "loss": 0.5781, "step": 390 }, { "epoch": 20.78, "learning_rate": 8.782201405152225e-06, "loss": 0.4564, "step": 400 }, { "epoch": 20.99, "eval_accuracy": 0.9923664122137404, "eval_loss": 0.06891454756259918, "eval_runtime": 0.7574, "eval_samples_per_second": 691.858, "eval_steps_per_second": 43.571, "step": 404 }, { "epoch": 21.3, "learning_rate": 7.611241217798595e-06, "loss": 0.5382, "step": 410 }, { "epoch": 21.82, "learning_rate": 6.440281030444965e-06, "loss": 0.4278, "step": 420 }, { "epoch": 21.97, "eval_accuracy": 0.9923664122137404, "eval_loss": 0.06956692785024643, "eval_runtime": 0.7478, "eval_samples_per_second": 700.728, "eval_steps_per_second": 44.13, "step": 423 }, { "epoch": 22.34, "learning_rate": 5.2693208430913356e-06, "loss": 0.4226, "step": 430 }, { "epoch": 22.86, "learning_rate": 4.098360655737704e-06, "loss": 0.481, "step": 440 }, { "epoch": 22.96, "eval_accuracy": 0.9923664122137404, "eval_loss": 0.06689619272947311, "eval_runtime": 0.7742, "eval_samples_per_second": 676.868, "eval_steps_per_second": 42.627, "step": 442 }, { "epoch": 23.38, "learning_rate": 2.9274004683840754e-06, "loss": 0.5381, "step": 450 }, { "epoch": 23.9, "learning_rate": 1.7564402810304448e-06, "loss": 0.4477, "step": 460 }, { "epoch": 24.0, "eval_accuracy": 0.9923664122137404, "eval_loss": 0.06719958037137985, "eval_runtime": 0.6158, "eval_samples_per_second": 850.91, "eval_steps_per_second": 53.588, "step": 462 }, { "epoch": 24.42, "learning_rate": 5.85480093676815e-07, "loss": 0.598, "step": 470 }, { "epoch": 24.68, "eval_accuracy": 0.9923664122137404, "eval_loss": 0.06640350073575974, "eval_runtime": 0.6103, "eval_samples_per_second": 858.552, "eval_steps_per_second": 54.069, "step": 475 } ], "logging_steps": 10, "max_steps": 475, "num_input_tokens_seen": 0, "num_train_epochs": 25, "save_steps": 500, "total_flos": 2737124951961600.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }