|
{
  "best_metric": 0.06640350073575974,
  "best_model_checkpoint": "clock_output_cvt/checkpoint-475",
  "epoch": 24.675324675324674,
  "eval_steps": 500,
  "global_step": 475,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.52,
      "learning_rate": 1.0416666666666668e-05,
      "loss": 2.4077,
      "step": 10
    },
    {
      "epoch": 0.99,
      "eval_accuracy": 0.03625954198473282,
      "eval_loss": 2.3564352989196777,
      "eval_runtime": 0.6407,
      "eval_samples_per_second": 817.896,
      "eval_steps_per_second": 51.509,
      "step": 19
    },
    {
      "epoch": 1.04,
      "learning_rate": 2.0833333333333336e-05,
      "loss": 2.388,
      "step": 20
    },
    {
      "epoch": 1.56,
      "learning_rate": 3.125e-05,
      "loss": 2.3477,
      "step": 30
    },
    {
      "epoch": 1.97,
      "eval_accuracy": 0.4122137404580153,
      "eval_loss": 2.189662218093872,
      "eval_runtime": 0.6085,
      "eval_samples_per_second": 861.15,
      "eval_steps_per_second": 54.233,
      "step": 38
    },
    {
      "epoch": 2.08,
      "learning_rate": 4.166666666666667e-05,
      "loss": 2.2977,
      "step": 40
    },
    {
      "epoch": 2.6,
      "learning_rate": 4.9765807962529274e-05,
      "loss": 2.1449,
      "step": 50
    },
    {
      "epoch": 2.96,
      "eval_accuracy": 0.816793893129771,
      "eval_loss": 1.8227535486221313,
      "eval_runtime": 0.613,
      "eval_samples_per_second": 854.76,
      "eval_steps_per_second": 53.83,
      "step": 57
    },
    {
      "epoch": 3.12,
      "learning_rate": 4.8594847775175645e-05,
      "loss": 2.0451,
      "step": 60
    },
    {
      "epoch": 3.64,
      "learning_rate": 4.742388758782202e-05,
      "loss": 1.7728,
      "step": 70
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.8549618320610687,
      "eval_loss": 1.2455334663391113,
      "eval_runtime": 0.6145,
      "eval_samples_per_second": 852.784,
      "eval_steps_per_second": 53.706,
      "step": 77
    },
    {
      "epoch": 4.16,
      "learning_rate": 4.625292740046839e-05,
      "loss": 1.5092,
      "step": 80
    },
    {
      "epoch": 4.68,
      "learning_rate": 4.508196721311476e-05,
      "loss": 1.3611,
      "step": 90
    },
    {
      "epoch": 4.99,
      "eval_accuracy": 0.9408396946564885,
      "eval_loss": 0.8229649662971497,
      "eval_runtime": 0.7007,
      "eval_samples_per_second": 747.777,
      "eval_steps_per_second": 47.093,
      "step": 96
    },
    {
      "epoch": 5.19,
      "learning_rate": 4.3911007025761124e-05,
      "loss": 1.254,
      "step": 100
    },
    {
      "epoch": 5.71,
      "learning_rate": 4.2740046838407495e-05,
      "loss": 1.1696,
      "step": 110
    },
    {
      "epoch": 5.97,
      "eval_accuracy": 0.9580152671755725,
      "eval_loss": 0.6116180419921875,
      "eval_runtime": 0.6501,
      "eval_samples_per_second": 806.052,
      "eval_steps_per_second": 50.763,
      "step": 115
    },
    {
      "epoch": 6.23,
      "learning_rate": 4.156908665105387e-05,
      "loss": 1.0323,
      "step": 120
    },
    {
      "epoch": 6.75,
      "learning_rate": 4.039812646370024e-05,
      "loss": 1.0261,
      "step": 130
    },
    {
      "epoch": 6.96,
      "eval_accuracy": 0.9866412213740458,
      "eval_loss": 0.4360557198524475,
      "eval_runtime": 0.6176,
      "eval_samples_per_second": 848.435,
      "eval_steps_per_second": 53.432,
      "step": 134
    },
    {
      "epoch": 7.27,
      "learning_rate": 3.92271662763466e-05,
      "loss": 0.7872,
      "step": 140
    },
    {
      "epoch": 7.79,
      "learning_rate": 3.8056206088992974e-05,
      "loss": 0.8913,
      "step": 150
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.9790076335877863,
      "eval_loss": 0.3085819184780121,
      "eval_runtime": 0.6116,
      "eval_samples_per_second": 856.814,
      "eval_steps_per_second": 53.96,
      "step": 154
    },
    {
      "epoch": 8.31,
      "learning_rate": 3.6885245901639346e-05,
      "loss": 0.7895,
      "step": 160
    },
    {
      "epoch": 8.83,
      "learning_rate": 3.571428571428572e-05,
      "loss": 0.6873,
      "step": 170
    },
    {
      "epoch": 8.99,
      "eval_accuracy": 0.9904580152671756,
      "eval_loss": 0.21129438281059265,
      "eval_runtime": 0.6415,
      "eval_samples_per_second": 816.831,
      "eval_steps_per_second": 51.442,
      "step": 173
    },
    {
      "epoch": 9.35,
      "learning_rate": 3.454332552693209e-05,
      "loss": 0.6743,
      "step": 180
    },
    {
      "epoch": 9.87,
      "learning_rate": 3.337236533957845e-05,
      "loss": 0.7179,
      "step": 190
    },
    {
      "epoch": 9.97,
      "eval_accuracy": 0.9904580152671756,
      "eval_loss": 0.18043160438537598,
      "eval_runtime": 0.6319,
      "eval_samples_per_second": 829.26,
      "eval_steps_per_second": 52.224,
      "step": 192
    },
    {
      "epoch": 10.39,
      "learning_rate": 3.220140515222483e-05,
      "loss": 0.7141,
      "step": 200
    },
    {
      "epoch": 10.91,
      "learning_rate": 3.1030444964871196e-05,
      "loss": 0.6876,
      "step": 210
    },
    {
      "epoch": 10.96,
      "eval_accuracy": 0.9923664122137404,
      "eval_loss": 0.14928863942623138,
      "eval_runtime": 0.6689,
      "eval_samples_per_second": 783.382,
      "eval_steps_per_second": 49.335,
      "step": 211
    },
    {
      "epoch": 11.43,
      "learning_rate": 2.9859484777517567e-05,
      "loss": 0.5847,
      "step": 220
    },
    {
      "epoch": 11.95,
      "learning_rate": 2.8688524590163935e-05,
      "loss": 0.5725,
      "step": 230
    },
    {
      "epoch": 12.0,
      "eval_accuracy": 0.9923664122137404,
      "eval_loss": 0.12330139428377151,
      "eval_runtime": 0.6335,
      "eval_samples_per_second": 827.089,
      "eval_steps_per_second": 52.088,
      "step": 231
    },
    {
      "epoch": 12.47,
      "learning_rate": 2.7517564402810303e-05,
      "loss": 0.6264,
      "step": 240
    },
    {
      "epoch": 12.99,
      "learning_rate": 2.6346604215456678e-05,
      "loss": 0.6068,
      "step": 250
    },
    {
      "epoch": 12.99,
      "eval_accuracy": 0.9923664122137404,
      "eval_loss": 0.10889697074890137,
      "eval_runtime": 0.6123,
      "eval_samples_per_second": 855.795,
      "eval_steps_per_second": 53.895,
      "step": 250
    },
    {
      "epoch": 13.51,
      "learning_rate": 2.5175644028103046e-05,
      "loss": 0.7263,
      "step": 260
    },
    {
      "epoch": 13.97,
      "eval_accuracy": 0.9923664122137404,
      "eval_loss": 0.09741852432489395,
      "eval_runtime": 0.6166,
      "eval_samples_per_second": 849.786,
      "eval_steps_per_second": 53.517,
      "step": 269
    },
    {
      "epoch": 14.03,
      "learning_rate": 2.4004683840749414e-05,
      "loss": 0.5785,
      "step": 270
    },
    {
      "epoch": 14.55,
      "learning_rate": 2.2833723653395785e-05,
      "loss": 0.5596,
      "step": 280
    },
    {
      "epoch": 14.96,
      "eval_accuracy": 0.9923664122137404,
      "eval_loss": 0.08803129196166992,
      "eval_runtime": 0.6127,
      "eval_samples_per_second": 855.286,
      "eval_steps_per_second": 53.863,
      "step": 288
    },
    {
      "epoch": 15.06,
      "learning_rate": 2.1662763466042153e-05,
      "loss": 0.604,
      "step": 290
    },
    {
      "epoch": 15.58,
      "learning_rate": 2.0491803278688525e-05,
      "loss": 0.5638,
      "step": 300
    },
    {
      "epoch": 16.0,
      "eval_accuracy": 0.9923664122137404,
      "eval_loss": 0.08413369953632355,
      "eval_runtime": 0.6464,
      "eval_samples_per_second": 810.624,
      "eval_steps_per_second": 51.051,
      "step": 308
    },
    {
      "epoch": 16.1,
      "learning_rate": 1.9320843091334896e-05,
      "loss": 0.5579,
      "step": 310
    },
    {
      "epoch": 16.62,
      "learning_rate": 1.8149882903981267e-05,
      "loss": 0.5601,
      "step": 320
    },
    {
      "epoch": 16.99,
      "eval_accuracy": 0.9923664122137404,
      "eval_loss": 0.07752922177314758,
      "eval_runtime": 0.6764,
      "eval_samples_per_second": 774.698,
      "eval_steps_per_second": 48.788,
      "step": 327
    },
    {
      "epoch": 17.14,
      "learning_rate": 1.6978922716627635e-05,
      "loss": 0.3986,
      "step": 330
    },
    {
      "epoch": 17.66,
      "learning_rate": 1.5807962529274007e-05,
      "loss": 0.606,
      "step": 340
    },
    {
      "epoch": 17.97,
      "eval_accuracy": 0.9923664122137404,
      "eval_loss": 0.07416040450334549,
      "eval_runtime": 0.6307,
      "eval_samples_per_second": 830.823,
      "eval_steps_per_second": 52.323,
      "step": 346
    },
    {
      "epoch": 18.18,
      "learning_rate": 1.4637002341920375e-05,
      "loss": 0.6279,
      "step": 350
    },
    {
      "epoch": 18.7,
      "learning_rate": 1.3466042154566746e-05,
      "loss": 0.5748,
      "step": 360
    },
    {
      "epoch": 18.96,
      "eval_accuracy": 0.9923664122137404,
      "eval_loss": 0.07282942533493042,
      "eval_runtime": 0.8001,
      "eval_samples_per_second": 654.911,
      "eval_steps_per_second": 41.244,
      "step": 365
    },
    {
      "epoch": 19.22,
      "learning_rate": 1.2295081967213116e-05,
      "loss": 0.5834,
      "step": 370
    },
    {
      "epoch": 19.74,
      "learning_rate": 1.1124121779859485e-05,
      "loss": 0.4852,
      "step": 380
    },
    {
      "epoch": 20.0,
      "eval_accuracy": 0.9923664122137404,
      "eval_loss": 0.07014743238687515,
      "eval_runtime": 0.7282,
      "eval_samples_per_second": 719.555,
      "eval_steps_per_second": 45.315,
      "step": 385
    },
    {
      "epoch": 20.26,
      "learning_rate": 9.953161592505855e-06,
      "loss": 0.5781,
      "step": 390
    },
    {
      "epoch": 20.78,
      "learning_rate": 8.782201405152225e-06,
      "loss": 0.4564,
      "step": 400
    },
    {
      "epoch": 20.99,
      "eval_accuracy": 0.9923664122137404,
      "eval_loss": 0.06891454756259918,
      "eval_runtime": 0.7574,
      "eval_samples_per_second": 691.858,
      "eval_steps_per_second": 43.571,
      "step": 404
    },
    {
      "epoch": 21.3,
      "learning_rate": 7.611241217798595e-06,
      "loss": 0.5382,
      "step": 410
    },
    {
      "epoch": 21.82,
      "learning_rate": 6.440281030444965e-06,
      "loss": 0.4278,
      "step": 420
    },
    {
      "epoch": 21.97,
      "eval_accuracy": 0.9923664122137404,
      "eval_loss": 0.06956692785024643,
      "eval_runtime": 0.7478,
      "eval_samples_per_second": 700.728,
      "eval_steps_per_second": 44.13,
      "step": 423
    },
    {
      "epoch": 22.34,
      "learning_rate": 5.2693208430913356e-06,
      "loss": 0.4226,
      "step": 430
    },
    {
      "epoch": 22.86,
      "learning_rate": 4.098360655737704e-06,
      "loss": 0.481,
      "step": 440
    },
    {
      "epoch": 22.96,
      "eval_accuracy": 0.9923664122137404,
      "eval_loss": 0.06689619272947311,
      "eval_runtime": 0.7742,
      "eval_samples_per_second": 676.868,
      "eval_steps_per_second": 42.627,
      "step": 442
    },
    {
      "epoch": 23.38,
      "learning_rate": 2.9274004683840754e-06,
      "loss": 0.5381,
      "step": 450
    },
    {
      "epoch": 23.9,
      "learning_rate": 1.7564402810304448e-06,
      "loss": 0.4477,
      "step": 460
    },
    {
      "epoch": 24.0,
      "eval_accuracy": 0.9923664122137404,
      "eval_loss": 0.06719958037137985,
      "eval_runtime": 0.6158,
      "eval_samples_per_second": 850.91,
      "eval_steps_per_second": 53.588,
      "step": 462
    },
    {
      "epoch": 24.42,
      "learning_rate": 5.85480093676815e-07,
      "loss": 0.598,
      "step": 470
    },
    {
      "epoch": 24.68,
      "eval_accuracy": 0.9923664122137404,
      "eval_loss": 0.06640350073575974,
      "eval_runtime": 0.6103,
      "eval_samples_per_second": 858.552,
      "eval_steps_per_second": 54.069,
      "step": 475
    }
  ],
  "logging_steps": 10,
  "max_steps": 475,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 25,
  "save_steps": 500,
  "total_flos": 2737124951961600.0,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}
|
|