{
  "best_metric": 1.905003309249878,
  "best_model_checkpoint": "detr-r50-finetuned-mist1-gb-8ah-6l/checkpoint-5290",
  "epoch": 50.0,
  "eval_steps": 500,
  "global_step": 5750,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "learning_rate": 9.812173913043479e-06,
      "loss": 2.5222,
      "step": 115
    },
    {
      "epoch": 1.0,
      "eval_loss": 2.2562553882598877,
      "eval_runtime": 5.7546,
      "eval_samples_per_second": 6.951,
      "eval_steps_per_second": 0.869,
      "step": 115
    },
    {
      "epoch": 2.0,
      "learning_rate": 9.612173913043479e-06,
      "loss": 2.3827,
      "step": 230
    },
    {
      "epoch": 2.0,
      "eval_loss": 2.2210755348205566,
      "eval_runtime": 5.7395,
      "eval_samples_per_second": 6.969,
      "eval_steps_per_second": 0.871,
      "step": 230
    },
    {
      "epoch": 3.0,
      "learning_rate": 9.412173913043479e-06,
      "loss": 2.3441,
      "step": 345
    },
    {
      "epoch": 3.0,
      "eval_loss": 2.2602248191833496,
      "eval_runtime": 5.7242,
      "eval_samples_per_second": 6.988,
      "eval_steps_per_second": 0.873,
      "step": 345
    },
    {
      "epoch": 4.0,
      "learning_rate": 9.21217391304348e-06,
      "loss": 2.2896,
      "step": 460
    },
    {
      "epoch": 4.0,
      "eval_loss": 2.2359230518341064,
      "eval_runtime": 5.684,
      "eval_samples_per_second": 7.037,
      "eval_steps_per_second": 0.88,
      "step": 460
    },
    {
      "epoch": 5.0,
      "learning_rate": 9.013913043478261e-06,
      "loss": 2.2828,
      "step": 575
    },
    {
      "epoch": 5.0,
      "eval_loss": 2.2430644035339355,
      "eval_runtime": 5.7198,
      "eval_samples_per_second": 6.993,
      "eval_steps_per_second": 0.874,
      "step": 575
    },
    {
      "epoch": 6.0,
      "learning_rate": 8.813913043478261e-06,
      "loss": 2.2972,
      "step": 690
    },
    {
      "epoch": 6.0,
      "eval_loss": 2.1629228591918945,
      "eval_runtime": 5.6906,
      "eval_samples_per_second": 7.029,
      "eval_steps_per_second": 0.879,
      "step": 690
    },
    {
      "epoch": 7.0,
      "learning_rate": 8.615652173913043e-06,
      "loss": 2.3007,
      "step": 805
    },
    {
      "epoch": 7.0,
      "eval_loss": 2.1544721126556396,
      "eval_runtime": 5.7423,
      "eval_samples_per_second": 6.966,
      "eval_steps_per_second": 0.871,
      "step": 805
    },
    {
      "epoch": 8.0,
      "learning_rate": 8.417391304347827e-06,
      "loss": 2.2951,
      "step": 920
    },
    {
      "epoch": 8.0,
      "eval_loss": 2.115345001220703,
      "eval_runtime": 5.7472,
      "eval_samples_per_second": 6.96,
      "eval_steps_per_second": 0.87,
      "step": 920
    },
    {
      "epoch": 9.0,
      "learning_rate": 8.217391304347827e-06,
      "loss": 2.2595,
      "step": 1035
    },
    {
      "epoch": 9.0,
      "eval_loss": 2.1553213596343994,
      "eval_runtime": 5.6778,
      "eval_samples_per_second": 7.045,
      "eval_steps_per_second": 0.881,
      "step": 1035
    },
    {
      "epoch": 10.0,
      "learning_rate": 8.017391304347828e-06,
      "loss": 2.2327,
      "step": 1150
    },
    {
      "epoch": 10.0,
      "eval_loss": 2.205960750579834,
      "eval_runtime": 5.7224,
      "eval_samples_per_second": 6.99,
      "eval_steps_per_second": 0.874,
      "step": 1150
    },
    {
      "epoch": 11.0,
      "learning_rate": 7.817391304347826e-06,
      "loss": 2.2023,
      "step": 1265
    },
    {
      "epoch": 11.0,
      "eval_loss": 2.045210599899292,
      "eval_runtime": 5.6947,
      "eval_samples_per_second": 7.024,
      "eval_steps_per_second": 0.878,
      "step": 1265
    },
    {
      "epoch": 12.0,
      "learning_rate": 7.617391304347826e-06,
      "loss": 2.2117,
      "step": 1380
    },
    {
      "epoch": 12.0,
      "eval_loss": 2.087853193283081,
      "eval_runtime": 5.7626,
      "eval_samples_per_second": 6.941,
      "eval_steps_per_second": 0.868,
      "step": 1380
    },
    {
      "epoch": 13.0,
      "learning_rate": 7.417391304347827e-06,
      "loss": 2.1805,
      "step": 1495
    },
    {
      "epoch": 13.0,
      "eval_loss": 2.1812005043029785,
      "eval_runtime": 5.7549,
      "eval_samples_per_second": 6.951,
      "eval_steps_per_second": 0.869,
      "step": 1495
    },
    {
      "epoch": 14.0,
      "learning_rate": 7.217391304347827e-06,
      "loss": 2.1344,
      "step": 1610
    },
    {
      "epoch": 14.0,
      "eval_loss": 2.0991523265838623,
      "eval_runtime": 5.7805,
      "eval_samples_per_second": 6.92,
      "eval_steps_per_second": 0.865,
      "step": 1610
    },
    {
      "epoch": 15.0,
      "learning_rate": 7.017391304347827e-06,
      "loss": 2.1057,
      "step": 1725
    },
    {
      "epoch": 15.0,
      "eval_loss": 1.983435869216919,
      "eval_runtime": 5.7113,
      "eval_samples_per_second": 7.004,
      "eval_steps_per_second": 0.875,
      "step": 1725
    },
    {
      "epoch": 16.0,
      "learning_rate": 6.817391304347826e-06,
      "loss": 2.086,
      "step": 1840
    },
    {
      "epoch": 16.0,
      "eval_loss": 1.9609792232513428,
      "eval_runtime": 5.7575,
      "eval_samples_per_second": 6.947,
      "eval_steps_per_second": 0.868,
      "step": 1840
    },
    {
      "epoch": 17.0,
      "learning_rate": 6.617391304347827e-06,
      "loss": 2.0591,
      "step": 1955
    },
    {
      "epoch": 17.0,
      "eval_loss": 2.100736141204834,
      "eval_runtime": 5.7633,
      "eval_samples_per_second": 6.94,
      "eval_steps_per_second": 0.868,
      "step": 1955
    },
    {
      "epoch": 18.0,
      "learning_rate": 6.417391304347827e-06,
      "loss": 2.053,
      "step": 2070
    },
    {
      "epoch": 18.0,
      "eval_loss": 2.056126832962036,
      "eval_runtime": 5.7709,
      "eval_samples_per_second": 6.931,
      "eval_steps_per_second": 0.866,
      "step": 2070
    },
    {
      "epoch": 19.0,
      "learning_rate": 6.217391304347826e-06,
      "loss": 2.0387,
      "step": 2185
    },
    {
      "epoch": 19.0,
      "eval_loss": 2.0596375465393066,
      "eval_runtime": 5.7884,
      "eval_samples_per_second": 6.91,
      "eval_steps_per_second": 0.864,
      "step": 2185
    },
    {
      "epoch": 20.0,
      "learning_rate": 6.0173913043478264e-06,
      "loss": 2.0161,
      "step": 2300
    },
    {
      "epoch": 20.0,
      "eval_loss": 1.9885139465332031,
      "eval_runtime": 5.7465,
      "eval_samples_per_second": 6.961,
      "eval_steps_per_second": 0.87,
      "step": 2300
    },
    {
      "epoch": 21.0,
      "learning_rate": 5.817391304347827e-06,
      "loss": 2.0374,
      "step": 2415
    },
    {
      "epoch": 21.0,
      "eval_loss": 2.0041000843048096,
      "eval_runtime": 5.7421,
      "eval_samples_per_second": 6.966,
      "eval_steps_per_second": 0.871,
      "step": 2415
    },
    {
      "epoch": 22.0,
      "learning_rate": 5.617391304347827e-06,
      "loss": 2.0233,
      "step": 2530
    },
    {
      "epoch": 22.0,
      "eval_loss": 2.0102856159210205,
      "eval_runtime": 5.7047,
      "eval_samples_per_second": 7.012,
      "eval_steps_per_second": 0.876,
      "step": 2530
    },
    {
      "epoch": 23.0,
      "learning_rate": 5.417391304347826e-06,
      "loss": 2.0363,
      "step": 2645
    },
    {
      "epoch": 23.0,
      "eval_loss": 2.0540664196014404,
      "eval_runtime": 5.7156,
      "eval_samples_per_second": 6.998,
      "eval_steps_per_second": 0.875,
      "step": 2645
    },
    {
      "epoch": 24.0,
      "learning_rate": 5.2173913043478265e-06,
      "loss": 1.9837,
      "step": 2760
    },
    {
      "epoch": 24.0,
      "eval_loss": 1.9924190044403076,
      "eval_runtime": 5.6809,
      "eval_samples_per_second": 7.041,
      "eval_steps_per_second": 0.88,
      "step": 2760
    },
    {
      "epoch": 25.0,
      "learning_rate": 5.017391304347826e-06,
      "loss": 1.9943,
      "step": 2875
    },
    {
      "epoch": 25.0,
      "eval_loss": 2.0557620525360107,
      "eval_runtime": 5.7087,
      "eval_samples_per_second": 7.007,
      "eval_steps_per_second": 0.876,
      "step": 2875
    },
    {
      "epoch": 26.0,
      "learning_rate": 4.817391304347827e-06,
      "loss": 1.9846,
      "step": 2990
    },
    {
      "epoch": 26.0,
      "eval_loss": 1.9873688220977783,
      "eval_runtime": 5.6682,
      "eval_samples_per_second": 7.057,
      "eval_steps_per_second": 0.882,
      "step": 2990
    },
    {
      "epoch": 27.0,
      "learning_rate": 4.617391304347826e-06,
      "loss": 1.9601,
      "step": 3105
    },
    {
      "epoch": 27.0,
      "eval_loss": 1.9554007053375244,
      "eval_runtime": 5.7979,
      "eval_samples_per_second": 6.899,
      "eval_steps_per_second": 0.862,
      "step": 3105
    },
    {
      "epoch": 28.0,
      "learning_rate": 4.4173913043478265e-06,
      "loss": 1.9837,
      "step": 3220
    },
    {
      "epoch": 28.0,
      "eval_loss": 1.9988619089126587,
      "eval_runtime": 5.7796,
      "eval_samples_per_second": 6.921,
      "eval_steps_per_second": 0.865,
      "step": 3220
    },
    {
      "epoch": 29.0,
      "learning_rate": 4.217391304347827e-06,
      "loss": 1.9664,
      "step": 3335
    },
    {
      "epoch": 29.0,
      "eval_loss": 1.9875919818878174,
      "eval_runtime": 5.7433,
      "eval_samples_per_second": 6.965,
      "eval_steps_per_second": 0.871,
      "step": 3335
    },
    {
      "epoch": 30.0,
      "learning_rate": 4.017391304347826e-06,
      "loss": 1.966,
      "step": 3450
    },
    {
      "epoch": 30.0,
      "eval_loss": 1.9754610061645508,
      "eval_runtime": 5.8653,
      "eval_samples_per_second": 6.82,
      "eval_steps_per_second": 0.852,
      "step": 3450
    },
    {
      "epoch": 31.0,
      "learning_rate": 3.819130434782609e-06,
      "loss": 1.9226,
      "step": 3565
    },
    {
      "epoch": 31.0,
      "eval_loss": 1.9357328414916992,
      "eval_runtime": 5.765,
      "eval_samples_per_second": 6.938,
      "eval_steps_per_second": 0.867,
      "step": 3565
    },
    {
      "epoch": 32.0,
      "learning_rate": 3.6191304347826088e-06,
      "loss": 1.9405,
      "step": 3680
    },
    {
      "epoch": 32.0,
      "eval_loss": 1.9239734411239624,
      "eval_runtime": 5.8194,
      "eval_samples_per_second": 6.874,
      "eval_steps_per_second": 0.859,
      "step": 3680
    },
    {
      "epoch": 33.0,
      "learning_rate": 3.4191304347826086e-06,
      "loss": 1.9035,
      "step": 3795
    },
    {
      "epoch": 33.0,
      "eval_loss": 1.9410585165023804,
      "eval_runtime": 5.8097,
      "eval_samples_per_second": 6.885,
      "eval_steps_per_second": 0.861,
      "step": 3795
    },
    {
      "epoch": 34.0,
      "learning_rate": 3.219130434782609e-06,
      "loss": 1.8924,
      "step": 3910
    },
    {
      "epoch": 34.0,
      "eval_loss": 1.9291362762451172,
      "eval_runtime": 5.8014,
      "eval_samples_per_second": 6.895,
      "eval_steps_per_second": 0.862,
      "step": 3910
    },
    {
      "epoch": 35.0,
      "learning_rate": 3.019130434782609e-06,
      "loss": 1.8801,
      "step": 4025
    },
    {
      "epoch": 35.0,
      "eval_loss": 1.9660656452178955,
      "eval_runtime": 5.7747,
      "eval_samples_per_second": 6.927,
      "eval_steps_per_second": 0.866,
      "step": 4025
    },
    {
      "epoch": 36.0,
      "learning_rate": 2.819130434782609e-06,
      "loss": 1.8698,
      "step": 4140
    },
    {
      "epoch": 36.0,
      "eval_loss": 1.9104881286621094,
      "eval_runtime": 5.7592,
      "eval_samples_per_second": 6.945,
      "eval_steps_per_second": 0.868,
      "step": 4140
    },
    {
      "epoch": 37.0,
      "learning_rate": 2.619130434782609e-06,
      "loss": 1.8572,
      "step": 4255
    },
    {
      "epoch": 37.0,
      "eval_loss": 1.944820761680603,
      "eval_runtime": 5.7796,
      "eval_samples_per_second": 6.921,
      "eval_steps_per_second": 0.865,
      "step": 4255
    },
    {
      "epoch": 38.0,
      "learning_rate": 2.419130434782609e-06,
      "loss": 1.8756,
      "step": 4370
    },
    {
      "epoch": 38.0,
      "eval_loss": 1.9674819707870483,
      "eval_runtime": 5.7301,
      "eval_samples_per_second": 6.981,
      "eval_steps_per_second": 0.873,
      "step": 4370
    },
    {
      "epoch": 39.0,
      "learning_rate": 2.219130434782609e-06,
      "loss": 1.8593,
      "step": 4485
    },
    {
      "epoch": 39.0,
      "eval_loss": 1.9364864826202393,
      "eval_runtime": 5.8116,
      "eval_samples_per_second": 6.883,
      "eval_steps_per_second": 0.86,
      "step": 4485
    },
    {
      "epoch": 40.0,
      "learning_rate": 2.019130434782609e-06,
      "loss": 1.8713,
      "step": 4600
    },
    {
      "epoch": 40.0,
      "eval_loss": 1.9382976293563843,
      "eval_runtime": 5.7132,
      "eval_samples_per_second": 7.001,
      "eval_steps_per_second": 0.875,
      "step": 4600
    },
    {
      "epoch": 41.0,
      "learning_rate": 1.8191304347826088e-06,
      "loss": 1.8436,
      "step": 4715
    },
    {
      "epoch": 41.0,
      "eval_loss": 1.967057466506958,
      "eval_runtime": 5.7284,
      "eval_samples_per_second": 6.983,
      "eval_steps_per_second": 0.873,
      "step": 4715
    },
    {
      "epoch": 42.0,
      "learning_rate": 1.6191304347826088e-06,
      "loss": 1.83,
      "step": 4830
    },
    {
      "epoch": 42.0,
      "eval_loss": 1.9526548385620117,
      "eval_runtime": 5.6918,
      "eval_samples_per_second": 7.028,
      "eval_steps_per_second": 0.878,
      "step": 4830
    },
    {
      "epoch": 43.0,
      "learning_rate": 1.4191304347826089e-06,
      "loss": 1.857,
      "step": 4945
    },
    {
      "epoch": 43.0,
      "eval_loss": 1.944758653640747,
      "eval_runtime": 5.7519,
      "eval_samples_per_second": 6.954,
      "eval_steps_per_second": 0.869,
      "step": 4945
    },
    {
      "epoch": 44.0,
      "learning_rate": 1.2191304347826089e-06,
      "loss": 1.8318,
      "step": 5060
    },
    {
      "epoch": 44.0,
      "eval_loss": 1.9366220235824585,
      "eval_runtime": 5.7436,
      "eval_samples_per_second": 6.964,
      "eval_steps_per_second": 0.871,
      "step": 5060
    },
    {
      "epoch": 45.0,
      "learning_rate": 1.0191304347826089e-06,
      "loss": 1.8177,
      "step": 5175
    },
    {
      "epoch": 45.0,
      "eval_loss": 1.9388927221298218,
      "eval_runtime": 5.8021,
      "eval_samples_per_second": 6.894,
      "eval_steps_per_second": 0.862,
      "step": 5175
    },
    {
      "epoch": 46.0,
      "learning_rate": 8.191304347826088e-07,
      "loss": 1.8034,
      "step": 5290
    },
    {
      "epoch": 46.0,
      "eval_loss": 1.905003309249878,
      "eval_runtime": 5.7813,
      "eval_samples_per_second": 6.919,
      "eval_steps_per_second": 0.865,
      "step": 5290
    },
    {
      "epoch": 47.0,
      "learning_rate": 6.191304347826088e-07,
      "loss": 1.8226,
      "step": 5405
    },
    {
      "epoch": 47.0,
      "eval_loss": 1.9226171970367432,
      "eval_runtime": 5.8014,
      "eval_samples_per_second": 6.895,
      "eval_steps_per_second": 0.862,
      "step": 5405
    },
    {
      "epoch": 48.0,
      "learning_rate": 4.1913043478260874e-07,
      "loss": 1.818,
      "step": 5520
    },
    {
      "epoch": 48.0,
      "eval_loss": 1.9150111675262451,
      "eval_runtime": 5.7701,
      "eval_samples_per_second": 6.932,
      "eval_steps_per_second": 0.867,
      "step": 5520
    },
    {
      "epoch": 49.0,
      "learning_rate": 2.191304347826087e-07,
      "loss": 1.8148,
      "step": 5635
    },
    {
      "epoch": 49.0,
      "eval_loss": 1.9168732166290283,
      "eval_runtime": 5.7338,
      "eval_samples_per_second": 6.976,
      "eval_steps_per_second": 0.872,
      "step": 5635
    },
    {
      "epoch": 50.0,
      "learning_rate": 1.91304347826087e-08,
      "loss": 1.7984,
      "step": 5750
    },
    {
      "epoch": 50.0,
      "eval_loss": 1.9223819971084595,
      "eval_runtime": 5.7595,
      "eval_samples_per_second": 6.945,
      "eval_steps_per_second": 0.868,
      "step": 5750
    },
    {
      "epoch": 50.0,
      "step": 5750,
      "total_flos": 1.098949102848e+19,
      "train_loss": 2.026795845363451,
      "train_runtime": 4682.5898,
      "train_samples_per_second": 4.912,
      "train_steps_per_second": 1.228
    }
  ],
  "logging_steps": 500,
  "max_steps": 5750,
  "num_train_epochs": 50,
  "save_steps": 500,
  "total_flos": 1.098949102848e+19,
  "trial_name": null,
  "trial_params": null
}