polejowska's picture
End of training
9da173a
{
"best_metric": 1.905003309249878,
"best_model_checkpoint": "detr-r50-finetuned-mist1-gb-8ah-6l/checkpoint-5290",
"epoch": 50.0,
"eval_steps": 500,
"global_step": 5750,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"learning_rate": 9.812173913043479e-06,
"loss": 2.5222,
"step": 115
},
{
"epoch": 1.0,
"eval_loss": 2.2562553882598877,
"eval_runtime": 5.7546,
"eval_samples_per_second": 6.951,
"eval_steps_per_second": 0.869,
"step": 115
},
{
"epoch": 2.0,
"learning_rate": 9.612173913043479e-06,
"loss": 2.3827,
"step": 230
},
{
"epoch": 2.0,
"eval_loss": 2.2210755348205566,
"eval_runtime": 5.7395,
"eval_samples_per_second": 6.969,
"eval_steps_per_second": 0.871,
"step": 230
},
{
"epoch": 3.0,
"learning_rate": 9.412173913043479e-06,
"loss": 2.3441,
"step": 345
},
{
"epoch": 3.0,
"eval_loss": 2.2602248191833496,
"eval_runtime": 5.7242,
"eval_samples_per_second": 6.988,
"eval_steps_per_second": 0.873,
"step": 345
},
{
"epoch": 4.0,
"learning_rate": 9.21217391304348e-06,
"loss": 2.2896,
"step": 460
},
{
"epoch": 4.0,
"eval_loss": 2.2359230518341064,
"eval_runtime": 5.684,
"eval_samples_per_second": 7.037,
"eval_steps_per_second": 0.88,
"step": 460
},
{
"epoch": 5.0,
"learning_rate": 9.013913043478261e-06,
"loss": 2.2828,
"step": 575
},
{
"epoch": 5.0,
"eval_loss": 2.2430644035339355,
"eval_runtime": 5.7198,
"eval_samples_per_second": 6.993,
"eval_steps_per_second": 0.874,
"step": 575
},
{
"epoch": 6.0,
"learning_rate": 8.813913043478261e-06,
"loss": 2.2972,
"step": 690
},
{
"epoch": 6.0,
"eval_loss": 2.1629228591918945,
"eval_runtime": 5.6906,
"eval_samples_per_second": 7.029,
"eval_steps_per_second": 0.879,
"step": 690
},
{
"epoch": 7.0,
"learning_rate": 8.615652173913043e-06,
"loss": 2.3007,
"step": 805
},
{
"epoch": 7.0,
"eval_loss": 2.1544721126556396,
"eval_runtime": 5.7423,
"eval_samples_per_second": 6.966,
"eval_steps_per_second": 0.871,
"step": 805
},
{
"epoch": 8.0,
"learning_rate": 8.417391304347827e-06,
"loss": 2.2951,
"step": 920
},
{
"epoch": 8.0,
"eval_loss": 2.115345001220703,
"eval_runtime": 5.7472,
"eval_samples_per_second": 6.96,
"eval_steps_per_second": 0.87,
"step": 920
},
{
"epoch": 9.0,
"learning_rate": 8.217391304347827e-06,
"loss": 2.2595,
"step": 1035
},
{
"epoch": 9.0,
"eval_loss": 2.1553213596343994,
"eval_runtime": 5.6778,
"eval_samples_per_second": 7.045,
"eval_steps_per_second": 0.881,
"step": 1035
},
{
"epoch": 10.0,
"learning_rate": 8.017391304347828e-06,
"loss": 2.2327,
"step": 1150
},
{
"epoch": 10.0,
"eval_loss": 2.205960750579834,
"eval_runtime": 5.7224,
"eval_samples_per_second": 6.99,
"eval_steps_per_second": 0.874,
"step": 1150
},
{
"epoch": 11.0,
"learning_rate": 7.817391304347826e-06,
"loss": 2.2023,
"step": 1265
},
{
"epoch": 11.0,
"eval_loss": 2.045210599899292,
"eval_runtime": 5.6947,
"eval_samples_per_second": 7.024,
"eval_steps_per_second": 0.878,
"step": 1265
},
{
"epoch": 12.0,
"learning_rate": 7.617391304347826e-06,
"loss": 2.2117,
"step": 1380
},
{
"epoch": 12.0,
"eval_loss": 2.087853193283081,
"eval_runtime": 5.7626,
"eval_samples_per_second": 6.941,
"eval_steps_per_second": 0.868,
"step": 1380
},
{
"epoch": 13.0,
"learning_rate": 7.417391304347827e-06,
"loss": 2.1805,
"step": 1495
},
{
"epoch": 13.0,
"eval_loss": 2.1812005043029785,
"eval_runtime": 5.7549,
"eval_samples_per_second": 6.951,
"eval_steps_per_second": 0.869,
"step": 1495
},
{
"epoch": 14.0,
"learning_rate": 7.217391304347827e-06,
"loss": 2.1344,
"step": 1610
},
{
"epoch": 14.0,
"eval_loss": 2.0991523265838623,
"eval_runtime": 5.7805,
"eval_samples_per_second": 6.92,
"eval_steps_per_second": 0.865,
"step": 1610
},
{
"epoch": 15.0,
"learning_rate": 7.017391304347827e-06,
"loss": 2.1057,
"step": 1725
},
{
"epoch": 15.0,
"eval_loss": 1.983435869216919,
"eval_runtime": 5.7113,
"eval_samples_per_second": 7.004,
"eval_steps_per_second": 0.875,
"step": 1725
},
{
"epoch": 16.0,
"learning_rate": 6.817391304347826e-06,
"loss": 2.086,
"step": 1840
},
{
"epoch": 16.0,
"eval_loss": 1.9609792232513428,
"eval_runtime": 5.7575,
"eval_samples_per_second": 6.947,
"eval_steps_per_second": 0.868,
"step": 1840
},
{
"epoch": 17.0,
"learning_rate": 6.617391304347827e-06,
"loss": 2.0591,
"step": 1955
},
{
"epoch": 17.0,
"eval_loss": 2.100736141204834,
"eval_runtime": 5.7633,
"eval_samples_per_second": 6.94,
"eval_steps_per_second": 0.868,
"step": 1955
},
{
"epoch": 18.0,
"learning_rate": 6.417391304347827e-06,
"loss": 2.053,
"step": 2070
},
{
"epoch": 18.0,
"eval_loss": 2.056126832962036,
"eval_runtime": 5.7709,
"eval_samples_per_second": 6.931,
"eval_steps_per_second": 0.866,
"step": 2070
},
{
"epoch": 19.0,
"learning_rate": 6.217391304347826e-06,
"loss": 2.0387,
"step": 2185
},
{
"epoch": 19.0,
"eval_loss": 2.0596375465393066,
"eval_runtime": 5.7884,
"eval_samples_per_second": 6.91,
"eval_steps_per_second": 0.864,
"step": 2185
},
{
"epoch": 20.0,
"learning_rate": 6.0173913043478264e-06,
"loss": 2.0161,
"step": 2300
},
{
"epoch": 20.0,
"eval_loss": 1.9885139465332031,
"eval_runtime": 5.7465,
"eval_samples_per_second": 6.961,
"eval_steps_per_second": 0.87,
"step": 2300
},
{
"epoch": 21.0,
"learning_rate": 5.817391304347827e-06,
"loss": 2.0374,
"step": 2415
},
{
"epoch": 21.0,
"eval_loss": 2.0041000843048096,
"eval_runtime": 5.7421,
"eval_samples_per_second": 6.966,
"eval_steps_per_second": 0.871,
"step": 2415
},
{
"epoch": 22.0,
"learning_rate": 5.617391304347827e-06,
"loss": 2.0233,
"step": 2530
},
{
"epoch": 22.0,
"eval_loss": 2.0102856159210205,
"eval_runtime": 5.7047,
"eval_samples_per_second": 7.012,
"eval_steps_per_second": 0.876,
"step": 2530
},
{
"epoch": 23.0,
"learning_rate": 5.417391304347826e-06,
"loss": 2.0363,
"step": 2645
},
{
"epoch": 23.0,
"eval_loss": 2.0540664196014404,
"eval_runtime": 5.7156,
"eval_samples_per_second": 6.998,
"eval_steps_per_second": 0.875,
"step": 2645
},
{
"epoch": 24.0,
"learning_rate": 5.2173913043478265e-06,
"loss": 1.9837,
"step": 2760
},
{
"epoch": 24.0,
"eval_loss": 1.9924190044403076,
"eval_runtime": 5.6809,
"eval_samples_per_second": 7.041,
"eval_steps_per_second": 0.88,
"step": 2760
},
{
"epoch": 25.0,
"learning_rate": 5.017391304347826e-06,
"loss": 1.9943,
"step": 2875
},
{
"epoch": 25.0,
"eval_loss": 2.0557620525360107,
"eval_runtime": 5.7087,
"eval_samples_per_second": 7.007,
"eval_steps_per_second": 0.876,
"step": 2875
},
{
"epoch": 26.0,
"learning_rate": 4.817391304347827e-06,
"loss": 1.9846,
"step": 2990
},
{
"epoch": 26.0,
"eval_loss": 1.9873688220977783,
"eval_runtime": 5.6682,
"eval_samples_per_second": 7.057,
"eval_steps_per_second": 0.882,
"step": 2990
},
{
"epoch": 27.0,
"learning_rate": 4.617391304347826e-06,
"loss": 1.9601,
"step": 3105
},
{
"epoch": 27.0,
"eval_loss": 1.9554007053375244,
"eval_runtime": 5.7979,
"eval_samples_per_second": 6.899,
"eval_steps_per_second": 0.862,
"step": 3105
},
{
"epoch": 28.0,
"learning_rate": 4.4173913043478265e-06,
"loss": 1.9837,
"step": 3220
},
{
"epoch": 28.0,
"eval_loss": 1.9988619089126587,
"eval_runtime": 5.7796,
"eval_samples_per_second": 6.921,
"eval_steps_per_second": 0.865,
"step": 3220
},
{
"epoch": 29.0,
"learning_rate": 4.217391304347827e-06,
"loss": 1.9664,
"step": 3335
},
{
"epoch": 29.0,
"eval_loss": 1.9875919818878174,
"eval_runtime": 5.7433,
"eval_samples_per_second": 6.965,
"eval_steps_per_second": 0.871,
"step": 3335
},
{
"epoch": 30.0,
"learning_rate": 4.017391304347826e-06,
"loss": 1.966,
"step": 3450
},
{
"epoch": 30.0,
"eval_loss": 1.9754610061645508,
"eval_runtime": 5.8653,
"eval_samples_per_second": 6.82,
"eval_steps_per_second": 0.852,
"step": 3450
},
{
"epoch": 31.0,
"learning_rate": 3.819130434782609e-06,
"loss": 1.9226,
"step": 3565
},
{
"epoch": 31.0,
"eval_loss": 1.9357328414916992,
"eval_runtime": 5.765,
"eval_samples_per_second": 6.938,
"eval_steps_per_second": 0.867,
"step": 3565
},
{
"epoch": 32.0,
"learning_rate": 3.6191304347826088e-06,
"loss": 1.9405,
"step": 3680
},
{
"epoch": 32.0,
"eval_loss": 1.9239734411239624,
"eval_runtime": 5.8194,
"eval_samples_per_second": 6.874,
"eval_steps_per_second": 0.859,
"step": 3680
},
{
"epoch": 33.0,
"learning_rate": 3.4191304347826086e-06,
"loss": 1.9035,
"step": 3795
},
{
"epoch": 33.0,
"eval_loss": 1.9410585165023804,
"eval_runtime": 5.8097,
"eval_samples_per_second": 6.885,
"eval_steps_per_second": 0.861,
"step": 3795
},
{
"epoch": 34.0,
"learning_rate": 3.219130434782609e-06,
"loss": 1.8924,
"step": 3910
},
{
"epoch": 34.0,
"eval_loss": 1.9291362762451172,
"eval_runtime": 5.8014,
"eval_samples_per_second": 6.895,
"eval_steps_per_second": 0.862,
"step": 3910
},
{
"epoch": 35.0,
"learning_rate": 3.019130434782609e-06,
"loss": 1.8801,
"step": 4025
},
{
"epoch": 35.0,
"eval_loss": 1.9660656452178955,
"eval_runtime": 5.7747,
"eval_samples_per_second": 6.927,
"eval_steps_per_second": 0.866,
"step": 4025
},
{
"epoch": 36.0,
"learning_rate": 2.819130434782609e-06,
"loss": 1.8698,
"step": 4140
},
{
"epoch": 36.0,
"eval_loss": 1.9104881286621094,
"eval_runtime": 5.7592,
"eval_samples_per_second": 6.945,
"eval_steps_per_second": 0.868,
"step": 4140
},
{
"epoch": 37.0,
"learning_rate": 2.619130434782609e-06,
"loss": 1.8572,
"step": 4255
},
{
"epoch": 37.0,
"eval_loss": 1.944820761680603,
"eval_runtime": 5.7796,
"eval_samples_per_second": 6.921,
"eval_steps_per_second": 0.865,
"step": 4255
},
{
"epoch": 38.0,
"learning_rate": 2.419130434782609e-06,
"loss": 1.8756,
"step": 4370
},
{
"epoch": 38.0,
"eval_loss": 1.9674819707870483,
"eval_runtime": 5.7301,
"eval_samples_per_second": 6.981,
"eval_steps_per_second": 0.873,
"step": 4370
},
{
"epoch": 39.0,
"learning_rate": 2.219130434782609e-06,
"loss": 1.8593,
"step": 4485
},
{
"epoch": 39.0,
"eval_loss": 1.9364864826202393,
"eval_runtime": 5.8116,
"eval_samples_per_second": 6.883,
"eval_steps_per_second": 0.86,
"step": 4485
},
{
"epoch": 40.0,
"learning_rate": 2.019130434782609e-06,
"loss": 1.8713,
"step": 4600
},
{
"epoch": 40.0,
"eval_loss": 1.9382976293563843,
"eval_runtime": 5.7132,
"eval_samples_per_second": 7.001,
"eval_steps_per_second": 0.875,
"step": 4600
},
{
"epoch": 41.0,
"learning_rate": 1.8191304347826088e-06,
"loss": 1.8436,
"step": 4715
},
{
"epoch": 41.0,
"eval_loss": 1.967057466506958,
"eval_runtime": 5.7284,
"eval_samples_per_second": 6.983,
"eval_steps_per_second": 0.873,
"step": 4715
},
{
"epoch": 42.0,
"learning_rate": 1.6191304347826088e-06,
"loss": 1.83,
"step": 4830
},
{
"epoch": 42.0,
"eval_loss": 1.9526548385620117,
"eval_runtime": 5.6918,
"eval_samples_per_second": 7.028,
"eval_steps_per_second": 0.878,
"step": 4830
},
{
"epoch": 43.0,
"learning_rate": 1.4191304347826089e-06,
"loss": 1.857,
"step": 4945
},
{
"epoch": 43.0,
"eval_loss": 1.944758653640747,
"eval_runtime": 5.7519,
"eval_samples_per_second": 6.954,
"eval_steps_per_second": 0.869,
"step": 4945
},
{
"epoch": 44.0,
"learning_rate": 1.2191304347826089e-06,
"loss": 1.8318,
"step": 5060
},
{
"epoch": 44.0,
"eval_loss": 1.9366220235824585,
"eval_runtime": 5.7436,
"eval_samples_per_second": 6.964,
"eval_steps_per_second": 0.871,
"step": 5060
},
{
"epoch": 45.0,
"learning_rate": 1.0191304347826089e-06,
"loss": 1.8177,
"step": 5175
},
{
"epoch": 45.0,
"eval_loss": 1.9388927221298218,
"eval_runtime": 5.8021,
"eval_samples_per_second": 6.894,
"eval_steps_per_second": 0.862,
"step": 5175
},
{
"epoch": 46.0,
"learning_rate": 8.191304347826088e-07,
"loss": 1.8034,
"step": 5290
},
{
"epoch": 46.0,
"eval_loss": 1.905003309249878,
"eval_runtime": 5.7813,
"eval_samples_per_second": 6.919,
"eval_steps_per_second": 0.865,
"step": 5290
},
{
"epoch": 47.0,
"learning_rate": 6.191304347826088e-07,
"loss": 1.8226,
"step": 5405
},
{
"epoch": 47.0,
"eval_loss": 1.9226171970367432,
"eval_runtime": 5.8014,
"eval_samples_per_second": 6.895,
"eval_steps_per_second": 0.862,
"step": 5405
},
{
"epoch": 48.0,
"learning_rate": 4.1913043478260874e-07,
"loss": 1.818,
"step": 5520
},
{
"epoch": 48.0,
"eval_loss": 1.9150111675262451,
"eval_runtime": 5.7701,
"eval_samples_per_second": 6.932,
"eval_steps_per_second": 0.867,
"step": 5520
},
{
"epoch": 49.0,
"learning_rate": 2.191304347826087e-07,
"loss": 1.8148,
"step": 5635
},
{
"epoch": 49.0,
"eval_loss": 1.9168732166290283,
"eval_runtime": 5.7338,
"eval_samples_per_second": 6.976,
"eval_steps_per_second": 0.872,
"step": 5635
},
{
"epoch": 50.0,
"learning_rate": 1.91304347826087e-08,
"loss": 1.7984,
"step": 5750
},
{
"epoch": 50.0,
"eval_loss": 1.9223819971084595,
"eval_runtime": 5.7595,
"eval_samples_per_second": 6.945,
"eval_steps_per_second": 0.868,
"step": 5750
},
{
"epoch": 50.0,
"step": 5750,
"total_flos": 1.098949102848e+19,
"train_loss": 2.026795845363451,
"train_runtime": 4682.5898,
"train_samples_per_second": 4.912,
"train_steps_per_second": 1.228
}
],
"logging_steps": 500,
"max_steps": 5750,
"num_train_epochs": 50,
"save_steps": 500,
"total_flos": 1.098949102848e+19,
"trial_name": null,
"trial_params": null
}