RobertoSonic's picture
End of training
4b7b659 verified
{
"best_metric": 0.8117647058823529,
"best_model_checkpoint": "swinv2-tiny-patch4-window8-256-dmae-humeda-DAV23/checkpoint-170",
"epoch": 37.666666666666664,
"eval_steps": 500,
"global_step": 640,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.7272727272727273,
"grad_norm": 10.422093391418457,
"learning_rate": 9.375000000000001e-06,
"loss": 6.4493,
"step": 12
},
{
"epoch": 1.0,
"eval_accuracy": 0.29411764705882354,
"eval_loss": 1.5280741453170776,
"eval_runtime": 1.7296,
"eval_samples_per_second": 49.145,
"eval_steps_per_second": 1.735,
"step": 17
},
{
"epoch": 1.4242424242424243,
"grad_norm": 14.379430770874023,
"learning_rate": 1.8750000000000002e-05,
"loss": 5.7922,
"step": 24
},
{
"epoch": 2.0,
"eval_accuracy": 0.38823529411764707,
"eval_loss": 1.317600965499878,
"eval_runtime": 1.301,
"eval_samples_per_second": 65.332,
"eval_steps_per_second": 2.306,
"step": 34
},
{
"epoch": 2.121212121212121,
"grad_norm": 28.70977783203125,
"learning_rate": 2.8125000000000003e-05,
"loss": 5.0076,
"step": 36
},
{
"epoch": 2.8484848484848486,
"grad_norm": 24.181316375732422,
"learning_rate": 3.7500000000000003e-05,
"loss": 4.2502,
"step": 48
},
{
"epoch": 3.0,
"eval_accuracy": 0.43529411764705883,
"eval_loss": 1.2014732360839844,
"eval_runtime": 1.3256,
"eval_samples_per_second": 64.12,
"eval_steps_per_second": 2.263,
"step": 51
},
{
"epoch": 3.5454545454545454,
"grad_norm": 29.8701171875,
"learning_rate": 4.6875e-05,
"loss": 3.2402,
"step": 60
},
{
"epoch": 4.0,
"eval_accuracy": 0.7176470588235294,
"eval_loss": 0.8901777267456055,
"eval_runtime": 1.6039,
"eval_samples_per_second": 52.995,
"eval_steps_per_second": 1.87,
"step": 68
},
{
"epoch": 4.242424242424242,
"grad_norm": 43.26256561279297,
"learning_rate": 4.930555555555556e-05,
"loss": 2.8895,
"step": 72
},
{
"epoch": 4.96969696969697,
"grad_norm": 38.71839904785156,
"learning_rate": 4.8263888888888895e-05,
"loss": 2.5386,
"step": 84
},
{
"epoch": 5.0,
"eval_accuracy": 0.7764705882352941,
"eval_loss": 0.6509169340133667,
"eval_runtime": 1.3254,
"eval_samples_per_second": 64.131,
"eval_steps_per_second": 2.263,
"step": 85
},
{
"epoch": 5.666666666666667,
"grad_norm": 35.30036544799805,
"learning_rate": 4.722222222222222e-05,
"loss": 2.0351,
"step": 96
},
{
"epoch": 6.0,
"eval_accuracy": 0.7647058823529411,
"eval_loss": 0.6758585572242737,
"eval_runtime": 1.3401,
"eval_samples_per_second": 63.43,
"eval_steps_per_second": 2.239,
"step": 102
},
{
"epoch": 6.363636363636363,
"grad_norm": 36.81019973754883,
"learning_rate": 4.618055555555556e-05,
"loss": 1.8225,
"step": 108
},
{
"epoch": 7.0,
"eval_accuracy": 0.7764705882352941,
"eval_loss": 0.6606881022453308,
"eval_runtime": 1.7388,
"eval_samples_per_second": 48.883,
"eval_steps_per_second": 1.725,
"step": 119
},
{
"epoch": 7.0606060606060606,
"grad_norm": 38.81963348388672,
"learning_rate": 4.5138888888888894e-05,
"loss": 1.5799,
"step": 120
},
{
"epoch": 7.787878787878788,
"grad_norm": 34.68444061279297,
"learning_rate": 4.4097222222222226e-05,
"loss": 1.4778,
"step": 132
},
{
"epoch": 8.0,
"eval_accuracy": 0.7529411764705882,
"eval_loss": 0.7161939740180969,
"eval_runtime": 1.3317,
"eval_samples_per_second": 63.829,
"eval_steps_per_second": 2.253,
"step": 136
},
{
"epoch": 8.484848484848484,
"grad_norm": 33.96342849731445,
"learning_rate": 4.305555555555556e-05,
"loss": 1.4076,
"step": 144
},
{
"epoch": 9.0,
"eval_accuracy": 0.7294117647058823,
"eval_loss": 0.9084261059761047,
"eval_runtime": 1.3286,
"eval_samples_per_second": 63.976,
"eval_steps_per_second": 2.258,
"step": 153
},
{
"epoch": 9.181818181818182,
"grad_norm": 51.935420989990234,
"learning_rate": 4.201388888888889e-05,
"loss": 1.3522,
"step": 156
},
{
"epoch": 9.909090909090908,
"grad_norm": 35.646156311035156,
"learning_rate": 4.0972222222222225e-05,
"loss": 1.2056,
"step": 168
},
{
"epoch": 10.0,
"eval_accuracy": 0.8117647058823529,
"eval_loss": 0.6900736689567566,
"eval_runtime": 1.3237,
"eval_samples_per_second": 64.212,
"eval_steps_per_second": 2.266,
"step": 170
},
{
"epoch": 10.606060606060606,
"grad_norm": 39.277122497558594,
"learning_rate": 3.993055555555556e-05,
"loss": 0.9552,
"step": 180
},
{
"epoch": 11.0,
"eval_accuracy": 0.7764705882352941,
"eval_loss": 0.9153303503990173,
"eval_runtime": 1.3258,
"eval_samples_per_second": 64.11,
"eval_steps_per_second": 2.263,
"step": 187
},
{
"epoch": 11.303030303030303,
"grad_norm": 32.70234298706055,
"learning_rate": 3.888888888888889e-05,
"loss": 1.0259,
"step": 192
},
{
"epoch": 12.0,
"grad_norm": 27.082563400268555,
"learning_rate": 3.7847222222222224e-05,
"loss": 0.9859,
"step": 204
},
{
"epoch": 12.0,
"eval_accuracy": 0.7529411764705882,
"eval_loss": 0.8693907856941223,
"eval_runtime": 1.3304,
"eval_samples_per_second": 63.892,
"eval_steps_per_second": 2.255,
"step": 204
},
{
"epoch": 12.727272727272727,
"grad_norm": 25.17854881286621,
"learning_rate": 3.6805555555555556e-05,
"loss": 0.8309,
"step": 216
},
{
"epoch": 13.0,
"eval_accuracy": 0.8,
"eval_loss": 0.7666147351264954,
"eval_runtime": 1.5029,
"eval_samples_per_second": 56.559,
"eval_steps_per_second": 1.996,
"step": 221
},
{
"epoch": 13.424242424242424,
"grad_norm": 42.24884033203125,
"learning_rate": 3.576388888888889e-05,
"loss": 0.7722,
"step": 228
},
{
"epoch": 14.0,
"eval_accuracy": 0.7529411764705882,
"eval_loss": 0.9117515087127686,
"eval_runtime": 1.4392,
"eval_samples_per_second": 59.061,
"eval_steps_per_second": 2.085,
"step": 238
},
{
"epoch": 14.121212121212121,
"grad_norm": 25.000560760498047,
"learning_rate": 3.472222222222222e-05,
"loss": 0.7479,
"step": 240
},
{
"epoch": 14.848484848484848,
"grad_norm": 23.831018447875977,
"learning_rate": 3.368055555555556e-05,
"loss": 0.7632,
"step": 252
},
{
"epoch": 15.0,
"eval_accuracy": 0.7529411764705882,
"eval_loss": 0.89529949426651,
"eval_runtime": 1.3464,
"eval_samples_per_second": 63.129,
"eval_steps_per_second": 2.228,
"step": 255
},
{
"epoch": 15.545454545454545,
"grad_norm": 37.80634689331055,
"learning_rate": 3.263888888888889e-05,
"loss": 0.5868,
"step": 264
},
{
"epoch": 16.0,
"eval_accuracy": 0.7529411764705882,
"eval_loss": 0.9678363800048828,
"eval_runtime": 1.3437,
"eval_samples_per_second": 63.259,
"eval_steps_per_second": 2.233,
"step": 272
},
{
"epoch": 16.242424242424242,
"grad_norm": 31.965198516845703,
"learning_rate": 3.159722222222222e-05,
"loss": 0.8369,
"step": 276
},
{
"epoch": 16.96969696969697,
"grad_norm": 43.057701110839844,
"learning_rate": 3.055555555555556e-05,
"loss": 0.6577,
"step": 288
},
{
"epoch": 17.0,
"eval_accuracy": 0.7764705882352941,
"eval_loss": 1.0502737760543823,
"eval_runtime": 1.7574,
"eval_samples_per_second": 48.368,
"eval_steps_per_second": 1.707,
"step": 289
},
{
"epoch": 17.666666666666668,
"grad_norm": 22.851730346679688,
"learning_rate": 2.951388888888889e-05,
"loss": 0.5816,
"step": 300
},
{
"epoch": 18.0,
"eval_accuracy": 0.7294117647058823,
"eval_loss": 1.0601861476898193,
"eval_runtime": 1.4181,
"eval_samples_per_second": 59.94,
"eval_steps_per_second": 2.116,
"step": 306
},
{
"epoch": 18.363636363636363,
"grad_norm": 24.805330276489258,
"learning_rate": 2.8472222222222223e-05,
"loss": 0.6222,
"step": 312
},
{
"epoch": 19.0,
"eval_accuracy": 0.7764705882352941,
"eval_loss": 1.1542593240737915,
"eval_runtime": 1.3489,
"eval_samples_per_second": 63.014,
"eval_steps_per_second": 2.224,
"step": 323
},
{
"epoch": 19.060606060606062,
"grad_norm": 24.57158851623535,
"learning_rate": 2.743055555555556e-05,
"loss": 0.6667,
"step": 324
},
{
"epoch": 19.78787878787879,
"grad_norm": 52.441471099853516,
"learning_rate": 2.6388888888888892e-05,
"loss": 0.4861,
"step": 336
},
{
"epoch": 20.0,
"eval_accuracy": 0.8117647058823529,
"eval_loss": 0.9739417433738708,
"eval_runtime": 1.3332,
"eval_samples_per_second": 63.755,
"eval_steps_per_second": 2.25,
"step": 340
},
{
"epoch": 20.484848484848484,
"grad_norm": 20.256858825683594,
"learning_rate": 2.534722222222222e-05,
"loss": 0.4422,
"step": 348
},
{
"epoch": 21.0,
"eval_accuracy": 0.8,
"eval_loss": 1.035377860069275,
"eval_runtime": 1.7575,
"eval_samples_per_second": 48.365,
"eval_steps_per_second": 1.707,
"step": 357
},
{
"epoch": 21.181818181818183,
"grad_norm": 30.408321380615234,
"learning_rate": 2.4305555555555558e-05,
"loss": 0.5374,
"step": 360
},
{
"epoch": 21.90909090909091,
"grad_norm": 16.08924102783203,
"learning_rate": 2.326388888888889e-05,
"loss": 0.506,
"step": 372
},
{
"epoch": 22.0,
"eval_accuracy": 0.8117647058823529,
"eval_loss": 1.109660029411316,
"eval_runtime": 1.3601,
"eval_samples_per_second": 62.497,
"eval_steps_per_second": 2.206,
"step": 374
},
{
"epoch": 22.606060606060606,
"grad_norm": 19.899555206298828,
"learning_rate": 2.2222222222222223e-05,
"loss": 0.3833,
"step": 384
},
{
"epoch": 23.0,
"eval_accuracy": 0.7764705882352941,
"eval_loss": 1.2008516788482666,
"eval_runtime": 1.3596,
"eval_samples_per_second": 62.517,
"eval_steps_per_second": 2.206,
"step": 391
},
{
"epoch": 23.303030303030305,
"grad_norm": 23.089004516601562,
"learning_rate": 2.1180555555555556e-05,
"loss": 0.5273,
"step": 396
},
{
"epoch": 24.0,
"grad_norm": 12.893085479736328,
"learning_rate": 2.013888888888889e-05,
"loss": 0.4574,
"step": 408
},
{
"epoch": 24.0,
"eval_accuracy": 0.7764705882352941,
"eval_loss": 1.1365725994110107,
"eval_runtime": 1.3252,
"eval_samples_per_second": 64.141,
"eval_steps_per_second": 2.264,
"step": 408
},
{
"epoch": 24.727272727272727,
"grad_norm": 29.011215209960938,
"learning_rate": 1.9097222222222222e-05,
"loss": 0.4467,
"step": 420
},
{
"epoch": 25.0,
"eval_accuracy": 0.8117647058823529,
"eval_loss": 1.0601016283035278,
"eval_runtime": 1.4691,
"eval_samples_per_second": 57.86,
"eval_steps_per_second": 2.042,
"step": 425
},
{
"epoch": 25.424242424242426,
"grad_norm": 37.92557907104492,
"learning_rate": 1.8055555555555555e-05,
"loss": 0.4451,
"step": 432
},
{
"epoch": 26.0,
"eval_accuracy": 0.7764705882352941,
"eval_loss": 1.0934613943099976,
"eval_runtime": 1.3569,
"eval_samples_per_second": 62.643,
"eval_steps_per_second": 2.211,
"step": 442
},
{
"epoch": 26.12121212121212,
"grad_norm": 29.263154983520508,
"learning_rate": 1.701388888888889e-05,
"loss": 0.483,
"step": 444
},
{
"epoch": 26.848484848484848,
"grad_norm": 20.336957931518555,
"learning_rate": 1.597222222222222e-05,
"loss": 0.4384,
"step": 456
},
{
"epoch": 27.0,
"eval_accuracy": 0.7647058823529411,
"eval_loss": 1.1616755723953247,
"eval_runtime": 1.3388,
"eval_samples_per_second": 63.492,
"eval_steps_per_second": 2.241,
"step": 459
},
{
"epoch": 27.545454545454547,
"grad_norm": 26.435325622558594,
"learning_rate": 1.4930555555555557e-05,
"loss": 0.4321,
"step": 468
},
{
"epoch": 28.0,
"eval_accuracy": 0.7764705882352941,
"eval_loss": 1.1012462377548218,
"eval_runtime": 1.7645,
"eval_samples_per_second": 48.171,
"eval_steps_per_second": 1.7,
"step": 476
},
{
"epoch": 28.242424242424242,
"grad_norm": 35.24657440185547,
"learning_rate": 1.388888888888889e-05,
"loss": 0.4068,
"step": 480
},
{
"epoch": 28.96969696969697,
"grad_norm": 29.191991806030273,
"learning_rate": 1.2847222222222222e-05,
"loss": 0.4398,
"step": 492
},
{
"epoch": 29.0,
"eval_accuracy": 0.788235294117647,
"eval_loss": 1.082492709159851,
"eval_runtime": 1.3299,
"eval_samples_per_second": 63.914,
"eval_steps_per_second": 2.256,
"step": 493
},
{
"epoch": 29.666666666666668,
"grad_norm": 26.035062789916992,
"learning_rate": 1.1805555555555555e-05,
"loss": 0.361,
"step": 504
},
{
"epoch": 30.0,
"eval_accuracy": 0.7647058823529411,
"eval_loss": 1.112740397453308,
"eval_runtime": 1.3356,
"eval_samples_per_second": 63.641,
"eval_steps_per_second": 2.246,
"step": 510
},
{
"epoch": 30.363636363636363,
"grad_norm": 24.128007888793945,
"learning_rate": 1.076388888888889e-05,
"loss": 0.4428,
"step": 516
},
{
"epoch": 31.0,
"eval_accuracy": 0.7529411764705882,
"eval_loss": 1.2024400234222412,
"eval_runtime": 1.8084,
"eval_samples_per_second": 47.003,
"eval_steps_per_second": 1.659,
"step": 527
},
{
"epoch": 31.060606060606062,
"grad_norm": 29.159976959228516,
"learning_rate": 9.722222222222223e-06,
"loss": 0.4293,
"step": 528
},
{
"epoch": 31.78787878787879,
"grad_norm": 19.373197555541992,
"learning_rate": 8.680555555555556e-06,
"loss": 0.451,
"step": 540
},
{
"epoch": 32.0,
"eval_accuracy": 0.7647058823529411,
"eval_loss": 1.1550030708312988,
"eval_runtime": 1.3443,
"eval_samples_per_second": 63.23,
"eval_steps_per_second": 2.232,
"step": 544
},
{
"epoch": 32.484848484848484,
"grad_norm": 12.227431297302246,
"learning_rate": 7.63888888888889e-06,
"loss": 0.403,
"step": 552
},
{
"epoch": 33.0,
"eval_accuracy": 0.7764705882352941,
"eval_loss": 1.1645594835281372,
"eval_runtime": 1.3526,
"eval_samples_per_second": 62.842,
"eval_steps_per_second": 2.218,
"step": 561
},
{
"epoch": 33.18181818181818,
"grad_norm": 20.577377319335938,
"learning_rate": 6.597222222222223e-06,
"loss": 0.4113,
"step": 564
},
{
"epoch": 33.90909090909091,
"grad_norm": 24.493812561035156,
"learning_rate": 5.555555555555556e-06,
"loss": 0.3059,
"step": 576
},
{
"epoch": 34.0,
"eval_accuracy": 0.7764705882352941,
"eval_loss": 1.2441880702972412,
"eval_runtime": 1.3246,
"eval_samples_per_second": 64.172,
"eval_steps_per_second": 2.265,
"step": 578
},
{
"epoch": 34.60606060606061,
"grad_norm": 28.553544998168945,
"learning_rate": 4.513888888888889e-06,
"loss": 0.3022,
"step": 588
},
{
"epoch": 35.0,
"eval_accuracy": 0.7764705882352941,
"eval_loss": 1.1975644826889038,
"eval_runtime": 1.5644,
"eval_samples_per_second": 54.334,
"eval_steps_per_second": 1.918,
"step": 595
},
{
"epoch": 35.303030303030305,
"grad_norm": 17.071916580200195,
"learning_rate": 3.4722222222222224e-06,
"loss": 0.3343,
"step": 600
},
{
"epoch": 36.0,
"grad_norm": 10.918073654174805,
"learning_rate": 2.4305555555555557e-06,
"loss": 0.319,
"step": 612
},
{
"epoch": 36.0,
"eval_accuracy": 0.7764705882352941,
"eval_loss": 1.1564186811447144,
"eval_runtime": 1.4741,
"eval_samples_per_second": 57.663,
"eval_steps_per_second": 2.035,
"step": 612
},
{
"epoch": 36.72727272727273,
"grad_norm": 8.741593360900879,
"learning_rate": 1.388888888888889e-06,
"loss": 0.3737,
"step": 624
},
{
"epoch": 37.0,
"eval_accuracy": 0.7764705882352941,
"eval_loss": 1.1857171058654785,
"eval_runtime": 1.3517,
"eval_samples_per_second": 62.883,
"eval_steps_per_second": 2.219,
"step": 629
},
{
"epoch": 37.42424242424242,
"grad_norm": 20.94017219543457,
"learning_rate": 3.4722222222222224e-07,
"loss": 0.3063,
"step": 636
},
{
"epoch": 37.666666666666664,
"eval_accuracy": 0.7764705882352941,
"eval_loss": 1.193001627922058,
"eval_runtime": 1.6167,
"eval_samples_per_second": 52.578,
"eval_steps_per_second": 1.856,
"step": 640
},
{
"epoch": 37.666666666666664,
"step": 640,
"total_flos": 2.572737077098709e+18,
"train_loss": 1.1635722614824773,
"train_runtime": 1917.6947,
"train_samples_per_second": 43.782,
"train_steps_per_second": 0.334
}
],
"logging_steps": 12,
"max_steps": 640,
"num_input_tokens_seen": 0,
"num_train_epochs": 40,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.572737077098709e+18,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}