{ "best_metric": 0.8117647058823529, "best_model_checkpoint": "swinv2-tiny-patch4-window8-256-dmae-humeda-DAV23/checkpoint-170", "epoch": 37.666666666666664, "eval_steps": 500, "global_step": 640, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.7272727272727273, "grad_norm": 10.422093391418457, "learning_rate": 9.375000000000001e-06, "loss": 6.4493, "step": 12 }, { "epoch": 1.0, "eval_accuracy": 0.29411764705882354, "eval_loss": 1.5280741453170776, "eval_runtime": 1.7296, "eval_samples_per_second": 49.145, "eval_steps_per_second": 1.735, "step": 17 }, { "epoch": 1.4242424242424243, "grad_norm": 14.379430770874023, "learning_rate": 1.8750000000000002e-05, "loss": 5.7922, "step": 24 }, { "epoch": 2.0, "eval_accuracy": 0.38823529411764707, "eval_loss": 1.317600965499878, "eval_runtime": 1.301, "eval_samples_per_second": 65.332, "eval_steps_per_second": 2.306, "step": 34 }, { "epoch": 2.121212121212121, "grad_norm": 28.70977783203125, "learning_rate": 2.8125000000000003e-05, "loss": 5.0076, "step": 36 }, { "epoch": 2.8484848484848486, "grad_norm": 24.181316375732422, "learning_rate": 3.7500000000000003e-05, "loss": 4.2502, "step": 48 }, { "epoch": 3.0, "eval_accuracy": 0.43529411764705883, "eval_loss": 1.2014732360839844, "eval_runtime": 1.3256, "eval_samples_per_second": 64.12, "eval_steps_per_second": 2.263, "step": 51 }, { "epoch": 3.5454545454545454, "grad_norm": 29.8701171875, "learning_rate": 4.6875e-05, "loss": 3.2402, "step": 60 }, { "epoch": 4.0, "eval_accuracy": 0.7176470588235294, "eval_loss": 0.8901777267456055, "eval_runtime": 1.6039, "eval_samples_per_second": 52.995, "eval_steps_per_second": 1.87, "step": 68 }, { "epoch": 4.242424242424242, "grad_norm": 43.26256561279297, "learning_rate": 4.930555555555556e-05, "loss": 2.8895, "step": 72 }, { "epoch": 4.96969696969697, "grad_norm": 38.71839904785156, "learning_rate": 4.8263888888888895e-05, "loss": 2.5386, "step": 84 }, { "epoch": 5.0, "eval_accuracy": 0.7764705882352941, "eval_loss": 0.6509169340133667, "eval_runtime": 1.3254, "eval_samples_per_second": 64.131, "eval_steps_per_second": 2.263, "step": 85 }, { "epoch": 5.666666666666667, "grad_norm": 35.30036544799805, "learning_rate": 4.722222222222222e-05, "loss": 2.0351, "step": 96 }, { "epoch": 6.0, "eval_accuracy": 0.7647058823529411, "eval_loss": 0.6758585572242737, "eval_runtime": 1.3401, "eval_samples_per_second": 63.43, "eval_steps_per_second": 2.239, "step": 102 }, { "epoch": 6.363636363636363, "grad_norm": 36.81019973754883, "learning_rate": 4.618055555555556e-05, "loss": 1.8225, "step": 108 }, { "epoch": 7.0, "eval_accuracy": 0.7764705882352941, "eval_loss": 0.6606881022453308, "eval_runtime": 1.7388, "eval_samples_per_second": 48.883, "eval_steps_per_second": 1.725, "step": 119 }, { "epoch": 7.0606060606060606, "grad_norm": 38.81963348388672, "learning_rate": 4.5138888888888894e-05, "loss": 1.5799, "step": 120 }, { "epoch": 7.787878787878788, "grad_norm": 34.68444061279297, "learning_rate": 4.4097222222222226e-05, "loss": 1.4778, "step": 132 }, { "epoch": 8.0, "eval_accuracy": 0.7529411764705882, "eval_loss": 0.7161939740180969, "eval_runtime": 1.3317, "eval_samples_per_second": 63.829, "eval_steps_per_second": 2.253, "step": 136 }, { "epoch": 8.484848484848484, "grad_norm": 33.96342849731445, "learning_rate": 4.305555555555556e-05, "loss": 1.4076, "step": 144 }, { "epoch": 9.0, "eval_accuracy": 0.7294117647058823, "eval_loss": 0.9084261059761047, "eval_runtime": 1.3286, "eval_samples_per_second": 63.976, "eval_steps_per_second": 2.258, "step": 153 }, { "epoch": 9.181818181818182, "grad_norm": 51.935420989990234, "learning_rate": 4.201388888888889e-05, "loss": 1.3522, "step": 156 }, { "epoch": 9.909090909090908, "grad_norm": 35.646156311035156, "learning_rate": 4.0972222222222225e-05, "loss": 1.2056, "step": 168 }, { "epoch": 10.0, "eval_accuracy": 0.8117647058823529, "eval_loss": 0.6900736689567566, "eval_runtime": 1.3237, "eval_samples_per_second": 64.212, "eval_steps_per_second": 2.266, "step": 170 }, { "epoch": 10.606060606060606, "grad_norm": 39.277122497558594, "learning_rate": 3.993055555555556e-05, "loss": 0.9552, "step": 180 }, { "epoch": 11.0, "eval_accuracy": 0.7764705882352941, "eval_loss": 0.9153303503990173, "eval_runtime": 1.3258, "eval_samples_per_second": 64.11, "eval_steps_per_second": 2.263, "step": 187 }, { "epoch": 11.303030303030303, "grad_norm": 32.70234298706055, "learning_rate": 3.888888888888889e-05, "loss": 1.0259, "step": 192 }, { "epoch": 12.0, "grad_norm": 27.082563400268555, "learning_rate": 3.7847222222222224e-05, "loss": 0.9859, "step": 204 }, { "epoch": 12.0, "eval_accuracy": 0.7529411764705882, "eval_loss": 0.8693907856941223, "eval_runtime": 1.3304, "eval_samples_per_second": 63.892, "eval_steps_per_second": 2.255, "step": 204 }, { "epoch": 12.727272727272727, "grad_norm": 25.17854881286621, "learning_rate": 3.6805555555555556e-05, "loss": 0.8309, "step": 216 }, { "epoch": 13.0, "eval_accuracy": 0.8, "eval_loss": 0.7666147351264954, "eval_runtime": 1.5029, "eval_samples_per_second": 56.559, "eval_steps_per_second": 1.996, "step": 221 }, { "epoch": 13.424242424242424, "grad_norm": 42.24884033203125, "learning_rate": 3.576388888888889e-05, "loss": 0.7722, "step": 228 }, { "epoch": 14.0, "eval_accuracy": 0.7529411764705882, "eval_loss": 0.9117515087127686, "eval_runtime": 1.4392, "eval_samples_per_second": 59.061, "eval_steps_per_second": 2.085, "step": 238 }, { "epoch": 14.121212121212121, "grad_norm": 25.000560760498047, "learning_rate": 3.472222222222222e-05, "loss": 0.7479, "step": 240 }, { "epoch": 14.848484848484848, "grad_norm": 23.831018447875977, "learning_rate": 3.368055555555556e-05, "loss": 0.7632, "step": 252 }, { "epoch": 15.0, "eval_accuracy": 0.7529411764705882, "eval_loss": 0.89529949426651, "eval_runtime": 1.3464, "eval_samples_per_second": 63.129, "eval_steps_per_second": 2.228, "step": 255 }, { "epoch": 15.545454545454545, "grad_norm": 37.80634689331055, "learning_rate": 3.263888888888889e-05, "loss": 0.5868, "step": 264 }, { "epoch": 16.0, "eval_accuracy": 0.7529411764705882, "eval_loss": 0.9678363800048828, "eval_runtime": 1.3437, "eval_samples_per_second": 63.259, "eval_steps_per_second": 2.233, "step": 272 }, { "epoch": 16.242424242424242, "grad_norm": 31.965198516845703, "learning_rate": 3.159722222222222e-05, "loss": 0.8369, "step": 276 }, { "epoch": 16.96969696969697, "grad_norm": 43.057701110839844, "learning_rate": 3.055555555555556e-05, "loss": 0.6577, "step": 288 }, { "epoch": 17.0, "eval_accuracy": 0.7764705882352941, "eval_loss": 1.0502737760543823, "eval_runtime": 1.7574, "eval_samples_per_second": 48.368, "eval_steps_per_second": 1.707, "step": 289 }, { "epoch": 17.666666666666668, "grad_norm": 22.851730346679688, "learning_rate": 2.951388888888889e-05, "loss": 0.5816, "step": 300 }, { "epoch": 18.0, "eval_accuracy": 0.7294117647058823, "eval_loss": 1.0601861476898193, "eval_runtime": 1.4181, "eval_samples_per_second": 59.94, "eval_steps_per_second": 2.116, "step": 306 }, { "epoch": 18.363636363636363, "grad_norm": 24.805330276489258, "learning_rate": 2.8472222222222223e-05, "loss": 0.6222, "step": 312 }, { "epoch": 19.0, "eval_accuracy": 0.7764705882352941, "eval_loss": 1.1542593240737915, "eval_runtime": 1.3489, "eval_samples_per_second": 63.014, "eval_steps_per_second": 2.224, "step": 323 }, { "epoch": 19.060606060606062, "grad_norm": 24.57158851623535, "learning_rate": 2.743055555555556e-05, "loss": 0.6667, "step": 324 }, { "epoch": 19.78787878787879, "grad_norm": 52.441471099853516, "learning_rate": 2.6388888888888892e-05, "loss": 0.4861, "step": 336 }, { "epoch": 20.0, "eval_accuracy": 0.8117647058823529, "eval_loss": 0.9739417433738708, "eval_runtime": 1.3332, "eval_samples_per_second": 63.755, "eval_steps_per_second": 2.25, "step": 340 }, { "epoch": 20.484848484848484, "grad_norm": 20.256858825683594, "learning_rate": 2.534722222222222e-05, "loss": 0.4422, "step": 348 }, { "epoch": 21.0, "eval_accuracy": 0.8, "eval_loss": 1.035377860069275, "eval_runtime": 1.7575, "eval_samples_per_second": 48.365, "eval_steps_per_second": 1.707, "step": 357 }, { "epoch": 21.181818181818183, "grad_norm": 30.408321380615234, "learning_rate": 2.4305555555555558e-05, "loss": 0.5374, "step": 360 }, { "epoch": 21.90909090909091, "grad_norm": 16.08924102783203, "learning_rate": 2.326388888888889e-05, "loss": 0.506, "step": 372 }, { "epoch": 22.0, "eval_accuracy": 0.8117647058823529, "eval_loss": 1.109660029411316, "eval_runtime": 1.3601, "eval_samples_per_second": 62.497, "eval_steps_per_second": 2.206, "step": 374 }, { "epoch": 22.606060606060606, "grad_norm": 19.899555206298828, "learning_rate": 2.2222222222222223e-05, "loss": 0.3833, "step": 384 }, { "epoch": 23.0, "eval_accuracy": 0.7764705882352941, "eval_loss": 1.2008516788482666, "eval_runtime": 1.3596, "eval_samples_per_second": 62.517, "eval_steps_per_second": 2.206, "step": 391 }, { "epoch": 23.303030303030305, "grad_norm": 23.089004516601562, "learning_rate": 2.1180555555555556e-05, "loss": 0.5273, "step": 396 }, { "epoch": 24.0, "grad_norm": 12.893085479736328, "learning_rate": 2.013888888888889e-05, "loss": 0.4574, "step": 408 }, { "epoch": 24.0, "eval_accuracy": 0.7764705882352941, "eval_loss": 1.1365725994110107, "eval_runtime": 1.3252, "eval_samples_per_second": 64.141, "eval_steps_per_second": 2.264, "step": 408 }, { "epoch": 24.727272727272727, "grad_norm": 29.011215209960938, "learning_rate": 1.9097222222222222e-05, "loss": 0.4467, "step": 420 }, { "epoch": 25.0, "eval_accuracy": 0.8117647058823529, "eval_loss": 1.0601016283035278, "eval_runtime": 1.4691, "eval_samples_per_second": 57.86, "eval_steps_per_second": 2.042, "step": 425 }, { "epoch": 25.424242424242426, "grad_norm": 37.92557907104492, "learning_rate": 1.8055555555555555e-05, "loss": 0.4451, "step": 432 }, { "epoch": 26.0, "eval_accuracy": 0.7764705882352941, "eval_loss": 1.0934613943099976, "eval_runtime": 1.3569, "eval_samples_per_second": 62.643, "eval_steps_per_second": 2.211, "step": 442 }, { "epoch": 26.12121212121212, "grad_norm": 29.263154983520508, "learning_rate": 1.701388888888889e-05, "loss": 0.483, "step": 444 }, { "epoch": 26.848484848484848, "grad_norm": 20.336957931518555, "learning_rate": 1.597222222222222e-05, "loss": 0.4384, "step": 456 }, { "epoch": 27.0, "eval_accuracy": 0.7647058823529411, "eval_loss": 1.1616755723953247, "eval_runtime": 1.3388, "eval_samples_per_second": 63.492, "eval_steps_per_second": 2.241, "step": 459 }, { "epoch": 27.545454545454547, "grad_norm": 26.435325622558594, "learning_rate": 1.4930555555555557e-05, "loss": 0.4321, "step": 468 }, { "epoch": 28.0, "eval_accuracy": 0.7764705882352941, "eval_loss": 1.1012462377548218, "eval_runtime": 1.7645, "eval_samples_per_second": 48.171, "eval_steps_per_second": 1.7, "step": 476 }, { "epoch": 28.242424242424242, "grad_norm": 35.24657440185547, "learning_rate": 1.388888888888889e-05, "loss": 0.4068, "step": 480 }, { "epoch": 28.96969696969697, "grad_norm": 29.191991806030273, "learning_rate": 1.2847222222222222e-05, "loss": 0.4398, "step": 492 }, { "epoch": 29.0, "eval_accuracy": 0.788235294117647, "eval_loss": 1.082492709159851, "eval_runtime": 1.3299, "eval_samples_per_second": 63.914, "eval_steps_per_second": 2.256, "step": 493 }, { "epoch": 29.666666666666668, "grad_norm": 26.035062789916992, "learning_rate": 1.1805555555555555e-05, "loss": 0.361, "step": 504 }, { "epoch": 30.0, "eval_accuracy": 0.7647058823529411, "eval_loss": 1.112740397453308, "eval_runtime": 1.3356, "eval_samples_per_second": 63.641, "eval_steps_per_second": 2.246, "step": 510 }, { "epoch": 30.363636363636363, "grad_norm": 24.128007888793945, "learning_rate": 1.076388888888889e-05, "loss": 0.4428, "step": 516 }, { "epoch": 31.0, "eval_accuracy": 0.7529411764705882, "eval_loss": 1.2024400234222412, "eval_runtime": 1.8084, "eval_samples_per_second": 47.003, "eval_steps_per_second": 1.659, "step": 527 }, { "epoch": 31.060606060606062, "grad_norm": 29.159976959228516, "learning_rate": 9.722222222222223e-06, "loss": 0.4293, "step": 528 }, { "epoch": 31.78787878787879, "grad_norm": 19.373197555541992, "learning_rate": 8.680555555555556e-06, "loss": 0.451, "step": 540 }, { "epoch": 32.0, "eval_accuracy": 0.7647058823529411, "eval_loss": 1.1550030708312988, "eval_runtime": 1.3443, "eval_samples_per_second": 63.23, "eval_steps_per_second": 2.232, "step": 544 }, { "epoch": 32.484848484848484, "grad_norm": 12.227431297302246, "learning_rate": 7.63888888888889e-06, "loss": 0.403, "step": 552 }, { "epoch": 33.0, "eval_accuracy": 0.7764705882352941, "eval_loss": 1.1645594835281372, "eval_runtime": 1.3526, "eval_samples_per_second": 62.842, "eval_steps_per_second": 2.218, "step": 561 }, { "epoch": 33.18181818181818, "grad_norm": 20.577377319335938, "learning_rate": 6.597222222222223e-06, "loss": 0.4113, "step": 564 }, { "epoch": 33.90909090909091, "grad_norm": 24.493812561035156, "learning_rate": 5.555555555555556e-06, "loss": 0.3059, "step": 576 }, { "epoch": 34.0, "eval_accuracy": 0.7764705882352941, "eval_loss": 1.2441880702972412, "eval_runtime": 1.3246, "eval_samples_per_second": 64.172, "eval_steps_per_second": 2.265, "step": 578 }, { "epoch": 34.60606060606061, "grad_norm": 28.553544998168945, "learning_rate": 4.513888888888889e-06, "loss": 0.3022, "step": 588 }, { "epoch": 35.0, "eval_accuracy": 0.7764705882352941, "eval_loss": 1.1975644826889038, "eval_runtime": 1.5644, "eval_samples_per_second": 54.334, "eval_steps_per_second": 1.918, "step": 595 }, { "epoch": 35.303030303030305, "grad_norm": 17.071916580200195, "learning_rate": 3.4722222222222224e-06, "loss": 0.3343, "step": 600 }, { "epoch": 36.0, "grad_norm": 10.918073654174805, "learning_rate": 2.4305555555555557e-06, "loss": 0.319, "step": 612 }, { "epoch": 36.0, "eval_accuracy": 0.7764705882352941, "eval_loss": 1.1564186811447144, "eval_runtime": 1.4741, "eval_samples_per_second": 57.663, "eval_steps_per_second": 2.035, "step": 612 }, { "epoch": 36.72727272727273, "grad_norm": 8.741593360900879, "learning_rate": 1.388888888888889e-06, "loss": 0.3737, "step": 624 }, { "epoch": 37.0, "eval_accuracy": 0.7764705882352941, "eval_loss": 1.1857171058654785, "eval_runtime": 1.3517, "eval_samples_per_second": 62.883, "eval_steps_per_second": 2.219, "step": 629 }, { "epoch": 37.42424242424242, "grad_norm": 20.94017219543457, "learning_rate": 3.4722222222222224e-07, "loss": 0.3063, "step": 636 }, { "epoch": 37.666666666666664, "eval_accuracy": 0.7764705882352941, "eval_loss": 1.193001627922058, "eval_runtime": 1.6167, "eval_samples_per_second": 52.578, "eval_steps_per_second": 1.856, "step": 640 }, { "epoch": 37.666666666666664, "step": 640, "total_flos": 2.572737077098709e+18, "train_loss": 1.1635722614824773, "train_runtime": 1917.6947, "train_samples_per_second": 43.782, "train_steps_per_second": 0.334 } ], "logging_steps": 12, "max_steps": 640, "num_input_tokens_seen": 0, "num_train_epochs": 40, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.572737077098709e+18, "train_batch_size": 32, "trial_name": null, "trial_params": null }