|
{ |
|
"best_metric": 0.8117647058823529, |
|
"best_model_checkpoint": "swinv2-tiny-patch4-window8-256-dmae-humeda-DAV23/checkpoint-170", |
|
"epoch": 37.666666666666664, |
|
"eval_steps": 500, |
|
"global_step": 640, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.7272727272727273, |
|
"grad_norm": 10.422093391418457, |
|
"learning_rate": 9.375000000000001e-06, |
|
"loss": 6.4493, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.29411764705882354, |
|
"eval_loss": 1.5280741453170776, |
|
"eval_runtime": 1.7296, |
|
"eval_samples_per_second": 49.145, |
|
"eval_steps_per_second": 1.735, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 1.4242424242424243, |
|
"grad_norm": 14.379430770874023, |
|
"learning_rate": 1.8750000000000002e-05, |
|
"loss": 5.7922, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.38823529411764707, |
|
"eval_loss": 1.317600965499878, |
|
"eval_runtime": 1.301, |
|
"eval_samples_per_second": 65.332, |
|
"eval_steps_per_second": 2.306, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 2.121212121212121, |
|
"grad_norm": 28.70977783203125, |
|
"learning_rate": 2.8125000000000003e-05, |
|
"loss": 5.0076, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 2.8484848484848486, |
|
"grad_norm": 24.181316375732422, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 4.2502, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.43529411764705883, |
|
"eval_loss": 1.2014732360839844, |
|
"eval_runtime": 1.3256, |
|
"eval_samples_per_second": 64.12, |
|
"eval_steps_per_second": 2.263, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 3.5454545454545454, |
|
"grad_norm": 29.8701171875, |
|
"learning_rate": 4.6875e-05, |
|
"loss": 3.2402, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.7176470588235294, |
|
"eval_loss": 0.8901777267456055, |
|
"eval_runtime": 1.6039, |
|
"eval_samples_per_second": 52.995, |
|
"eval_steps_per_second": 1.87, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 4.242424242424242, |
|
"grad_norm": 43.26256561279297, |
|
"learning_rate": 4.930555555555556e-05, |
|
"loss": 2.8895, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 4.96969696969697, |
|
"grad_norm": 38.71839904785156, |
|
"learning_rate": 4.8263888888888895e-05, |
|
"loss": 2.5386, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.7764705882352941, |
|
"eval_loss": 0.6509169340133667, |
|
"eval_runtime": 1.3254, |
|
"eval_samples_per_second": 64.131, |
|
"eval_steps_per_second": 2.263, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 5.666666666666667, |
|
"grad_norm": 35.30036544799805, |
|
"learning_rate": 4.722222222222222e-05, |
|
"loss": 2.0351, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.7647058823529411, |
|
"eval_loss": 0.6758585572242737, |
|
"eval_runtime": 1.3401, |
|
"eval_samples_per_second": 63.43, |
|
"eval_steps_per_second": 2.239, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 6.363636363636363, |
|
"grad_norm": 36.81019973754883, |
|
"learning_rate": 4.618055555555556e-05, |
|
"loss": 1.8225, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.7764705882352941, |
|
"eval_loss": 0.6606881022453308, |
|
"eval_runtime": 1.7388, |
|
"eval_samples_per_second": 48.883, |
|
"eval_steps_per_second": 1.725, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 7.0606060606060606, |
|
"grad_norm": 38.81963348388672, |
|
"learning_rate": 4.5138888888888894e-05, |
|
"loss": 1.5799, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 7.787878787878788, |
|
"grad_norm": 34.68444061279297, |
|
"learning_rate": 4.4097222222222226e-05, |
|
"loss": 1.4778, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.7529411764705882, |
|
"eval_loss": 0.7161939740180969, |
|
"eval_runtime": 1.3317, |
|
"eval_samples_per_second": 63.829, |
|
"eval_steps_per_second": 2.253, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 8.484848484848484, |
|
"grad_norm": 33.96342849731445, |
|
"learning_rate": 4.305555555555556e-05, |
|
"loss": 1.4076, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.7294117647058823, |
|
"eval_loss": 0.9084261059761047, |
|
"eval_runtime": 1.3286, |
|
"eval_samples_per_second": 63.976, |
|
"eval_steps_per_second": 2.258, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 9.181818181818182, |
|
"grad_norm": 51.935420989990234, |
|
"learning_rate": 4.201388888888889e-05, |
|
"loss": 1.3522, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 9.909090909090908, |
|
"grad_norm": 35.646156311035156, |
|
"learning_rate": 4.0972222222222225e-05, |
|
"loss": 1.2056, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.8117647058823529, |
|
"eval_loss": 0.6900736689567566, |
|
"eval_runtime": 1.3237, |
|
"eval_samples_per_second": 64.212, |
|
"eval_steps_per_second": 2.266, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 10.606060606060606, |
|
"grad_norm": 39.277122497558594, |
|
"learning_rate": 3.993055555555556e-05, |
|
"loss": 0.9552, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.7764705882352941, |
|
"eval_loss": 0.9153303503990173, |
|
"eval_runtime": 1.3258, |
|
"eval_samples_per_second": 64.11, |
|
"eval_steps_per_second": 2.263, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 11.303030303030303, |
|
"grad_norm": 32.70234298706055, |
|
"learning_rate": 3.888888888888889e-05, |
|
"loss": 1.0259, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 27.082563400268555, |
|
"learning_rate": 3.7847222222222224e-05, |
|
"loss": 0.9859, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.7529411764705882, |
|
"eval_loss": 0.8693907856941223, |
|
"eval_runtime": 1.3304, |
|
"eval_samples_per_second": 63.892, |
|
"eval_steps_per_second": 2.255, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 12.727272727272727, |
|
"grad_norm": 25.17854881286621, |
|
"learning_rate": 3.6805555555555556e-05, |
|
"loss": 0.8309, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.8, |
|
"eval_loss": 0.7666147351264954, |
|
"eval_runtime": 1.5029, |
|
"eval_samples_per_second": 56.559, |
|
"eval_steps_per_second": 1.996, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 13.424242424242424, |
|
"grad_norm": 42.24884033203125, |
|
"learning_rate": 3.576388888888889e-05, |
|
"loss": 0.7722, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.7529411764705882, |
|
"eval_loss": 0.9117515087127686, |
|
"eval_runtime": 1.4392, |
|
"eval_samples_per_second": 59.061, |
|
"eval_steps_per_second": 2.085, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 14.121212121212121, |
|
"grad_norm": 25.000560760498047, |
|
"learning_rate": 3.472222222222222e-05, |
|
"loss": 0.7479, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 14.848484848484848, |
|
"grad_norm": 23.831018447875977, |
|
"learning_rate": 3.368055555555556e-05, |
|
"loss": 0.7632, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.7529411764705882, |
|
"eval_loss": 0.89529949426651, |
|
"eval_runtime": 1.3464, |
|
"eval_samples_per_second": 63.129, |
|
"eval_steps_per_second": 2.228, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 15.545454545454545, |
|
"grad_norm": 37.80634689331055, |
|
"learning_rate": 3.263888888888889e-05, |
|
"loss": 0.5868, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.7529411764705882, |
|
"eval_loss": 0.9678363800048828, |
|
"eval_runtime": 1.3437, |
|
"eval_samples_per_second": 63.259, |
|
"eval_steps_per_second": 2.233, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 16.242424242424242, |
|
"grad_norm": 31.965198516845703, |
|
"learning_rate": 3.159722222222222e-05, |
|
"loss": 0.8369, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 16.96969696969697, |
|
"grad_norm": 43.057701110839844, |
|
"learning_rate": 3.055555555555556e-05, |
|
"loss": 0.6577, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.7764705882352941, |
|
"eval_loss": 1.0502737760543823, |
|
"eval_runtime": 1.7574, |
|
"eval_samples_per_second": 48.368, |
|
"eval_steps_per_second": 1.707, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 17.666666666666668, |
|
"grad_norm": 22.851730346679688, |
|
"learning_rate": 2.951388888888889e-05, |
|
"loss": 0.5816, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.7294117647058823, |
|
"eval_loss": 1.0601861476898193, |
|
"eval_runtime": 1.4181, |
|
"eval_samples_per_second": 59.94, |
|
"eval_steps_per_second": 2.116, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 18.363636363636363, |
|
"grad_norm": 24.805330276489258, |
|
"learning_rate": 2.8472222222222223e-05, |
|
"loss": 0.6222, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.7764705882352941, |
|
"eval_loss": 1.1542593240737915, |
|
"eval_runtime": 1.3489, |
|
"eval_samples_per_second": 63.014, |
|
"eval_steps_per_second": 2.224, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 19.060606060606062, |
|
"grad_norm": 24.57158851623535, |
|
"learning_rate": 2.743055555555556e-05, |
|
"loss": 0.6667, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 19.78787878787879, |
|
"grad_norm": 52.441471099853516, |
|
"learning_rate": 2.6388888888888892e-05, |
|
"loss": 0.4861, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.8117647058823529, |
|
"eval_loss": 0.9739417433738708, |
|
"eval_runtime": 1.3332, |
|
"eval_samples_per_second": 63.755, |
|
"eval_steps_per_second": 2.25, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 20.484848484848484, |
|
"grad_norm": 20.256858825683594, |
|
"learning_rate": 2.534722222222222e-05, |
|
"loss": 0.4422, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.8, |
|
"eval_loss": 1.035377860069275, |
|
"eval_runtime": 1.7575, |
|
"eval_samples_per_second": 48.365, |
|
"eval_steps_per_second": 1.707, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 21.181818181818183, |
|
"grad_norm": 30.408321380615234, |
|
"learning_rate": 2.4305555555555558e-05, |
|
"loss": 0.5374, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 21.90909090909091, |
|
"grad_norm": 16.08924102783203, |
|
"learning_rate": 2.326388888888889e-05, |
|
"loss": 0.506, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.8117647058823529, |
|
"eval_loss": 1.109660029411316, |
|
"eval_runtime": 1.3601, |
|
"eval_samples_per_second": 62.497, |
|
"eval_steps_per_second": 2.206, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 22.606060606060606, |
|
"grad_norm": 19.899555206298828, |
|
"learning_rate": 2.2222222222222223e-05, |
|
"loss": 0.3833, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.7764705882352941, |
|
"eval_loss": 1.2008516788482666, |
|
"eval_runtime": 1.3596, |
|
"eval_samples_per_second": 62.517, |
|
"eval_steps_per_second": 2.206, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 23.303030303030305, |
|
"grad_norm": 23.089004516601562, |
|
"learning_rate": 2.1180555555555556e-05, |
|
"loss": 0.5273, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"grad_norm": 12.893085479736328, |
|
"learning_rate": 2.013888888888889e-05, |
|
"loss": 0.4574, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.7764705882352941, |
|
"eval_loss": 1.1365725994110107, |
|
"eval_runtime": 1.3252, |
|
"eval_samples_per_second": 64.141, |
|
"eval_steps_per_second": 2.264, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 24.727272727272727, |
|
"grad_norm": 29.011215209960938, |
|
"learning_rate": 1.9097222222222222e-05, |
|
"loss": 0.4467, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.8117647058823529, |
|
"eval_loss": 1.0601016283035278, |
|
"eval_runtime": 1.4691, |
|
"eval_samples_per_second": 57.86, |
|
"eval_steps_per_second": 2.042, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 25.424242424242426, |
|
"grad_norm": 37.92557907104492, |
|
"learning_rate": 1.8055555555555555e-05, |
|
"loss": 0.4451, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.7764705882352941, |
|
"eval_loss": 1.0934613943099976, |
|
"eval_runtime": 1.3569, |
|
"eval_samples_per_second": 62.643, |
|
"eval_steps_per_second": 2.211, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 26.12121212121212, |
|
"grad_norm": 29.263154983520508, |
|
"learning_rate": 1.701388888888889e-05, |
|
"loss": 0.483, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 26.848484848484848, |
|
"grad_norm": 20.336957931518555, |
|
"learning_rate": 1.597222222222222e-05, |
|
"loss": 0.4384, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.7647058823529411, |
|
"eval_loss": 1.1616755723953247, |
|
"eval_runtime": 1.3388, |
|
"eval_samples_per_second": 63.492, |
|
"eval_steps_per_second": 2.241, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 27.545454545454547, |
|
"grad_norm": 26.435325622558594, |
|
"learning_rate": 1.4930555555555557e-05, |
|
"loss": 0.4321, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.7764705882352941, |
|
"eval_loss": 1.1012462377548218, |
|
"eval_runtime": 1.7645, |
|
"eval_samples_per_second": 48.171, |
|
"eval_steps_per_second": 1.7, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 28.242424242424242, |
|
"grad_norm": 35.24657440185547, |
|
"learning_rate": 1.388888888888889e-05, |
|
"loss": 0.4068, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 28.96969696969697, |
|
"grad_norm": 29.191991806030273, |
|
"learning_rate": 1.2847222222222222e-05, |
|
"loss": 0.4398, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.788235294117647, |
|
"eval_loss": 1.082492709159851, |
|
"eval_runtime": 1.3299, |
|
"eval_samples_per_second": 63.914, |
|
"eval_steps_per_second": 2.256, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 29.666666666666668, |
|
"grad_norm": 26.035062789916992, |
|
"learning_rate": 1.1805555555555555e-05, |
|
"loss": 0.361, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.7647058823529411, |
|
"eval_loss": 1.112740397453308, |
|
"eval_runtime": 1.3356, |
|
"eval_samples_per_second": 63.641, |
|
"eval_steps_per_second": 2.246, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 30.363636363636363, |
|
"grad_norm": 24.128007888793945, |
|
"learning_rate": 1.076388888888889e-05, |
|
"loss": 0.4428, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.7529411764705882, |
|
"eval_loss": 1.2024400234222412, |
|
"eval_runtime": 1.8084, |
|
"eval_samples_per_second": 47.003, |
|
"eval_steps_per_second": 1.659, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 31.060606060606062, |
|
"grad_norm": 29.159976959228516, |
|
"learning_rate": 9.722222222222223e-06, |
|
"loss": 0.4293, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 31.78787878787879, |
|
"grad_norm": 19.373197555541992, |
|
"learning_rate": 8.680555555555556e-06, |
|
"loss": 0.451, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.7647058823529411, |
|
"eval_loss": 1.1550030708312988, |
|
"eval_runtime": 1.3443, |
|
"eval_samples_per_second": 63.23, |
|
"eval_steps_per_second": 2.232, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 32.484848484848484, |
|
"grad_norm": 12.227431297302246, |
|
"learning_rate": 7.63888888888889e-06, |
|
"loss": 0.403, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.7764705882352941, |
|
"eval_loss": 1.1645594835281372, |
|
"eval_runtime": 1.3526, |
|
"eval_samples_per_second": 62.842, |
|
"eval_steps_per_second": 2.218, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 33.18181818181818, |
|
"grad_norm": 20.577377319335938, |
|
"learning_rate": 6.597222222222223e-06, |
|
"loss": 0.4113, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 33.90909090909091, |
|
"grad_norm": 24.493812561035156, |
|
"learning_rate": 5.555555555555556e-06, |
|
"loss": 0.3059, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.7764705882352941, |
|
"eval_loss": 1.2441880702972412, |
|
"eval_runtime": 1.3246, |
|
"eval_samples_per_second": 64.172, |
|
"eval_steps_per_second": 2.265, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 34.60606060606061, |
|
"grad_norm": 28.553544998168945, |
|
"learning_rate": 4.513888888888889e-06, |
|
"loss": 0.3022, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.7764705882352941, |
|
"eval_loss": 1.1975644826889038, |
|
"eval_runtime": 1.5644, |
|
"eval_samples_per_second": 54.334, |
|
"eval_steps_per_second": 1.918, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 35.303030303030305, |
|
"grad_norm": 17.071916580200195, |
|
"learning_rate": 3.4722222222222224e-06, |
|
"loss": 0.3343, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"grad_norm": 10.918073654174805, |
|
"learning_rate": 2.4305555555555557e-06, |
|
"loss": 0.319, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.7764705882352941, |
|
"eval_loss": 1.1564186811447144, |
|
"eval_runtime": 1.4741, |
|
"eval_samples_per_second": 57.663, |
|
"eval_steps_per_second": 2.035, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 36.72727272727273, |
|
"grad_norm": 8.741593360900879, |
|
"learning_rate": 1.388888888888889e-06, |
|
"loss": 0.3737, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.7764705882352941, |
|
"eval_loss": 1.1857171058654785, |
|
"eval_runtime": 1.3517, |
|
"eval_samples_per_second": 62.883, |
|
"eval_steps_per_second": 2.219, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 37.42424242424242, |
|
"grad_norm": 20.94017219543457, |
|
"learning_rate": 3.4722222222222224e-07, |
|
"loss": 0.3063, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 37.666666666666664, |
|
"eval_accuracy": 0.7764705882352941, |
|
"eval_loss": 1.193001627922058, |
|
"eval_runtime": 1.6167, |
|
"eval_samples_per_second": 52.578, |
|
"eval_steps_per_second": 1.856, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 37.666666666666664, |
|
"step": 640, |
|
"total_flos": 2.572737077098709e+18, |
|
"train_loss": 1.1635722614824773, |
|
"train_runtime": 1917.6947, |
|
"train_samples_per_second": 43.782, |
|
"train_steps_per_second": 0.334 |
|
} |
|
], |
|
"logging_steps": 12, |
|
"max_steps": 640, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 40, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.572737077098709e+18, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|