|
{ |
|
"best_metric": 0.7207300066947937, |
|
"best_model_checkpoint": "/raildefectfft2/checkpoint-30", |
|
"epoch": 30.0, |
|
"global_step": 450, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.00019555555555555556, |
|
"loss": 1.3922, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_accuracy": 0.6114285714285714, |
|
"eval_loss": 1.1690133810043335, |
|
"eval_runtime": 217.3963, |
|
"eval_samples_per_second": 1.61, |
|
"eval_steps_per_second": 0.202, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 0.00019111111111111114, |
|
"loss": 0.8518, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"eval_accuracy": 0.6828571428571428, |
|
"eval_loss": 0.8874489068984985, |
|
"eval_runtime": 217.0024, |
|
"eval_samples_per_second": 1.613, |
|
"eval_steps_per_second": 0.203, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 0.0001866666666666667, |
|
"loss": 0.5386, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7542857142857143, |
|
"eval_loss": 0.7207300066947937, |
|
"eval_runtime": 221.2829, |
|
"eval_samples_per_second": 1.582, |
|
"eval_steps_per_second": 0.199, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 0.00018222222222222224, |
|
"loss": 0.3125, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"eval_accuracy": 0.7285714285714285, |
|
"eval_loss": 0.8382583260536194, |
|
"eval_runtime": 217.2612, |
|
"eval_samples_per_second": 1.611, |
|
"eval_steps_per_second": 0.203, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 0.00017777777777777779, |
|
"loss": 0.2264, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"eval_accuracy": 0.7428571428571429, |
|
"eval_loss": 0.8440293669700623, |
|
"eval_runtime": 215.3358, |
|
"eval_samples_per_second": 1.625, |
|
"eval_steps_per_second": 0.204, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 0.00017333333333333334, |
|
"loss": 0.1613, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.7457142857142857, |
|
"eval_loss": 0.8516280055046082, |
|
"eval_runtime": 221.3905, |
|
"eval_samples_per_second": 1.581, |
|
"eval_steps_per_second": 0.199, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 4.67, |
|
"learning_rate": 0.00016888888888888889, |
|
"loss": 0.119, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 4.67, |
|
"eval_accuracy": 0.6, |
|
"eval_loss": 1.3625210523605347, |
|
"eval_runtime": 210.1415, |
|
"eval_samples_per_second": 1.666, |
|
"eval_steps_per_second": 0.209, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 5.33, |
|
"learning_rate": 0.00016444444444444444, |
|
"loss": 0.0972, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 5.33, |
|
"eval_accuracy": 0.7428571428571429, |
|
"eval_loss": 0.9109606146812439, |
|
"eval_runtime": 208.8901, |
|
"eval_samples_per_second": 1.676, |
|
"eval_steps_per_second": 0.211, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 0.00016, |
|
"loss": 0.0844, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.78, |
|
"eval_loss": 0.8271930813789368, |
|
"eval_runtime": 214.6994, |
|
"eval_samples_per_second": 1.63, |
|
"eval_steps_per_second": 0.205, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 6.67, |
|
"learning_rate": 0.00015555555555555556, |
|
"loss": 0.0725, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 6.67, |
|
"eval_accuracy": 0.74, |
|
"eval_loss": 0.8958377242088318, |
|
"eval_runtime": 209.0619, |
|
"eval_samples_per_second": 1.674, |
|
"eval_steps_per_second": 0.21, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 7.33, |
|
"learning_rate": 0.0001511111111111111, |
|
"loss": 0.0708, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 7.33, |
|
"eval_accuracy": 0.7371428571428571, |
|
"eval_loss": 1.0972360372543335, |
|
"eval_runtime": 210.819, |
|
"eval_samples_per_second": 1.66, |
|
"eval_steps_per_second": 0.209, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 0.00014666666666666666, |
|
"loss": 0.041, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.7628571428571429, |
|
"eval_loss": 1.0088900327682495, |
|
"eval_runtime": 209.2911, |
|
"eval_samples_per_second": 1.672, |
|
"eval_steps_per_second": 0.21, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 8.67, |
|
"learning_rate": 0.00014222222222222224, |
|
"loss": 0.0312, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 8.67, |
|
"eval_accuracy": 0.7628571428571429, |
|
"eval_loss": 1.03481125831604, |
|
"eval_runtime": 215.9471, |
|
"eval_samples_per_second": 1.621, |
|
"eval_steps_per_second": 0.204, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 9.33, |
|
"learning_rate": 0.0001377777777777778, |
|
"loss": 0.0401, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 9.33, |
|
"eval_accuracy": 0.7257142857142858, |
|
"eval_loss": 1.2426719665527344, |
|
"eval_runtime": 211.184, |
|
"eval_samples_per_second": 1.657, |
|
"eval_steps_per_second": 0.208, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 0.00013333333333333334, |
|
"loss": 0.0271, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.7542857142857143, |
|
"eval_loss": 1.0153539180755615, |
|
"eval_runtime": 208.7196, |
|
"eval_samples_per_second": 1.677, |
|
"eval_steps_per_second": 0.211, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 10.67, |
|
"learning_rate": 0.00012888888888888892, |
|
"loss": 0.0328, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 10.67, |
|
"eval_accuracy": 0.7714285714285715, |
|
"eval_loss": 1.0373210906982422, |
|
"eval_runtime": 216.803, |
|
"eval_samples_per_second": 1.614, |
|
"eval_steps_per_second": 0.203, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 11.33, |
|
"learning_rate": 0.00012444444444444444, |
|
"loss": 0.023, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 11.33, |
|
"eval_accuracy": 0.7685714285714286, |
|
"eval_loss": 1.005110502243042, |
|
"eval_runtime": 209.183, |
|
"eval_samples_per_second": 1.673, |
|
"eval_steps_per_second": 0.21, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 0.00012, |
|
"loss": 0.0199, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.7657142857142857, |
|
"eval_loss": 0.9775477647781372, |
|
"eval_runtime": 208.6639, |
|
"eval_samples_per_second": 1.677, |
|
"eval_steps_per_second": 0.211, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 12.67, |
|
"learning_rate": 0.00011555555555555555, |
|
"loss": 0.0189, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 12.67, |
|
"eval_accuracy": 0.7657142857142857, |
|
"eval_loss": 1.008815050125122, |
|
"eval_runtime": 216.3653, |
|
"eval_samples_per_second": 1.618, |
|
"eval_steps_per_second": 0.203, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 13.33, |
|
"learning_rate": 0.00011111111111111112, |
|
"loss": 0.0188, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 13.33, |
|
"eval_accuracy": 0.7342857142857143, |
|
"eval_loss": 1.1904319524765015, |
|
"eval_runtime": 209.8186, |
|
"eval_samples_per_second": 1.668, |
|
"eval_steps_per_second": 0.21, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"learning_rate": 0.00010666666666666667, |
|
"loss": 0.0167, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.7285714285714285, |
|
"eval_loss": 1.2999135255813599, |
|
"eval_runtime": 209.7587, |
|
"eval_samples_per_second": 1.669, |
|
"eval_steps_per_second": 0.21, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 14.67, |
|
"learning_rate": 0.00010222222222222222, |
|
"loss": 0.0159, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 14.67, |
|
"eval_accuracy": 0.7514285714285714, |
|
"eval_loss": 1.1326370239257812, |
|
"eval_runtime": 215.8574, |
|
"eval_samples_per_second": 1.621, |
|
"eval_steps_per_second": 0.204, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 15.33, |
|
"learning_rate": 9.777777777777778e-05, |
|
"loss": 0.0145, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 15.33, |
|
"eval_accuracy": 0.7542857142857143, |
|
"eval_loss": 1.1385791301727295, |
|
"eval_runtime": 209.3052, |
|
"eval_samples_per_second": 1.672, |
|
"eval_steps_per_second": 0.21, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 9.333333333333334e-05, |
|
"loss": 0.015, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.7542857142857143, |
|
"eval_loss": 1.1441489458084106, |
|
"eval_runtime": 209.1403, |
|
"eval_samples_per_second": 1.674, |
|
"eval_steps_per_second": 0.21, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 16.67, |
|
"learning_rate": 8.888888888888889e-05, |
|
"loss": 0.0133, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 16.67, |
|
"eval_accuracy": 0.7514285714285714, |
|
"eval_loss": 1.154445767402649, |
|
"eval_runtime": 212.9184, |
|
"eval_samples_per_second": 1.644, |
|
"eval_steps_per_second": 0.207, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 17.33, |
|
"learning_rate": 8.444444444444444e-05, |
|
"loss": 0.0132, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 17.33, |
|
"eval_accuracy": 0.7514285714285714, |
|
"eval_loss": 1.1629431247711182, |
|
"eval_runtime": 209.529, |
|
"eval_samples_per_second": 1.67, |
|
"eval_steps_per_second": 0.21, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"learning_rate": 8e-05, |
|
"loss": 0.0121, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.7514285714285714, |
|
"eval_loss": 1.1707779169082642, |
|
"eval_runtime": 209.7822, |
|
"eval_samples_per_second": 1.668, |
|
"eval_steps_per_second": 0.21, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 18.67, |
|
"learning_rate": 7.555555555555556e-05, |
|
"loss": 0.0121, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 18.67, |
|
"eval_accuracy": 0.7514285714285714, |
|
"eval_loss": 1.1773394346237183, |
|
"eval_runtime": 220.9849, |
|
"eval_samples_per_second": 1.584, |
|
"eval_steps_per_second": 0.199, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 19.33, |
|
"learning_rate": 7.111111111111112e-05, |
|
"loss": 0.0114, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 19.33, |
|
"eval_accuracy": 0.7514285714285714, |
|
"eval_loss": 1.1831494569778442, |
|
"eval_runtime": 216.1426, |
|
"eval_samples_per_second": 1.619, |
|
"eval_steps_per_second": 0.204, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 6.666666666666667e-05, |
|
"loss": 0.0111, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.7514285714285714, |
|
"eval_loss": 1.1883198022842407, |
|
"eval_runtime": 210.1269, |
|
"eval_samples_per_second": 1.666, |
|
"eval_steps_per_second": 0.209, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 20.67, |
|
"learning_rate": 6.222222222222222e-05, |
|
"loss": 0.011, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 20.67, |
|
"eval_accuracy": 0.7514285714285714, |
|
"eval_loss": 1.1936721801757812, |
|
"eval_runtime": 210.5377, |
|
"eval_samples_per_second": 1.662, |
|
"eval_steps_per_second": 0.209, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 21.33, |
|
"learning_rate": 5.7777777777777776e-05, |
|
"loss": 0.0103, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 21.33, |
|
"eval_accuracy": 0.7514285714285714, |
|
"eval_loss": 1.1992676258087158, |
|
"eval_runtime": 214.4415, |
|
"eval_samples_per_second": 1.632, |
|
"eval_steps_per_second": 0.205, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"learning_rate": 5.333333333333333e-05, |
|
"loss": 0.0103, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.7514285714285714, |
|
"eval_loss": 1.2045563459396362, |
|
"eval_runtime": 211.851, |
|
"eval_samples_per_second": 1.652, |
|
"eval_steps_per_second": 0.208, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 22.67, |
|
"learning_rate": 4.888888888888889e-05, |
|
"loss": 0.0103, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 22.67, |
|
"eval_accuracy": 0.7514285714285714, |
|
"eval_loss": 1.208925724029541, |
|
"eval_runtime": 209.6112, |
|
"eval_samples_per_second": 1.67, |
|
"eval_steps_per_second": 0.21, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 23.33, |
|
"learning_rate": 4.4444444444444447e-05, |
|
"loss": 0.0096, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 23.33, |
|
"eval_accuracy": 0.7514285714285714, |
|
"eval_loss": 1.2132576704025269, |
|
"eval_runtime": 219.2216, |
|
"eval_samples_per_second": 1.597, |
|
"eval_steps_per_second": 0.201, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"learning_rate": 4e-05, |
|
"loss": 0.0095, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.7514285714285714, |
|
"eval_loss": 1.2171136140823364, |
|
"eval_runtime": 208.9362, |
|
"eval_samples_per_second": 1.675, |
|
"eval_steps_per_second": 0.211, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 24.67, |
|
"learning_rate": 3.555555555555556e-05, |
|
"loss": 0.0096, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 24.67, |
|
"eval_accuracy": 0.7514285714285714, |
|
"eval_loss": 1.2204023599624634, |
|
"eval_runtime": 209.9857, |
|
"eval_samples_per_second": 1.667, |
|
"eval_steps_per_second": 0.21, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 25.33, |
|
"learning_rate": 3.111111111111111e-05, |
|
"loss": 0.0093, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 25.33, |
|
"eval_accuracy": 0.7485714285714286, |
|
"eval_loss": 1.2234961986541748, |
|
"eval_runtime": 216.2311, |
|
"eval_samples_per_second": 1.619, |
|
"eval_steps_per_second": 0.203, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"learning_rate": 2.6666666666666667e-05, |
|
"loss": 0.0091, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.7485714285714286, |
|
"eval_loss": 1.2261521816253662, |
|
"eval_runtime": 210.1553, |
|
"eval_samples_per_second": 1.665, |
|
"eval_steps_per_second": 0.209, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 26.67, |
|
"learning_rate": 2.2222222222222223e-05, |
|
"loss": 0.0092, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 26.67, |
|
"eval_accuracy": 0.7514285714285714, |
|
"eval_loss": 1.227960228919983, |
|
"eval_runtime": 211.3289, |
|
"eval_samples_per_second": 1.656, |
|
"eval_steps_per_second": 0.208, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 27.33, |
|
"learning_rate": 1.777777777777778e-05, |
|
"loss": 0.0089, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 27.33, |
|
"eval_accuracy": 0.7514285714285714, |
|
"eval_loss": 1.2296239137649536, |
|
"eval_runtime": 213.6977, |
|
"eval_samples_per_second": 1.638, |
|
"eval_steps_per_second": 0.206, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"learning_rate": 1.3333333333333333e-05, |
|
"loss": 0.0092, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.7514285714285714, |
|
"eval_loss": 1.2309640645980835, |
|
"eval_runtime": 210.9587, |
|
"eval_samples_per_second": 1.659, |
|
"eval_steps_per_second": 0.209, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 28.67, |
|
"learning_rate": 8.88888888888889e-06, |
|
"loss": 0.0089, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 28.67, |
|
"eval_accuracy": 0.7485714285714286, |
|
"eval_loss": 1.2319449186325073, |
|
"eval_runtime": 210.158, |
|
"eval_samples_per_second": 1.665, |
|
"eval_steps_per_second": 0.209, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 29.33, |
|
"learning_rate": 4.444444444444445e-06, |
|
"loss": 0.0089, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 29.33, |
|
"eval_accuracy": 0.7485714285714286, |
|
"eval_loss": 1.23252272605896, |
|
"eval_runtime": 216.1534, |
|
"eval_samples_per_second": 1.619, |
|
"eval_steps_per_second": 0.204, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"learning_rate": 0.0, |
|
"loss": 0.0088, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.7485714285714286, |
|
"eval_loss": 1.2327271699905396, |
|
"eval_runtime": 216.0649, |
|
"eval_samples_per_second": 1.62, |
|
"eval_steps_per_second": 0.204, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"step": 450, |
|
"total_flos": 9.8805828893184e+17, |
|
"train_loss": 0.09906705205639203, |
|
"train_runtime": 36217.9644, |
|
"train_samples_per_second": 0.352, |
|
"train_steps_per_second": 0.012 |
|
} |
|
], |
|
"max_steps": 450, |
|
"num_train_epochs": 30, |
|
"total_flos": 9.8805828893184e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|