{ "best_metric": 0.7207300066947937, "best_model_checkpoint": "/raildefectfft2/checkpoint-30", "epoch": 30.0, "global_step": 450, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.67, "learning_rate": 0.00019555555555555556, "loss": 1.3922, "step": 10 }, { "epoch": 0.67, "eval_accuracy": 0.6114285714285714, "eval_loss": 1.1690133810043335, "eval_runtime": 217.3963, "eval_samples_per_second": 1.61, "eval_steps_per_second": 0.202, "step": 10 }, { "epoch": 1.33, "learning_rate": 0.00019111111111111114, "loss": 0.8518, "step": 20 }, { "epoch": 1.33, "eval_accuracy": 0.6828571428571428, "eval_loss": 0.8874489068984985, "eval_runtime": 217.0024, "eval_samples_per_second": 1.613, "eval_steps_per_second": 0.203, "step": 20 }, { "epoch": 2.0, "learning_rate": 0.0001866666666666667, "loss": 0.5386, "step": 30 }, { "epoch": 2.0, "eval_accuracy": 0.7542857142857143, "eval_loss": 0.7207300066947937, "eval_runtime": 221.2829, "eval_samples_per_second": 1.582, "eval_steps_per_second": 0.199, "step": 30 }, { "epoch": 2.67, "learning_rate": 0.00018222222222222224, "loss": 0.3125, "step": 40 }, { "epoch": 2.67, "eval_accuracy": 0.7285714285714285, "eval_loss": 0.8382583260536194, "eval_runtime": 217.2612, "eval_samples_per_second": 1.611, "eval_steps_per_second": 0.203, "step": 40 }, { "epoch": 3.33, "learning_rate": 0.00017777777777777779, "loss": 0.2264, "step": 50 }, { "epoch": 3.33, "eval_accuracy": 0.7428571428571429, "eval_loss": 0.8440293669700623, "eval_runtime": 215.3358, "eval_samples_per_second": 1.625, "eval_steps_per_second": 0.204, "step": 50 }, { "epoch": 4.0, "learning_rate": 0.00017333333333333334, "loss": 0.1613, "step": 60 }, { "epoch": 4.0, "eval_accuracy": 0.7457142857142857, "eval_loss": 0.8516280055046082, "eval_runtime": 221.3905, "eval_samples_per_second": 1.581, "eval_steps_per_second": 0.199, "step": 60 }, { "epoch": 4.67, "learning_rate": 0.00016888888888888889, "loss": 0.119, "step": 70 }, { "epoch": 4.67, "eval_accuracy": 0.6, "eval_loss": 1.3625210523605347, "eval_runtime": 210.1415, "eval_samples_per_second": 1.666, "eval_steps_per_second": 0.209, "step": 70 }, { "epoch": 5.33, "learning_rate": 0.00016444444444444444, "loss": 0.0972, "step": 80 }, { "epoch": 5.33, "eval_accuracy": 0.7428571428571429, "eval_loss": 0.9109606146812439, "eval_runtime": 208.8901, "eval_samples_per_second": 1.676, "eval_steps_per_second": 0.211, "step": 80 }, { "epoch": 6.0, "learning_rate": 0.00016, "loss": 0.0844, "step": 90 }, { "epoch": 6.0, "eval_accuracy": 0.78, "eval_loss": 0.8271930813789368, "eval_runtime": 214.6994, "eval_samples_per_second": 1.63, "eval_steps_per_second": 0.205, "step": 90 }, { "epoch": 6.67, "learning_rate": 0.00015555555555555556, "loss": 0.0725, "step": 100 }, { "epoch": 6.67, "eval_accuracy": 0.74, "eval_loss": 0.8958377242088318, "eval_runtime": 209.0619, "eval_samples_per_second": 1.674, "eval_steps_per_second": 0.21, "step": 100 }, { "epoch": 7.33, "learning_rate": 0.0001511111111111111, "loss": 0.0708, "step": 110 }, { "epoch": 7.33, "eval_accuracy": 0.7371428571428571, "eval_loss": 1.0972360372543335, "eval_runtime": 210.819, "eval_samples_per_second": 1.66, "eval_steps_per_second": 0.209, "step": 110 }, { "epoch": 8.0, "learning_rate": 0.00014666666666666666, "loss": 0.041, "step": 120 }, { "epoch": 8.0, "eval_accuracy": 0.7628571428571429, "eval_loss": 1.0088900327682495, "eval_runtime": 209.2911, "eval_samples_per_second": 1.672, "eval_steps_per_second": 0.21, "step": 120 }, { "epoch": 8.67, "learning_rate": 0.00014222222222222224, "loss": 0.0312, "step": 130 }, { "epoch": 8.67, "eval_accuracy": 0.7628571428571429, "eval_loss": 1.03481125831604, "eval_runtime": 215.9471, "eval_samples_per_second": 1.621, "eval_steps_per_second": 0.204, "step": 130 }, { "epoch": 9.33, "learning_rate": 0.0001377777777777778, "loss": 0.0401, "step": 140 }, { "epoch": 9.33, "eval_accuracy": 0.7257142857142858, "eval_loss": 1.2426719665527344, "eval_runtime": 211.184, "eval_samples_per_second": 1.657, "eval_steps_per_second": 0.208, "step": 140 }, { "epoch": 10.0, "learning_rate": 0.00013333333333333334, "loss": 0.0271, "step": 150 }, { "epoch": 10.0, "eval_accuracy": 0.7542857142857143, "eval_loss": 1.0153539180755615, "eval_runtime": 208.7196, "eval_samples_per_second": 1.677, "eval_steps_per_second": 0.211, "step": 150 }, { "epoch": 10.67, "learning_rate": 0.00012888888888888892, "loss": 0.0328, "step": 160 }, { "epoch": 10.67, "eval_accuracy": 0.7714285714285715, "eval_loss": 1.0373210906982422, "eval_runtime": 216.803, "eval_samples_per_second": 1.614, "eval_steps_per_second": 0.203, "step": 160 }, { "epoch": 11.33, "learning_rate": 0.00012444444444444444, "loss": 0.023, "step": 170 }, { "epoch": 11.33, "eval_accuracy": 0.7685714285714286, "eval_loss": 1.005110502243042, "eval_runtime": 209.183, "eval_samples_per_second": 1.673, "eval_steps_per_second": 0.21, "step": 170 }, { "epoch": 12.0, "learning_rate": 0.00012, "loss": 0.0199, "step": 180 }, { "epoch": 12.0, "eval_accuracy": 0.7657142857142857, "eval_loss": 0.9775477647781372, "eval_runtime": 208.6639, "eval_samples_per_second": 1.677, "eval_steps_per_second": 0.211, "step": 180 }, { "epoch": 12.67, "learning_rate": 0.00011555555555555555, "loss": 0.0189, "step": 190 }, { "epoch": 12.67, "eval_accuracy": 0.7657142857142857, "eval_loss": 1.008815050125122, "eval_runtime": 216.3653, "eval_samples_per_second": 1.618, "eval_steps_per_second": 0.203, "step": 190 }, { "epoch": 13.33, "learning_rate": 0.00011111111111111112, "loss": 0.0188, "step": 200 }, { "epoch": 13.33, "eval_accuracy": 0.7342857142857143, "eval_loss": 1.1904319524765015, "eval_runtime": 209.8186, "eval_samples_per_second": 1.668, "eval_steps_per_second": 0.21, "step": 200 }, { "epoch": 14.0, "learning_rate": 0.00010666666666666667, "loss": 0.0167, "step": 210 }, { "epoch": 14.0, "eval_accuracy": 0.7285714285714285, "eval_loss": 1.2999135255813599, "eval_runtime": 209.7587, "eval_samples_per_second": 1.669, "eval_steps_per_second": 0.21, "step": 210 }, { "epoch": 14.67, "learning_rate": 0.00010222222222222222, "loss": 0.0159, "step": 220 }, { "epoch": 14.67, "eval_accuracy": 0.7514285714285714, "eval_loss": 1.1326370239257812, "eval_runtime": 215.8574, "eval_samples_per_second": 1.621, "eval_steps_per_second": 0.204, "step": 220 }, { "epoch": 15.33, "learning_rate": 9.777777777777778e-05, "loss": 0.0145, "step": 230 }, { "epoch": 15.33, "eval_accuracy": 0.7542857142857143, "eval_loss": 1.1385791301727295, "eval_runtime": 209.3052, "eval_samples_per_second": 1.672, "eval_steps_per_second": 0.21, "step": 230 }, { "epoch": 16.0, "learning_rate": 9.333333333333334e-05, "loss": 0.015, "step": 240 }, { "epoch": 16.0, "eval_accuracy": 0.7542857142857143, "eval_loss": 1.1441489458084106, "eval_runtime": 209.1403, "eval_samples_per_second": 1.674, "eval_steps_per_second": 0.21, "step": 240 }, { "epoch": 16.67, "learning_rate": 8.888888888888889e-05, "loss": 0.0133, "step": 250 }, { "epoch": 16.67, "eval_accuracy": 0.7514285714285714, "eval_loss": 1.154445767402649, "eval_runtime": 212.9184, "eval_samples_per_second": 1.644, "eval_steps_per_second": 0.207, "step": 250 }, { "epoch": 17.33, "learning_rate": 8.444444444444444e-05, "loss": 0.0132, "step": 260 }, { "epoch": 17.33, "eval_accuracy": 0.7514285714285714, "eval_loss": 1.1629431247711182, "eval_runtime": 209.529, "eval_samples_per_second": 1.67, "eval_steps_per_second": 0.21, "step": 260 }, { "epoch": 18.0, "learning_rate": 8e-05, "loss": 0.0121, "step": 270 }, { "epoch": 18.0, "eval_accuracy": 0.7514285714285714, "eval_loss": 1.1707779169082642, "eval_runtime": 209.7822, "eval_samples_per_second": 1.668, "eval_steps_per_second": 0.21, "step": 270 }, { "epoch": 18.67, "learning_rate": 7.555555555555556e-05, "loss": 0.0121, "step": 280 }, { "epoch": 18.67, "eval_accuracy": 0.7514285714285714, "eval_loss": 1.1773394346237183, "eval_runtime": 220.9849, "eval_samples_per_second": 1.584, "eval_steps_per_second": 0.199, "step": 280 }, { "epoch": 19.33, "learning_rate": 7.111111111111112e-05, "loss": 0.0114, "step": 290 }, { "epoch": 19.33, "eval_accuracy": 0.7514285714285714, "eval_loss": 1.1831494569778442, "eval_runtime": 216.1426, "eval_samples_per_second": 1.619, "eval_steps_per_second": 0.204, "step": 290 }, { "epoch": 20.0, "learning_rate": 6.666666666666667e-05, "loss": 0.0111, "step": 300 }, { "epoch": 20.0, "eval_accuracy": 0.7514285714285714, "eval_loss": 1.1883198022842407, "eval_runtime": 210.1269, "eval_samples_per_second": 1.666, "eval_steps_per_second": 0.209, "step": 300 }, { "epoch": 20.67, "learning_rate": 6.222222222222222e-05, "loss": 0.011, "step": 310 }, { "epoch": 20.67, "eval_accuracy": 0.7514285714285714, "eval_loss": 1.1936721801757812, "eval_runtime": 210.5377, "eval_samples_per_second": 1.662, "eval_steps_per_second": 0.209, "step": 310 }, { "epoch": 21.33, "learning_rate": 5.7777777777777776e-05, "loss": 0.0103, "step": 320 }, { "epoch": 21.33, "eval_accuracy": 0.7514285714285714, "eval_loss": 1.1992676258087158, "eval_runtime": 214.4415, "eval_samples_per_second": 1.632, "eval_steps_per_second": 0.205, "step": 320 }, { "epoch": 22.0, "learning_rate": 5.333333333333333e-05, "loss": 0.0103, "step": 330 }, { "epoch": 22.0, "eval_accuracy": 0.7514285714285714, "eval_loss": 1.2045563459396362, "eval_runtime": 211.851, "eval_samples_per_second": 1.652, "eval_steps_per_second": 0.208, "step": 330 }, { "epoch": 22.67, "learning_rate": 4.888888888888889e-05, "loss": 0.0103, "step": 340 }, { "epoch": 22.67, "eval_accuracy": 0.7514285714285714, "eval_loss": 1.208925724029541, "eval_runtime": 209.6112, "eval_samples_per_second": 1.67, "eval_steps_per_second": 0.21, "step": 340 }, { "epoch": 23.33, "learning_rate": 4.4444444444444447e-05, "loss": 0.0096, "step": 350 }, { "epoch": 23.33, "eval_accuracy": 0.7514285714285714, "eval_loss": 1.2132576704025269, "eval_runtime": 219.2216, "eval_samples_per_second": 1.597, "eval_steps_per_second": 0.201, "step": 350 }, { "epoch": 24.0, "learning_rate": 4e-05, "loss": 0.0095, "step": 360 }, { "epoch": 24.0, "eval_accuracy": 0.7514285714285714, "eval_loss": 1.2171136140823364, "eval_runtime": 208.9362, "eval_samples_per_second": 1.675, "eval_steps_per_second": 0.211, "step": 360 }, { "epoch": 24.67, "learning_rate": 3.555555555555556e-05, "loss": 0.0096, "step": 370 }, { "epoch": 24.67, "eval_accuracy": 0.7514285714285714, "eval_loss": 1.2204023599624634, "eval_runtime": 209.9857, "eval_samples_per_second": 1.667, "eval_steps_per_second": 0.21, "step": 370 }, { "epoch": 25.33, "learning_rate": 3.111111111111111e-05, "loss": 0.0093, "step": 380 }, { "epoch": 25.33, "eval_accuracy": 0.7485714285714286, "eval_loss": 1.2234961986541748, "eval_runtime": 216.2311, "eval_samples_per_second": 1.619, "eval_steps_per_second": 0.203, "step": 380 }, { "epoch": 26.0, "learning_rate": 2.6666666666666667e-05, "loss": 0.0091, "step": 390 }, { "epoch": 26.0, "eval_accuracy": 0.7485714285714286, "eval_loss": 1.2261521816253662, "eval_runtime": 210.1553, "eval_samples_per_second": 1.665, "eval_steps_per_second": 0.209, "step": 390 }, { "epoch": 26.67, "learning_rate": 2.2222222222222223e-05, "loss": 0.0092, "step": 400 }, { "epoch": 26.67, "eval_accuracy": 0.7514285714285714, "eval_loss": 1.227960228919983, "eval_runtime": 211.3289, "eval_samples_per_second": 1.656, "eval_steps_per_second": 0.208, "step": 400 }, { "epoch": 27.33, "learning_rate": 1.777777777777778e-05, "loss": 0.0089, "step": 410 }, { "epoch": 27.33, "eval_accuracy": 0.7514285714285714, "eval_loss": 1.2296239137649536, "eval_runtime": 213.6977, "eval_samples_per_second": 1.638, "eval_steps_per_second": 0.206, "step": 410 }, { "epoch": 28.0, "learning_rate": 1.3333333333333333e-05, "loss": 0.0092, "step": 420 }, { "epoch": 28.0, "eval_accuracy": 0.7514285714285714, "eval_loss": 1.2309640645980835, "eval_runtime": 210.9587, "eval_samples_per_second": 1.659, "eval_steps_per_second": 0.209, "step": 420 }, { "epoch": 28.67, "learning_rate": 8.88888888888889e-06, "loss": 0.0089, "step": 430 }, { "epoch": 28.67, "eval_accuracy": 0.7485714285714286, "eval_loss": 1.2319449186325073, "eval_runtime": 210.158, "eval_samples_per_second": 1.665, "eval_steps_per_second": 0.209, "step": 430 }, { "epoch": 29.33, "learning_rate": 4.444444444444445e-06, "loss": 0.0089, "step": 440 }, { "epoch": 29.33, "eval_accuracy": 0.7485714285714286, "eval_loss": 1.23252272605896, "eval_runtime": 216.1534, "eval_samples_per_second": 1.619, "eval_steps_per_second": 0.204, "step": 440 }, { "epoch": 30.0, "learning_rate": 0.0, "loss": 0.0088, "step": 450 }, { "epoch": 30.0, "eval_accuracy": 0.7485714285714286, "eval_loss": 1.2327271699905396, "eval_runtime": 216.0649, "eval_samples_per_second": 1.62, "eval_steps_per_second": 0.204, "step": 450 }, { "epoch": 30.0, "step": 450, "total_flos": 9.8805828893184e+17, "train_loss": 0.09906705205639203, "train_runtime": 36217.9644, "train_samples_per_second": 0.352, "train_steps_per_second": 0.012 } ], "max_steps": 450, "num_train_epochs": 30, "total_flos": 9.8805828893184e+17, "trial_name": null, "trial_params": null }