|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"global_step": 2188, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.997714808043876e-05, |
|
"loss": 5.4892, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.9017367458866546e-05, |
|
"loss": 3.0607, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_accuracy": 0.8749333333333333, |
|
"eval_f1": 0.8729341641831482, |
|
"eval_loss": 2.1355409622192383, |
|
"eval_precision": 0.887114537444934, |
|
"eval_recall": 0.8592, |
|
"eval_runtime": 581.4545, |
|
"eval_samples_per_second": 6.449, |
|
"eval_steps_per_second": 0.807, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.8034734917733096e-05, |
|
"loss": 1.6864, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_accuracy": 0.8901333333333333, |
|
"eval_f1": 0.8857459789240155, |
|
"eval_loss": 1.632243275642395, |
|
"eval_precision": 0.9225880993645291, |
|
"eval_recall": 0.8517333333333333, |
|
"eval_runtime": 581.1642, |
|
"eval_samples_per_second": 6.453, |
|
"eval_steps_per_second": 0.807, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.705210237659964e-05, |
|
"loss": 1.0658, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_accuracy": 0.904, |
|
"eval_f1": 0.9061032863849765, |
|
"eval_loss": 0.3131539821624756, |
|
"eval_precision": 0.886676875957121, |
|
"eval_recall": 0.9264, |
|
"eval_runtime": 580.6614, |
|
"eval_samples_per_second": 6.458, |
|
"eval_steps_per_second": 0.808, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.606946983546618e-05, |
|
"loss": 0.4275, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_accuracy": 0.8874666666666666, |
|
"eval_f1": 0.8765359859566997, |
|
"eval_loss": 1.6054295301437378, |
|
"eval_precision": 0.9708360337005832, |
|
"eval_recall": 0.7989333333333334, |
|
"eval_runtime": 580.8161, |
|
"eval_samples_per_second": 6.456, |
|
"eval_steps_per_second": 0.807, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.5086837294332726e-05, |
|
"loss": 0.5456, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_accuracy": 0.9170666666666667, |
|
"eval_f1": 0.9218789248932429, |
|
"eval_loss": 0.41803765296936035, |
|
"eval_precision": 0.8713200379867047, |
|
"eval_recall": 0.9786666666666667, |
|
"eval_runtime": 580.4549, |
|
"eval_samples_per_second": 6.46, |
|
"eval_steps_per_second": 0.808, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.410420475319927e-05, |
|
"loss": 0.6111, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_accuracy": 0.8786666666666667, |
|
"eval_f1": 0.8626622396619378, |
|
"eval_loss": 0.32219210267066956, |
|
"eval_precision": 0.9937413073713491, |
|
"eval_recall": 0.7621333333333333, |
|
"eval_runtime": 584.0808, |
|
"eval_samples_per_second": 6.42, |
|
"eval_steps_per_second": 0.803, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.312157221206581e-05, |
|
"loss": 0.7106, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_accuracy": 0.9274666666666667, |
|
"eval_f1": 0.9221967963386728, |
|
"eval_loss": 0.5022754073143005, |
|
"eval_precision": 0.9944478716841456, |
|
"eval_recall": 0.8597333333333333, |
|
"eval_runtime": 581.6225, |
|
"eval_samples_per_second": 6.447, |
|
"eval_steps_per_second": 0.806, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.213893967093236e-05, |
|
"loss": 0.2426, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_accuracy": 0.9586666666666667, |
|
"eval_f1": 0.9586997069011457, |
|
"eval_loss": 0.16574496030807495, |
|
"eval_precision": 0.9579339723109691, |
|
"eval_recall": 0.9594666666666667, |
|
"eval_runtime": 580.7975, |
|
"eval_samples_per_second": 6.457, |
|
"eval_steps_per_second": 0.808, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.1156307129798905e-05, |
|
"loss": 0.3958, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_accuracy": 0.9554666666666667, |
|
"eval_f1": 0.9535724214623298, |
|
"eval_loss": 0.24226774275302887, |
|
"eval_precision": 0.9959349593495935, |
|
"eval_recall": 0.9146666666666666, |
|
"eval_runtime": 581.1794, |
|
"eval_samples_per_second": 6.452, |
|
"eval_steps_per_second": 0.807, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.017367458866545e-05, |
|
"loss": 0.4259, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_accuracy": 0.9448, |
|
"eval_f1": 0.9449614464238233, |
|
"eval_loss": 0.3525341749191284, |
|
"eval_precision": 0.9422057264050901, |
|
"eval_recall": 0.9477333333333333, |
|
"eval_runtime": 580.7809, |
|
"eval_samples_per_second": 6.457, |
|
"eval_steps_per_second": 0.808, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.919104204753199e-05, |
|
"loss": 0.2217, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_accuracy": 0.9549333333333333, |
|
"eval_f1": 0.9531466592736346, |
|
"eval_loss": 0.22800709307193756, |
|
"eval_precision": 0.9924942263279446, |
|
"eval_recall": 0.9168, |
|
"eval_runtime": 580.8019, |
|
"eval_samples_per_second": 6.457, |
|
"eval_steps_per_second": 0.808, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.820840950639854e-05, |
|
"loss": 0.1741, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_accuracy": 0.9682666666666667, |
|
"eval_f1": 0.9673525377229081, |
|
"eval_loss": 0.15350370109081268, |
|
"eval_precision": 0.996045197740113, |
|
"eval_recall": 0.9402666666666667, |
|
"eval_runtime": 584.2385, |
|
"eval_samples_per_second": 6.419, |
|
"eval_steps_per_second": 0.803, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 3.7225776965265085e-05, |
|
"loss": 0.1731, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_accuracy": 0.9648, |
|
"eval_f1": 0.9636763896532745, |
|
"eval_loss": 0.29772499203681946, |
|
"eval_precision": 0.9954519613416714, |
|
"eval_recall": 0.9338666666666666, |
|
"eval_runtime": 580.9188, |
|
"eval_samples_per_second": 6.455, |
|
"eval_steps_per_second": 0.807, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 3.624314442413163e-05, |
|
"loss": 0.1857, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_accuracy": 0.9826666666666667, |
|
"eval_f1": 0.9826527888977848, |
|
"eval_loss": 0.16285915672779083, |
|
"eval_precision": 0.9834401709401709, |
|
"eval_recall": 0.9818666666666667, |
|
"eval_runtime": 581.4605, |
|
"eval_samples_per_second": 6.449, |
|
"eval_steps_per_second": 0.807, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.526051188299818e-05, |
|
"loss": 0.241, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_accuracy": 0.9677333333333333, |
|
"eval_f1": 0.9681997371879106, |
|
"eval_loss": 0.30683064460754395, |
|
"eval_precision": 0.9544041450777202, |
|
"eval_recall": 0.9824, |
|
"eval_runtime": 580.3999, |
|
"eval_samples_per_second": 6.461, |
|
"eval_steps_per_second": 0.808, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.4277879341864715e-05, |
|
"loss": 0.1674, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_accuracy": 0.9050666666666667, |
|
"eval_f1": 0.9130434782608695, |
|
"eval_loss": 0.5339795351028442, |
|
"eval_precision": 0.8422712933753943, |
|
"eval_recall": 0.9968, |
|
"eval_runtime": 580.4617, |
|
"eval_samples_per_second": 6.46, |
|
"eval_steps_per_second": 0.808, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.329524680073126e-05, |
|
"loss": 0.1452, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_accuracy": 0.9754666666666667, |
|
"eval_f1": 0.9756871035940803, |
|
"eval_loss": 0.1770327091217041, |
|
"eval_precision": 0.9669984284965951, |
|
"eval_recall": 0.9845333333333334, |
|
"eval_runtime": 581.5384, |
|
"eval_samples_per_second": 6.448, |
|
"eval_steps_per_second": 0.806, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.231261425959781e-05, |
|
"loss": 0.2261, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_accuracy": 0.9714666666666667, |
|
"eval_f1": 0.97192337969037, |
|
"eval_loss": 0.2083805948495865, |
|
"eval_precision": 0.9566115702479339, |
|
"eval_recall": 0.9877333333333334, |
|
"eval_runtime": 582.4902, |
|
"eval_samples_per_second": 6.438, |
|
"eval_steps_per_second": 0.805, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.132998171846435e-05, |
|
"loss": 0.1849, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_accuracy": 0.9765333333333334, |
|
"eval_f1": 0.9763186221743809, |
|
"eval_loss": 0.12196581810712814, |
|
"eval_precision": 0.9853340575774036, |
|
"eval_recall": 0.9674666666666667, |
|
"eval_runtime": 578.7143, |
|
"eval_samples_per_second": 6.48, |
|
"eval_steps_per_second": 0.81, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.03473491773309e-05, |
|
"loss": 0.1237, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_accuracy": 0.9693333333333334, |
|
"eval_f1": 0.9700286682303884, |
|
"eval_loss": 0.2138151079416275, |
|
"eval_precision": 0.9485219164118247, |
|
"eval_recall": 0.9925333333333334, |
|
"eval_runtime": 578.8904, |
|
"eval_samples_per_second": 6.478, |
|
"eval_steps_per_second": 0.81, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 2.9364716636197442e-05, |
|
"loss": 0.1627, |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_accuracy": 0.9584, |
|
"eval_f1": 0.9566425792106726, |
|
"eval_loss": 0.254962295293808, |
|
"eval_precision": 0.9988392338943702, |
|
"eval_recall": 0.9178666666666667, |
|
"eval_runtime": 578.6033, |
|
"eval_samples_per_second": 6.481, |
|
"eval_steps_per_second": 0.811, |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 2.838208409506399e-05, |
|
"loss": 0.2477, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_accuracy": 0.9765333333333334, |
|
"eval_f1": 0.9761258817145958, |
|
"eval_loss": 0.13448475301265717, |
|
"eval_precision": 0.9933738266151297, |
|
"eval_recall": 0.9594666666666667, |
|
"eval_runtime": 578.5049, |
|
"eval_samples_per_second": 6.482, |
|
"eval_steps_per_second": 0.811, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 2.739945155393053e-05, |
|
"loss": 0.0943, |
|
"step": 989 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_accuracy": 0.9688, |
|
"eval_f1": 0.9695391825045562, |
|
"eval_loss": 0.18509739637374878, |
|
"eval_precision": 0.9471007121057986, |
|
"eval_recall": 0.9930666666666667, |
|
"eval_runtime": 578.7851, |
|
"eval_samples_per_second": 6.479, |
|
"eval_steps_per_second": 0.81, |
|
"step": 989 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.641681901279708e-05, |
|
"loss": 0.2131, |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_accuracy": 0.9826666666666667, |
|
"eval_f1": 0.9825970548862116, |
|
"eval_loss": 0.10872189700603485, |
|
"eval_precision": 0.9865591397849462, |
|
"eval_recall": 0.9786666666666667, |
|
"eval_runtime": 580.8924, |
|
"eval_samples_per_second": 6.456, |
|
"eval_steps_per_second": 0.807, |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.5434186471663625e-05, |
|
"loss": 0.0829, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_accuracy": 0.9661333333333333, |
|
"eval_f1": 0.9651577503429356, |
|
"eval_loss": 0.2479323148727417, |
|
"eval_precision": 0.9937853107344633, |
|
"eval_recall": 0.9381333333333334, |
|
"eval_runtime": 578.7358, |
|
"eval_samples_per_second": 6.48, |
|
"eval_steps_per_second": 0.81, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.4451553930530165e-05, |
|
"loss": 0.1392, |
|
"step": 1118 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_accuracy": 0.9613333333333334, |
|
"eval_f1": 0.959867146415721, |
|
"eval_loss": 0.20837165415287018, |
|
"eval_precision": 0.997698504027618, |
|
"eval_recall": 0.9248, |
|
"eval_runtime": 579.0845, |
|
"eval_samples_per_second": 6.476, |
|
"eval_steps_per_second": 0.81, |
|
"step": 1118 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.346892138939671e-05, |
|
"loss": 0.0563, |
|
"step": 1161 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_accuracy": 0.9874666666666667, |
|
"eval_f1": 0.9874833555259653, |
|
"eval_loss": 0.08351419121026993, |
|
"eval_precision": 0.9861702127659574, |
|
"eval_recall": 0.9888, |
|
"eval_runtime": 578.9179, |
|
"eval_samples_per_second": 6.478, |
|
"eval_steps_per_second": 0.81, |
|
"step": 1161 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.2486288848263255e-05, |
|
"loss": 0.107, |
|
"step": 1204 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_accuracy": 0.9826666666666667, |
|
"eval_f1": 0.9828269484808455, |
|
"eval_loss": 0.11463689059019089, |
|
"eval_precision": 0.9738219895287958, |
|
"eval_recall": 0.992, |
|
"eval_runtime": 579.4593, |
|
"eval_samples_per_second": 6.472, |
|
"eval_steps_per_second": 0.809, |
|
"step": 1204 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.1503656307129798e-05, |
|
"loss": 0.0822, |
|
"step": 1247 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_accuracy": 0.9816, |
|
"eval_f1": 0.9817218543046358, |
|
"eval_loss": 0.1307775229215622, |
|
"eval_precision": 0.9752631578947368, |
|
"eval_recall": 0.9882666666666666, |
|
"eval_runtime": 580.0354, |
|
"eval_samples_per_second": 6.465, |
|
"eval_steps_per_second": 0.809, |
|
"step": 1247 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.0521023765996345e-05, |
|
"loss": 0.1165, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_accuracy": 0.9856, |
|
"eval_f1": 0.9855923159018143, |
|
"eval_loss": 0.09052357822656631, |
|
"eval_precision": 0.9861185264281901, |
|
"eval_recall": 0.9850666666666666, |
|
"eval_runtime": 581.231, |
|
"eval_samples_per_second": 6.452, |
|
"eval_steps_per_second": 0.807, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.953839122486289e-05, |
|
"loss": 0.1156, |
|
"step": 1333 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_accuracy": 0.9864, |
|
"eval_f1": 0.9864972200158858, |
|
"eval_loss": 0.09110942482948303, |
|
"eval_precision": 0.9794952681388013, |
|
"eval_recall": 0.9936, |
|
"eval_runtime": 579.1835, |
|
"eval_samples_per_second": 6.475, |
|
"eval_steps_per_second": 0.81, |
|
"step": 1333 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.8555758683729435e-05, |
|
"loss": 0.1172, |
|
"step": 1376 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_accuracy": 0.984, |
|
"eval_f1": 0.9841772151898733, |
|
"eval_loss": 0.09306684881448746, |
|
"eval_precision": 0.97339593114241, |
|
"eval_recall": 0.9952, |
|
"eval_runtime": 579.0887, |
|
"eval_samples_per_second": 6.476, |
|
"eval_steps_per_second": 0.81, |
|
"step": 1376 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.7573126142595978e-05, |
|
"loss": 0.1447, |
|
"step": 1419 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_accuracy": 0.9848, |
|
"eval_f1": 0.9849086576648134, |
|
"eval_loss": 0.08812595903873444, |
|
"eval_precision": 0.9779179810725552, |
|
"eval_recall": 0.992, |
|
"eval_runtime": 579.3815, |
|
"eval_samples_per_second": 6.472, |
|
"eval_steps_per_second": 0.809, |
|
"step": 1419 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.659049360146252e-05, |
|
"loss": 0.087, |
|
"step": 1462 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_accuracy": 0.9872, |
|
"eval_f1": 0.9872272485364556, |
|
"eval_loss": 0.07127923518419266, |
|
"eval_precision": 0.9851301115241635, |
|
"eval_recall": 0.9893333333333333, |
|
"eval_runtime": 578.9808, |
|
"eval_samples_per_second": 6.477, |
|
"eval_steps_per_second": 0.81, |
|
"step": 1462 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.5607861060329068e-05, |
|
"loss": 0.0313, |
|
"step": 1505 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_accuracy": 0.9861333333333333, |
|
"eval_f1": 0.9862142099681867, |
|
"eval_loss": 0.1003463938832283, |
|
"eval_precision": 0.9804955192409067, |
|
"eval_recall": 0.992, |
|
"eval_runtime": 579.6786, |
|
"eval_samples_per_second": 6.469, |
|
"eval_steps_per_second": 0.809, |
|
"step": 1505 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.4625228519195613e-05, |
|
"loss": 0.0729, |
|
"step": 1548 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_accuracy": 0.9869333333333333, |
|
"eval_f1": 0.986859747921695, |
|
"eval_loss": 0.0750027745962143, |
|
"eval_precision": 0.9924487594390508, |
|
"eval_recall": 0.9813333333333333, |
|
"eval_runtime": 581.8591, |
|
"eval_samples_per_second": 6.445, |
|
"eval_steps_per_second": 0.806, |
|
"step": 1548 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.3642595978062158e-05, |
|
"loss": 0.106, |
|
"step": 1591 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_accuracy": 0.9874666666666667, |
|
"eval_f1": 0.9874499332443257, |
|
"eval_loss": 0.06067837029695511, |
|
"eval_precision": 0.9887700534759358, |
|
"eval_recall": 0.9861333333333333, |
|
"eval_runtime": 579.0471, |
|
"eval_samples_per_second": 6.476, |
|
"eval_steps_per_second": 0.81, |
|
"step": 1591 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.2659963436928701e-05, |
|
"loss": 0.0823, |
|
"step": 1634 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_accuracy": 0.9877333333333334, |
|
"eval_f1": 0.9877005347593582, |
|
"eval_loss": 0.07156907767057419, |
|
"eval_precision": 0.9903485254691688, |
|
"eval_recall": 0.9850666666666666, |
|
"eval_runtime": 578.809, |
|
"eval_samples_per_second": 6.479, |
|
"eval_steps_per_second": 0.81, |
|
"step": 1634 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.1677330895795248e-05, |
|
"loss": 0.0817, |
|
"step": 1677 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_accuracy": 0.9845333333333334, |
|
"eval_f1": 0.9846804014791337, |
|
"eval_loss": 0.09290226548910141, |
|
"eval_precision": 0.9754055468341183, |
|
"eval_recall": 0.9941333333333333, |
|
"eval_runtime": 578.5452, |
|
"eval_samples_per_second": 6.482, |
|
"eval_steps_per_second": 0.811, |
|
"step": 1677 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.0694698354661791e-05, |
|
"loss": 0.1279, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_accuracy": 0.988, |
|
"eval_f1": 0.9879453522635949, |
|
"eval_loss": 0.07037492841482162, |
|
"eval_precision": 0.9924650161463939, |
|
"eval_recall": 0.9834666666666667, |
|
"eval_runtime": 579.5947, |
|
"eval_samples_per_second": 6.47, |
|
"eval_steps_per_second": 0.809, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 9.712065813528338e-06, |
|
"loss": 0.075, |
|
"step": 1763 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_accuracy": 0.9890666666666666, |
|
"eval_f1": 0.9890462196099385, |
|
"eval_loss": 0.06325065344572067, |
|
"eval_precision": 0.9908993576017131, |
|
"eval_recall": 0.9872, |
|
"eval_runtime": 579.6298, |
|
"eval_samples_per_second": 6.47, |
|
"eval_steps_per_second": 0.809, |
|
"step": 1763 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 8.729433272394881e-06, |
|
"loss": 0.0909, |
|
"step": 1806 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_accuracy": 0.9842666666666666, |
|
"eval_f1": 0.9840841650930671, |
|
"eval_loss": 0.09650667011737823, |
|
"eval_precision": 0.9956331877729258, |
|
"eval_recall": 0.9728, |
|
"eval_runtime": 579.8501, |
|
"eval_samples_per_second": 6.467, |
|
"eval_steps_per_second": 0.809, |
|
"step": 1806 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 7.746800731261426e-06, |
|
"loss": 0.0583, |
|
"step": 1849 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"eval_accuracy": 0.9893333333333333, |
|
"eval_f1": 0.9893276414087514, |
|
"eval_loss": 0.07162317633628845, |
|
"eval_precision": 0.9898558462359851, |
|
"eval_recall": 0.9888, |
|
"eval_runtime": 579.4799, |
|
"eval_samples_per_second": 6.471, |
|
"eval_steps_per_second": 0.809, |
|
"step": 1849 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 6.764168190127972e-06, |
|
"loss": 0.0961, |
|
"step": 1892 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_accuracy": 0.988, |
|
"eval_f1": 0.9879518072289156, |
|
"eval_loss": 0.05759565904736519, |
|
"eval_precision": 0.9919354838709677, |
|
"eval_recall": 0.984, |
|
"eval_runtime": 580.3503, |
|
"eval_samples_per_second": 6.462, |
|
"eval_steps_per_second": 0.808, |
|
"step": 1892 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 5.781535648994515e-06, |
|
"loss": 0.0356, |
|
"step": 1935 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_accuracy": 0.9882666666666666, |
|
"eval_f1": 0.9883103081827843, |
|
"eval_loss": 0.06381053477525711, |
|
"eval_precision": 0.984647961884595, |
|
"eval_recall": 0.992, |
|
"eval_runtime": 581.544, |
|
"eval_samples_per_second": 6.448, |
|
"eval_steps_per_second": 0.806, |
|
"step": 1935 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 4.798903107861061e-06, |
|
"loss": 0.0848, |
|
"step": 1978 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_accuracy": 0.9893333333333333, |
|
"eval_f1": 0.9893162393162394, |
|
"eval_loss": 0.05824807286262512, |
|
"eval_precision": 0.990904226859283, |
|
"eval_recall": 0.9877333333333334, |
|
"eval_runtime": 578.359, |
|
"eval_samples_per_second": 6.484, |
|
"eval_steps_per_second": 0.811, |
|
"step": 1978 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.816270566727605e-06, |
|
"loss": 0.1019, |
|
"step": 2021 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_accuracy": 0.9890666666666666, |
|
"eval_f1": 0.9890579129970644, |
|
"eval_loss": 0.055534329265356064, |
|
"eval_precision": 0.9898504273504274, |
|
"eval_recall": 0.9882666666666666, |
|
"eval_runtime": 579.2162, |
|
"eval_samples_per_second": 6.474, |
|
"eval_steps_per_second": 0.81, |
|
"step": 2021 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 2.83363802559415e-06, |
|
"loss": 0.0274, |
|
"step": 2064 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_accuracy": 0.9893333333333333, |
|
"eval_f1": 0.9892990904226859, |
|
"eval_loss": 0.05893222615122795, |
|
"eval_precision": 0.9924852388620504, |
|
"eval_recall": 0.9861333333333333, |
|
"eval_runtime": 581.1057, |
|
"eval_samples_per_second": 6.453, |
|
"eval_steps_per_second": 0.807, |
|
"step": 2064 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.851005484460695e-06, |
|
"loss": 0.0313, |
|
"step": 2107 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_accuracy": 0.9893333333333333, |
|
"eval_f1": 0.9892933618843683, |
|
"eval_loss": 0.0617961585521698, |
|
"eval_precision": 0.9930145083288554, |
|
"eval_recall": 0.9856, |
|
"eval_runtime": 580.2597, |
|
"eval_samples_per_second": 6.463, |
|
"eval_steps_per_second": 0.808, |
|
"step": 2107 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 8.683729433272396e-07, |
|
"loss": 0.0513, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_accuracy": 0.9898666666666667, |
|
"eval_f1": 0.9898286937901497, |
|
"eval_loss": 0.05942407250404358, |
|
"eval_precision": 0.9935518538420204, |
|
"eval_recall": 0.9861333333333333, |
|
"eval_runtime": 580.5968, |
|
"eval_samples_per_second": 6.459, |
|
"eval_steps_per_second": 0.808, |
|
"step": 2150 |
|
} |
|
], |
|
"max_steps": 2188, |
|
"num_train_epochs": 1, |
|
"total_flos": 1.268902526976e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|