{
  "best_metric": 0.6911338883028688,
  "best_model_checkpoint": "../save/jtrans-malware-2f-100c/checkpoint-1000",
  "epoch": 2.2573363431151243,
  "global_step": 1000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.02,
      "learning_rate": 9.97742663656885e-05,
      "loss": 0.4844,
      "step": 10
    },
    {
      "epoch": 0.05,
      "learning_rate": 9.954853273137697e-05,
      "loss": 0.2551,
      "step": 20
    },
    {
      "epoch": 0.07,
      "learning_rate": 9.932279909706546e-05,
      "loss": 0.1549,
      "step": 30
    },
    {
      "epoch": 0.09,
      "learning_rate": 9.909706546275395e-05,
      "loss": 0.11,
      "step": 40
    },
    {
      "epoch": 0.11,
      "learning_rate": 9.887133182844243e-05,
      "loss": 0.0974,
      "step": 50
    },
    {
      "epoch": 0.14,
      "learning_rate": 9.864559819413092e-05,
      "loss": 0.0891,
      "step": 60
    },
    {
      "epoch": 0.16,
      "learning_rate": 9.841986455981941e-05,
      "loss": 0.0876,
      "step": 70
    },
    {
      "epoch": 0.18,
      "learning_rate": 9.81941309255079e-05,
      "loss": 0.0867,
      "step": 80
    },
    {
      "epoch": 0.2,
      "learning_rate": 9.79683972911964e-05,
      "loss": 0.0825,
      "step": 90
    },
    {
      "epoch": 0.23,
      "learning_rate": 9.774266365688489e-05,
      "loss": 0.0877,
      "step": 100
    },
    {
      "epoch": 0.23,
      "eval_accuracy": 0.9824731182795698,
      "eval_f1": 0.0,
      "eval_loss": 0.08570680022239685,
      "eval_precision": 0.0,
      "eval_recall": 0.0,
      "eval_roc_auc_score": 0.6331024634550682,
      "eval_runtime": 2.0856,
      "eval_samples_per_second": 89.183,
      "eval_steps_per_second": 5.754,
      "step": 100
    },
    {
      "epoch": 0.25,
      "learning_rate": 9.751693002257338e-05,
      "loss": 0.0791,
      "step": 110
    },
    {
      "epoch": 0.27,
      "learning_rate": 9.729119638826185e-05,
      "loss": 0.0853,
      "step": 120
    },
    {
      "epoch": 0.29,
      "learning_rate": 9.706546275395035e-05,
      "loss": 0.0892,
      "step": 130
    },
    {
      "epoch": 0.32,
      "learning_rate": 9.683972911963884e-05,
      "loss": 0.0834,
      "step": 140
    },
    {
      "epoch": 0.34,
      "learning_rate": 9.661399548532731e-05,
      "loss": 0.0862,
      "step": 150
    },
    {
      "epoch": 0.36,
      "learning_rate": 9.63882618510158e-05,
      "loss": 0.0792,
      "step": 160
    },
    {
      "epoch": 0.38,
      "learning_rate": 9.61625282167043e-05,
      "loss": 0.0826,
      "step": 170
    },
    {
      "epoch": 0.41,
      "learning_rate": 9.593679458239279e-05,
      "loss": 0.0818,
      "step": 180
    },
    {
      "epoch": 0.43,
      "learning_rate": 9.571106094808126e-05,
      "loss": 0.0813,
      "step": 190
    },
    {
      "epoch": 0.45,
      "learning_rate": 9.548532731376975e-05,
      "loss": 0.0843,
      "step": 200
    },
    {
      "epoch": 0.45,
      "eval_accuracy": 0.9824731182795698,
      "eval_f1": 0.0,
      "eval_loss": 0.08487475663423538,
      "eval_precision": 0.0,
      "eval_recall": 0.0,
      "eval_roc_auc_score": 0.6533783457136124,
      "eval_runtime": 2.1066,
      "eval_samples_per_second": 88.295,
      "eval_steps_per_second": 5.696,
      "step": 200
    },
    {
      "epoch": 0.47,
      "learning_rate": 9.525959367945825e-05,
      "loss": 0.0805,
      "step": 210
    },
    {
      "epoch": 0.5,
      "learning_rate": 9.503386004514672e-05,
      "loss": 0.0844,
      "step": 220
    },
    {
      "epoch": 0.52,
      "learning_rate": 9.480812641083521e-05,
      "loss": 0.0919,
      "step": 230
    },
    {
      "epoch": 0.54,
      "learning_rate": 9.45823927765237e-05,
      "loss": 0.0818,
      "step": 240
    },
    {
      "epoch": 0.56,
      "learning_rate": 9.43566591422122e-05,
      "loss": 0.0866,
      "step": 250
    },
    {
      "epoch": 0.59,
      "learning_rate": 9.413092550790069e-05,
      "loss": 0.0814,
      "step": 260
    },
    {
      "epoch": 0.61,
      "learning_rate": 9.390519187358918e-05,
      "loss": 0.082,
      "step": 270
    },
    {
      "epoch": 0.63,
      "learning_rate": 9.367945823927767e-05,
      "loss": 0.0845,
      "step": 280
    },
    {
      "epoch": 0.65,
      "learning_rate": 9.345372460496615e-05,
      "loss": 0.083,
      "step": 290
    },
    {
      "epoch": 0.68,
      "learning_rate": 9.322799097065464e-05,
      "loss": 0.0767,
      "step": 300
    },
    {
      "epoch": 0.68,
      "eval_accuracy": 0.9824731182795698,
      "eval_f1": 0.0,
      "eval_loss": 0.08511281758546829,
      "eval_precision": 0.0,
      "eval_recall": 0.0,
      "eval_roc_auc_score": 0.6500109780834482,
      "eval_runtime": 2.0864,
      "eval_samples_per_second": 89.151,
      "eval_steps_per_second": 5.752,
      "step": 300
    },
    {
      "epoch": 0.7,
      "learning_rate": 9.300225733634313e-05,
      "loss": 0.0889,
      "step": 310
    },
    {
      "epoch": 0.72,
      "learning_rate": 9.27765237020316e-05,
      "loss": 0.0896,
      "step": 320
    },
    {
      "epoch": 0.74,
      "learning_rate": 9.25507900677201e-05,
      "loss": 0.0782,
      "step": 330
    },
    {
      "epoch": 0.77,
      "learning_rate": 9.232505643340859e-05,
      "loss": 0.0796,
      "step": 340
    },
    {
      "epoch": 0.79,
      "learning_rate": 9.209932279909706e-05,
      "loss": 0.0865,
      "step": 350
    },
    {
      "epoch": 0.81,
      "learning_rate": 9.187358916478555e-05,
      "loss": 0.0849,
      "step": 360
    },
    {
      "epoch": 0.84,
      "learning_rate": 9.164785553047405e-05,
      "loss": 0.0806,
      "step": 370
    },
    {
      "epoch": 0.86,
      "learning_rate": 9.142212189616254e-05,
      "loss": 0.084,
      "step": 380
    },
    {
      "epoch": 0.88,
      "learning_rate": 9.119638826185101e-05,
      "loss": 0.0842,
      "step": 390
    },
    {
      "epoch": 0.9,
      "learning_rate": 9.09706546275395e-05,
      "loss": 0.086,
      "step": 400
    },
    {
      "epoch": 0.9,
      "eval_accuracy": 0.9824731182795698,
      "eval_f1": 0.0,
      "eval_loss": 0.0845983698964119,
      "eval_precision": 0.0,
      "eval_recall": 0.0,
      "eval_roc_auc_score": 0.6398895712235897,
      "eval_runtime": 2.0786,
      "eval_samples_per_second": 89.482,
      "eval_steps_per_second": 5.773,
      "step": 400
    },
    {
      "epoch": 0.93,
      "learning_rate": 9.0744920993228e-05,
      "loss": 0.0811,
      "step": 410
    },
    {
      "epoch": 0.95,
      "learning_rate": 9.051918735891649e-05,
      "loss": 0.0812,
      "step": 420
    },
    {
      "epoch": 0.97,
      "learning_rate": 9.029345372460498e-05,
      "loss": 0.0856,
      "step": 430
    },
    {
      "epoch": 0.99,
      "learning_rate": 9.006772009029347e-05,
      "loss": 0.0808,
      "step": 440
    },
    {
      "epoch": 1.02,
      "learning_rate": 8.984198645598195e-05,
      "loss": 0.0867,
      "step": 450
    },
    {
      "epoch": 1.04,
      "learning_rate": 8.961625282167044e-05,
      "loss": 0.0851,
      "step": 460
    },
    {
      "epoch": 1.06,
      "learning_rate": 8.939051918735893e-05,
      "loss": 0.0788,
      "step": 470
    },
    {
      "epoch": 1.08,
      "learning_rate": 8.91647855530474e-05,
      "loss": 0.083,
      "step": 480
    },
    {
      "epoch": 1.11,
      "learning_rate": 8.89390519187359e-05,
      "loss": 0.0823,
      "step": 490
    },
    {
      "epoch": 1.13,
      "learning_rate": 8.871331828442439e-05,
      "loss": 0.0853,
      "step": 500
    },
    {
      "epoch": 1.13,
      "eval_accuracy": 0.9831182795698925,
      "eval_f1": 0.07647058823529411,
      "eval_loss": 0.08377581834793091,
      "eval_precision": 0.9285714285714286,
      "eval_recall": 0.03987730061349693,
      "eval_roc_auc_score": 0.6488065782556061,
      "eval_runtime": 2.2005,
      "eval_samples_per_second": 84.524,
      "eval_steps_per_second": 5.453,
      "step": 500
    },
    {
      "epoch": 1.15,
      "learning_rate": 8.848758465011288e-05,
      "loss": 0.0827,
      "step": 510
    },
    {
      "epoch": 1.17,
      "learning_rate": 8.826185101580135e-05,
      "loss": 0.0753,
      "step": 520
    },
    {
      "epoch": 1.2,
      "learning_rate": 8.803611738148985e-05,
      "loss": 0.0823,
      "step": 530
    },
    {
      "epoch": 1.22,
      "learning_rate": 8.781038374717834e-05,
      "loss": 0.0811,
      "step": 540
    },
    {
      "epoch": 1.24,
      "learning_rate": 8.758465011286681e-05,
      "loss": 0.0825,
      "step": 550
    },
    {
      "epoch": 1.26,
      "learning_rate": 8.73589164785553e-05,
      "loss": 0.0848,
      "step": 560
    },
    {
      "epoch": 1.29,
      "learning_rate": 8.71331828442438e-05,
      "loss": 0.0871,
      "step": 570
    },
    {
      "epoch": 1.31,
      "learning_rate": 8.690744920993227e-05,
      "loss": 0.0807,
      "step": 580
    },
    {
      "epoch": 1.33,
      "learning_rate": 8.668171557562076e-05,
      "loss": 0.0781,
      "step": 590
    },
    {
      "epoch": 1.35,
      "learning_rate": 8.645598194130925e-05,
      "loss": 0.0812,
      "step": 600
    },
    {
      "epoch": 1.35,
      "eval_accuracy": 0.9831182795698925,
      "eval_f1": 0.07647058823529411,
      "eval_loss": 0.08219148218631744,
      "eval_precision": 0.9285714285714286,
      "eval_recall": 0.03987730061349693,
      "eval_roc_auc_score": 0.6788379312590687,
      "eval_runtime": 2.0981,
      "eval_samples_per_second": 88.65,
      "eval_steps_per_second": 5.719,
      "step": 600
    },
    {
      "epoch": 1.38,
      "learning_rate": 8.623024830699775e-05,
      "loss": 0.0807,
      "step": 610
    },
    {
      "epoch": 1.4,
      "learning_rate": 8.600451467268624e-05,
      "loss": 0.0771,
      "step": 620
    },
    {
      "epoch": 1.42,
      "learning_rate": 8.577878103837473e-05,
      "loss": 0.0789,
      "step": 630
    },
    {
      "epoch": 1.44,
      "learning_rate": 8.555304740406322e-05,
      "loss": 0.0826,
      "step": 640
    },
    {
      "epoch": 1.47,
      "learning_rate": 8.53273137697517e-05,
      "loss": 0.0816,
      "step": 650
    },
    {
      "epoch": 1.49,
      "learning_rate": 8.510158013544019e-05,
      "loss": 0.0813,
      "step": 660
    },
    {
      "epoch": 1.51,
      "learning_rate": 8.487584650112868e-05,
      "loss": 0.0811,
      "step": 670
    },
    {
      "epoch": 1.53,
      "learning_rate": 8.465011286681715e-05,
      "loss": 0.0803,
      "step": 680
    },
    {
      "epoch": 1.56,
      "learning_rate": 8.442437923250564e-05,
      "loss": 0.0866,
      "step": 690
    },
    {
      "epoch": 1.58,
      "learning_rate": 8.419864559819414e-05,
      "loss": 0.0786,
      "step": 700
    },
    {
      "epoch": 1.58,
      "eval_accuracy": 0.9831182795698925,
      "eval_f1": 0.07647058823529411,
      "eval_loss": 0.08220940083265305,
      "eval_precision": 0.9285714285714286,
      "eval_recall": 0.03987730061349693,
      "eval_roc_auc_score": 0.6817703720663841,
      "eval_runtime": 2.081,
      "eval_samples_per_second": 89.38,
      "eval_steps_per_second": 5.766,
      "step": 700
    },
    {
      "epoch": 1.6,
      "learning_rate": 8.397291196388263e-05,
      "loss": 0.0798,
      "step": 710
    },
    {
      "epoch": 1.63,
      "learning_rate": 8.37471783295711e-05,
      "loss": 0.0826,
      "step": 720
    },
    {
      "epoch": 1.65,
      "learning_rate": 8.35214446952596e-05,
      "loss": 0.0816,
      "step": 730
    },
    {
      "epoch": 1.67,
      "learning_rate": 8.329571106094809e-05,
      "loss": 0.0815,
      "step": 740
    },
    {
      "epoch": 1.69,
      "learning_rate": 8.306997742663656e-05,
      "loss": 0.0828,
      "step": 750
    },
    {
      "epoch": 1.72,
      "learning_rate": 8.284424379232505e-05,
      "loss": 0.0761,
      "step": 760
    },
    {
      "epoch": 1.74,
      "learning_rate": 8.261851015801354e-05,
      "loss": 0.088,
      "step": 770
    },
    {
      "epoch": 1.76,
      "learning_rate": 8.239277652370204e-05,
      "loss": 0.074,
      "step": 780
    },
    {
      "epoch": 1.78,
      "learning_rate": 8.216704288939053e-05,
      "loss": 0.0848,
      "step": 790
    },
    {
      "epoch": 1.81,
      "learning_rate": 8.194130925507902e-05,
      "loss": 0.081,
      "step": 800
    },
    {
      "epoch": 1.81,
      "eval_accuracy": 0.9831182795698925,
      "eval_f1": 0.07647058823529411,
      "eval_loss": 0.08278104662895203,
      "eval_precision": 0.9285714285714286,
      "eval_recall": 0.03987730061349693,
      "eval_roc_auc_score": 0.6632475420171875,
      "eval_runtime": 2.1218,
      "eval_samples_per_second": 87.663,
      "eval_steps_per_second": 5.656,
      "step": 800
    },
    {
      "epoch": 1.83,
      "learning_rate": 8.17155756207675e-05,
      "loss": 0.0734,
      "step": 810
    },
    {
      "epoch": 1.85,
      "learning_rate": 8.148984198645599e-05,
      "loss": 0.0814,
      "step": 820
    },
    {
      "epoch": 1.87,
      "learning_rate": 8.126410835214448e-05,
      "loss": 0.0817,
      "step": 830
    },
    {
      "epoch": 1.9,
      "learning_rate": 8.103837471783297e-05,
      "loss": 0.0781,
      "step": 840
    },
    {
      "epoch": 1.92,
      "learning_rate": 8.081264108352144e-05,
      "loss": 0.074,
      "step": 850
    },
    {
      "epoch": 1.94,
      "learning_rate": 8.058690744920994e-05,
      "loss": 0.084,
      "step": 860
    },
    {
      "epoch": 1.96,
      "learning_rate": 8.036117381489843e-05,
      "loss": 0.0831,
      "step": 870
    },
    {
      "epoch": 1.99,
      "learning_rate": 8.01354401805869e-05,
      "loss": 0.0822,
      "step": 880
    },
    {
      "epoch": 2.01,
      "learning_rate": 7.99097065462754e-05,
      "loss": 0.0769,
      "step": 890
    },
    {
      "epoch": 2.03,
      "learning_rate": 7.968397291196389e-05,
      "loss": 0.0827,
      "step": 900
    },
    {
      "epoch": 2.03,
      "eval_accuracy": 0.9831182795698925,
      "eval_f1": 0.07647058823529411,
      "eval_loss": 0.08223988860845566,
      "eval_precision": 0.9285714285714286,
      "eval_recall": 0.03987730061349693,
      "eval_roc_auc_score": 0.6819600545479816,
      "eval_runtime": 2.088,
      "eval_samples_per_second": 89.078,
      "eval_steps_per_second": 5.747,
      "step": 900
    },
    {
      "epoch": 2.05,
      "learning_rate": 7.945823927765236e-05,
      "loss": 0.0764,
      "step": 910
    },
    {
      "epoch": 2.08,
      "learning_rate": 7.923250564334085e-05,
      "loss": 0.084,
      "step": 920
    },
    {
      "epoch": 2.1,
      "learning_rate": 7.900677200902934e-05,
      "loss": 0.0823,
      "step": 930
    },
    {
      "epoch": 2.12,
      "learning_rate": 7.878103837471784e-05,
      "loss": 0.0767,
      "step": 940
    },
    {
      "epoch": 2.14,
      "learning_rate": 7.855530474040633e-05,
      "loss": 0.0803,
      "step": 950
    },
    {
      "epoch": 2.17,
      "learning_rate": 7.832957110609482e-05,
      "loss": 0.0799,
      "step": 960
    },
    {
      "epoch": 2.19,
      "learning_rate": 7.810383747178331e-05,
      "loss": 0.0811,
      "step": 970
    },
    {
      "epoch": 2.21,
      "learning_rate": 7.787810383747179e-05,
      "loss": 0.0858,
      "step": 980
    },
    {
      "epoch": 2.23,
      "learning_rate": 7.765237020316028e-05,
      "loss": 0.081,
      "step": 990
    },
    {
      "epoch": 2.26,
      "learning_rate": 7.742663656884877e-05,
      "loss": 0.0783,
      "step": 1000
    },
    {
      "epoch": 2.26,
      "eval_accuracy": 0.9831182795698925,
      "eval_f1": 0.07647058823529411,
      "eval_loss": 0.08187016099691391,
      "eval_precision": 0.9285714285714286,
      "eval_recall": 0.03987730061349693,
      "eval_roc_auc_score": 0.6911338883028688,
      "eval_runtime": 2.0861,
      "eval_samples_per_second": 89.164,
      "eval_steps_per_second": 5.752,
      "step": 1000
    }
  ],
  "max_steps": 4430,
  "num_train_epochs": 10,
  "total_flos": 4240426445832192.0,
  "trial_name": null,
  "trial_params": null
}