{ "best_metric": 0.12050338089466095, "best_model_checkpoint": "/home/datawork-iot-nos/Seatizen/models/multilabel/fine_scale/DinoVdeau-large-2024_09_05-batch-size32_epochs150_freeze/checkpoint-22386", "epoch": 92.0, "eval_steps": 500, "global_step": 25116, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.22314622314622315, "eval_f1_macro": 0.5430112866470752, "eval_f1_micro": 0.7516596896274684, "eval_loss": 0.16899551451206207, "eval_roc_auc": 0.8384250127967259, "eval_runtime": 514.8139, "eval_samples_per_second": 5.606, "eval_steps_per_second": 0.177, "learning_rate": 0.001, "step": 273 }, { "epoch": 1.8315018315018317, "grad_norm": 0.9695320725440979, "learning_rate": 0.001, "loss": 0.2719, "step": 500 }, { "epoch": 2.0, "eval_accuracy": 0.24012474012474014, "eval_f1_macro": 0.5721428312627432, "eval_f1_micro": 0.765669700910273, "eval_loss": 0.153842031955719, "eval_roc_auc": 0.8396070197954885, "eval_runtime": 520.9151, "eval_samples_per_second": 5.54, "eval_steps_per_second": 0.175, "learning_rate": 0.001, "step": 546 }, { "epoch": 3.0, "eval_accuracy": 0.23458073458073458, "eval_f1_macro": 0.6137585525531024, "eval_f1_micro": 0.7772688719253604, "eval_loss": 0.14828726649284363, "eval_roc_auc": 0.851613165443153, "eval_runtime": 519.0164, "eval_samples_per_second": 5.561, "eval_steps_per_second": 0.175, "learning_rate": 0.001, "step": 819 }, { "epoch": 3.663003663003663, "grad_norm": 0.24952150881290436, "learning_rate": 0.001, "loss": 0.1694, "step": 1000 }, { "epoch": 4.0, "eval_accuracy": 0.2494802494802495, "eval_f1_macro": 0.6224730910908008, "eval_f1_micro": 0.7722737615963591, "eval_loss": 0.1479637324810028, "eval_roc_auc": 0.8406560025496872, "eval_runtime": 512.3298, "eval_samples_per_second": 5.633, "eval_steps_per_second": 0.178, "learning_rate": 0.001, "step": 1092 }, { "epoch": 5.0, "eval_accuracy": 0.2494802494802495, "eval_f1_macro": 0.6302307709949958, "eval_f1_micro": 0.779738930569409, "eval_loss": 0.14575305581092834, "eval_roc_auc": 0.8469713476749664, "eval_runtime": 509.5723, "eval_samples_per_second": 5.664, "eval_steps_per_second": 0.179, "learning_rate": 0.001, "step": 1365 }, { "epoch": 5.4945054945054945, "grad_norm": 0.17697261273860931, "learning_rate": 0.001, "loss": 0.1625, "step": 1500 }, { "epoch": 6.0, "eval_accuracy": 0.2480942480942481, "eval_f1_macro": 0.6092591780781843, "eval_f1_micro": 0.7798061948433986, "eval_loss": 0.14499613642692566, "eval_roc_auc": 0.8476621294180898, "eval_runtime": 511.5766, "eval_samples_per_second": 5.641, "eval_steps_per_second": 0.178, "learning_rate": 0.001, "step": 1638 }, { "epoch": 7.0, "eval_accuracy": 0.2525987525987526, "eval_f1_macro": 0.624806622732382, "eval_f1_micro": 0.7767369242779079, "eval_loss": 0.1474585235118866, "eval_roc_auc": 0.8453623673245133, "eval_runtime": 510.6821, "eval_samples_per_second": 5.651, "eval_steps_per_second": 0.178, "learning_rate": 0.001, "step": 1911 }, { "epoch": 7.326007326007326, "grad_norm": 0.24790136516094208, "learning_rate": 0.001, "loss": 0.1592, "step": 2000 }, { "epoch": 8.0, "eval_accuracy": 0.25744975744975745, "eval_f1_macro": 0.6249401475720361, "eval_f1_micro": 0.7803859753759638, "eval_loss": 0.14568069577217102, "eval_roc_auc": 0.8520784887308331, "eval_runtime": 514.1071, "eval_samples_per_second": 5.614, "eval_steps_per_second": 0.177, "learning_rate": 0.001, "step": 2184 }, { "epoch": 9.0, "eval_accuracy": 0.25744975744975745, "eval_f1_macro": 0.652642904607388, "eval_f1_micro": 0.7868685150535805, "eval_loss": 0.14169421792030334, "eval_roc_auc": 0.8560913219420118, "eval_runtime": 518.322, "eval_samples_per_second": 5.568, "eval_steps_per_second": 0.176, "learning_rate": 0.001, "step": 2457 }, { "epoch": 9.157509157509157, "grad_norm": 0.2022881656885147, "learning_rate": 0.001, "loss": 0.157, "step": 2500 }, { "epoch": 10.0, "eval_accuracy": 0.25467775467775466, "eval_f1_macro": 0.6289931868767601, "eval_f1_micro": 0.7757335098168984, "eval_loss": 0.1436299830675125, "eval_roc_auc": 0.8403493908543921, "eval_runtime": 514.5559, "eval_samples_per_second": 5.609, "eval_steps_per_second": 0.177, "learning_rate": 0.001, "step": 2730 }, { "epoch": 10.989010989010989, "grad_norm": 0.16650938987731934, "learning_rate": 0.001, "loss": 0.1563, "step": 3000 }, { "epoch": 11.0, "eval_accuracy": 0.26403326403326405, "eval_f1_macro": 0.6447870111639475, "eval_f1_micro": 0.7886988341417751, "eval_loss": 0.1428152322769165, "eval_roc_auc": 0.8569209092596786, "eval_runtime": 515.3716, "eval_samples_per_second": 5.6, "eval_steps_per_second": 0.177, "learning_rate": 0.001, "step": 3003 }, { "epoch": 12.0, "eval_accuracy": 0.25814275814275817, "eval_f1_macro": 0.6493205009564239, "eval_f1_micro": 0.7904845227679873, "eval_loss": 0.1438700556755066, "eval_roc_auc": 0.8637698939454586, "eval_runtime": 512.0567, "eval_samples_per_second": 5.636, "eval_steps_per_second": 0.178, "learning_rate": 0.001, "step": 3276 }, { "epoch": 12.820512820512821, "grad_norm": 0.163461372256279, "learning_rate": 0.001, "loss": 0.1558, "step": 3500 }, { "epoch": 13.0, "eval_accuracy": 0.2713097713097713, "eval_f1_macro": 0.6561811626743236, "eval_f1_micro": 0.7906956746065871, "eval_loss": 0.13913600146770477, "eval_roc_auc": 0.8551388511813229, "eval_runtime": 511.5092, "eval_samples_per_second": 5.642, "eval_steps_per_second": 0.178, "learning_rate": 0.001, "step": 3549 }, { "epoch": 14.0, "eval_accuracy": 0.2643797643797644, "eval_f1_macro": 0.6337626365639194, "eval_f1_micro": 0.783810807286006, "eval_loss": 0.14094506204128265, "eval_roc_auc": 0.8484895839481307, "eval_runtime": 513.4311, "eval_samples_per_second": 5.621, "eval_steps_per_second": 0.177, "learning_rate": 0.001, "step": 3822 }, { "epoch": 14.652014652014651, "grad_norm": 0.17725127935409546, "learning_rate": 0.001, "loss": 0.1543, "step": 4000 }, { "epoch": 15.0, "eval_accuracy": 0.2577962577962578, "eval_f1_macro": 0.6463067634895379, "eval_f1_micro": 0.7907172995780591, "eval_loss": 0.1396123319864273, "eval_roc_auc": 0.8603407738558333, "eval_runtime": 520.2063, "eval_samples_per_second": 5.548, "eval_steps_per_second": 0.175, "learning_rate": 0.001, "step": 4095 }, { "epoch": 16.0, "eval_accuracy": 0.2654192654192654, "eval_f1_macro": 0.6593840515969085, "eval_f1_micro": 0.7913274487959551, "eval_loss": 0.13904806971549988, "eval_roc_auc": 0.856418510343081, "eval_runtime": 522.3782, "eval_samples_per_second": 5.525, "eval_steps_per_second": 0.174, "learning_rate": 0.001, "step": 4368 }, { "epoch": 16.483516483516482, "grad_norm": 0.16505596041679382, "learning_rate": 0.001, "loss": 0.1535, "step": 4500 }, { "epoch": 17.0, "eval_accuracy": 0.2564102564102564, "eval_f1_macro": 0.6585824628325464, "eval_f1_micro": 0.7939832128313804, "eval_loss": 0.1418265849351883, "eval_roc_auc": 0.8664525383660324, "eval_runtime": 520.8828, "eval_samples_per_second": 5.541, "eval_steps_per_second": 0.175, "learning_rate": 0.001, "step": 4641 }, { "epoch": 18.0, "eval_accuracy": 0.26576576576576577, "eval_f1_macro": 0.6560187518750095, "eval_f1_micro": 0.7957187827911858, "eval_loss": 0.14155420660972595, "eval_roc_auc": 0.864558649781785, "eval_runtime": 521.9656, "eval_samples_per_second": 5.529, "eval_steps_per_second": 0.174, "learning_rate": 0.001, "step": 4914 }, { "epoch": 18.315018315018314, "grad_norm": 0.176731139421463, "learning_rate": 0.001, "loss": 0.1549, "step": 5000 }, { "epoch": 19.0, "eval_accuracy": 0.262993762993763, "eval_f1_macro": 0.6524018082903621, "eval_f1_micro": 0.7885625699767461, "eval_loss": 0.14027266204357147, "eval_roc_auc": 0.8535729424099051, "eval_runtime": 525.594, "eval_samples_per_second": 5.491, "eval_steps_per_second": 0.173, "learning_rate": 0.001, "step": 5187 }, { "epoch": 20.0, "eval_accuracy": 0.26126126126126126, "eval_f1_macro": 0.6558190248610255, "eval_f1_micro": 0.7910696719558615, "eval_loss": 0.14759798347949982, "eval_roc_auc": 0.8567849608157283, "eval_runtime": 533.1376, "eval_samples_per_second": 5.413, "eval_steps_per_second": 0.171, "learning_rate": 0.001, "step": 5460 }, { "epoch": 20.146520146520146, "grad_norm": 0.15767891705036163, "learning_rate": 0.001, "loss": 0.154, "step": 5500 }, { "epoch": 21.0, "eval_accuracy": 0.26576576576576577, "eval_f1_macro": 0.6397027546064713, "eval_f1_micro": 0.7879767016708474, "eval_loss": 0.14285211265087128, "eval_roc_auc": 0.8567511447301636, "eval_runtime": 527.0011, "eval_samples_per_second": 5.476, "eval_steps_per_second": 0.173, "learning_rate": 0.001, "step": 5733 }, { "epoch": 21.978021978021978, "grad_norm": 0.18300685286521912, "learning_rate": 0.001, "loss": 0.1529, "step": 6000 }, { "epoch": 22.0, "eval_accuracy": 0.26126126126126126, "eval_f1_macro": 0.650810186340724, "eval_f1_micro": 0.7936799099512236, "eval_loss": 0.141402930021286, "eval_roc_auc": 0.8653510005054305, "eval_runtime": 525.9127, "eval_samples_per_second": 5.488, "eval_steps_per_second": 0.173, "learning_rate": 0.001, "step": 6006 }, { "epoch": 23.0, "eval_accuracy": 0.26853776853776856, "eval_f1_macro": 0.6618136826297922, "eval_f1_micro": 0.7975794766896787, "eval_loss": 0.1415141373872757, "eval_roc_auc": 0.8613092204030781, "eval_runtime": 530.5247, "eval_samples_per_second": 5.44, "eval_steps_per_second": 0.172, "learning_rate": 0.0001, "step": 6279 }, { "epoch": 23.80952380952381, "grad_norm": 0.16848017275333405, "learning_rate": 0.0001, "loss": 0.1449, "step": 6500 }, { "epoch": 24.0, "eval_accuracy": 0.27893277893277896, "eval_f1_macro": 0.6750686264509598, "eval_f1_micro": 0.8044778018063861, "eval_loss": 0.13230843842029572, "eval_roc_auc": 0.8664561198395929, "eval_runtime": 521.5756, "eval_samples_per_second": 5.533, "eval_steps_per_second": 0.174, "learning_rate": 0.0001, "step": 6552 }, { "epoch": 25.0, "eval_accuracy": 0.27927927927927926, "eval_f1_macro": 0.6724022117445357, "eval_f1_micro": 0.8044072500946213, "eval_loss": 0.13101588189601898, "eval_roc_auc": 0.868781233937024, "eval_runtime": 523.3306, "eval_samples_per_second": 5.515, "eval_steps_per_second": 0.174, "learning_rate": 0.0001, "step": 6825 }, { "epoch": 25.641025641025642, "grad_norm": 0.16336454451084137, "learning_rate": 0.0001, "loss": 0.1416, "step": 7000 }, { "epoch": 26.0, "eval_accuracy": 0.28205128205128205, "eval_f1_macro": 0.6689442300740391, "eval_f1_micro": 0.8035965398218775, "eval_loss": 0.13268393278121948, "eval_roc_auc": 0.8645798435204571, "eval_runtime": 532.8406, "eval_samples_per_second": 5.416, "eval_steps_per_second": 0.171, "learning_rate": 0.0001, "step": 7098 }, { "epoch": 27.0, "eval_accuracy": 0.2817047817047817, "eval_f1_macro": 0.679681812643572, "eval_f1_micro": 0.8068647969861867, "eval_loss": 0.1317097693681717, "eval_roc_auc": 0.8714747032608311, "eval_runtime": 527.4278, "eval_samples_per_second": 5.472, "eval_steps_per_second": 0.173, "learning_rate": 0.0001, "step": 7371 }, { "epoch": 27.47252747252747, "grad_norm": 0.1572931855916977, "learning_rate": 0.0001, "loss": 0.1391, "step": 7500 }, { "epoch": 28.0, "eval_accuracy": 0.27754677754677753, "eval_f1_macro": 0.6818462300001074, "eval_f1_micro": 0.8072126727334008, "eval_loss": 0.12880520522594452, "eval_roc_auc": 0.8697994857701482, "eval_runtime": 536.9046, "eval_samples_per_second": 5.375, "eval_steps_per_second": 0.169, "learning_rate": 0.0001, "step": 7644 }, { "epoch": 29.0, "eval_accuracy": 0.2844767844767845, "eval_f1_macro": 0.6807929806344717, "eval_f1_micro": 0.8038088702067427, "eval_loss": 0.12942521274089813, "eval_roc_auc": 0.8628519636133017, "eval_runtime": 520.5065, "eval_samples_per_second": 5.545, "eval_steps_per_second": 0.175, "learning_rate": 0.0001, "step": 7917 }, { "epoch": 29.304029304029303, "grad_norm": 0.19199338555335999, "learning_rate": 0.0001, "loss": 0.138, "step": 8000 }, { "epoch": 30.0, "eval_accuracy": 0.28586278586278585, "eval_f1_macro": 0.6825529208005033, "eval_f1_micro": 0.8077149835761811, "eval_loss": 0.12943296134471893, "eval_roc_auc": 0.8701959964759374, "eval_runtime": 543.5755, "eval_samples_per_second": 5.309, "eval_steps_per_second": 0.167, "learning_rate": 0.0001, "step": 8190 }, { "epoch": 31.0, "eval_accuracy": 0.28794178794178793, "eval_f1_macro": 0.6779122940127521, "eval_f1_micro": 0.8073808915025994, "eval_loss": 0.12738928198814392, "eval_roc_auc": 0.8666172459085354, "eval_runtime": 521.4164, "eval_samples_per_second": 5.535, "eval_steps_per_second": 0.175, "learning_rate": 0.0001, "step": 8463 }, { "epoch": 31.135531135531135, "grad_norm": 0.1997932642698288, "learning_rate": 0.0001, "loss": 0.1364, "step": 8500 }, { "epoch": 32.0, "eval_accuracy": 0.2882882882882883, "eval_f1_macro": 0.6868638344898197, "eval_f1_micro": 0.8104185890445432, "eval_loss": 0.12775012850761414, "eval_roc_auc": 0.8728485806633693, "eval_runtime": 519.8308, "eval_samples_per_second": 5.552, "eval_steps_per_second": 0.175, "learning_rate": 0.0001, "step": 8736 }, { "epoch": 32.967032967032964, "grad_norm": 0.19476589560508728, "learning_rate": 0.0001, "loss": 0.1359, "step": 9000 }, { "epoch": 33.0, "eval_accuracy": 0.2869022869022869, "eval_f1_macro": 0.6810807224403135, "eval_f1_micro": 0.8077248140635565, "eval_loss": 0.12765593826770782, "eval_roc_auc": 0.8692062891212271, "eval_runtime": 514.7142, "eval_samples_per_second": 5.607, "eval_steps_per_second": 0.177, "learning_rate": 0.0001, "step": 9009 }, { "epoch": 34.0, "eval_accuracy": 0.2882882882882883, "eval_f1_macro": 0.687361527737602, "eval_f1_micro": 0.8108837797932926, "eval_loss": 0.12660712003707886, "eval_roc_auc": 0.8714320206807965, "eval_runtime": 514.9645, "eval_samples_per_second": 5.604, "eval_steps_per_second": 0.177, "learning_rate": 0.0001, "step": 9282 }, { "epoch": 34.798534798534796, "grad_norm": 0.2034957855939865, "learning_rate": 0.0001, "loss": 0.1341, "step": 9500 }, { "epoch": 35.0, "eval_accuracy": 0.29036729036729036, "eval_f1_macro": 0.688483181989703, "eval_f1_micro": 0.8103963941193815, "eval_loss": 0.1262102574110031, "eval_roc_auc": 0.8715800817488106, "eval_runtime": 525.0872, "eval_samples_per_second": 5.496, "eval_steps_per_second": 0.173, "learning_rate": 0.0001, "step": 9555 }, { "epoch": 36.0, "eval_accuracy": 0.28274428274428276, "eval_f1_macro": 0.6876394944988364, "eval_f1_micro": 0.8070400273399119, "eval_loss": 0.12687553465366364, "eval_roc_auc": 0.8657418371913091, "eval_runtime": 513.0757, "eval_samples_per_second": 5.625, "eval_steps_per_second": 0.177, "learning_rate": 0.0001, "step": 9828 }, { "epoch": 36.63003663003663, "grad_norm": 0.20557202398777008, "learning_rate": 0.0001, "loss": 0.1339, "step": 10000 }, { "epoch": 37.0, "eval_accuracy": 0.28655578655578656, "eval_f1_macro": 0.6833930255395054, "eval_f1_micro": 0.8081597960050999, "eval_loss": 0.12656189501285553, "eval_roc_auc": 0.8678163688633396, "eval_runtime": 515.3436, "eval_samples_per_second": 5.6, "eval_steps_per_second": 0.177, "learning_rate": 0.0001, "step": 10101 }, { "epoch": 38.0, "eval_accuracy": 0.2955647955647956, "eval_f1_macro": 0.6936175483283518, "eval_f1_micro": 0.8106371284826448, "eval_loss": 0.12547720968723297, "eval_roc_auc": 0.8706625538294134, "eval_runtime": 512.1358, "eval_samples_per_second": 5.635, "eval_steps_per_second": 0.178, "learning_rate": 0.0001, "step": 10374 }, { "epoch": 38.46153846153846, "grad_norm": 0.2112371176481247, "learning_rate": 0.0001, "loss": 0.1307, "step": 10500 }, { "epoch": 39.0, "eval_accuracy": 0.2927927927927928, "eval_f1_macro": 0.6985657340894045, "eval_f1_micro": 0.8141880626875626, "eval_loss": 0.12485096603631973, "eval_roc_auc": 0.8767653445350737, "eval_runtime": 512.6109, "eval_samples_per_second": 5.63, "eval_steps_per_second": 0.178, "learning_rate": 0.0001, "step": 10647 }, { "epoch": 40.0, "eval_accuracy": 0.2934857934857935, "eval_f1_macro": 0.6989554260935754, "eval_f1_micro": 0.8138017044273539, "eval_loss": 0.1257668137550354, "eval_roc_auc": 0.8773247787534647, "eval_runtime": 513.8833, "eval_samples_per_second": 5.616, "eval_steps_per_second": 0.177, "learning_rate": 0.0001, "step": 10920 }, { "epoch": 40.29304029304029, "grad_norm": 0.23032954335212708, "learning_rate": 0.0001, "loss": 0.1317, "step": 11000 }, { "epoch": 41.0, "eval_accuracy": 0.29244629244629244, "eval_f1_macro": 0.6923923602014324, "eval_f1_micro": 0.8101351925856646, "eval_loss": 0.12528541684150696, "eval_roc_auc": 0.8687915491174283, "eval_runtime": 513.0005, "eval_samples_per_second": 5.626, "eval_steps_per_second": 0.177, "learning_rate": 0.0001, "step": 11193 }, { "epoch": 42.0, "eval_accuracy": 0.3004158004158004, "eval_f1_macro": 0.6970236383039276, "eval_f1_micro": 0.8138018093835474, "eval_loss": 0.12443084269762039, "eval_roc_auc": 0.8737649281720051, "eval_runtime": 525.5315, "eval_samples_per_second": 5.492, "eval_steps_per_second": 0.173, "learning_rate": 0.0001, "step": 11466 }, { "epoch": 42.124542124542124, "grad_norm": 0.23487386107444763, "learning_rate": 0.0001, "loss": 0.1308, "step": 11500 }, { "epoch": 43.0, "eval_accuracy": 0.2948717948717949, "eval_f1_macro": 0.6956334056896907, "eval_f1_micro": 0.8131470414948238, "eval_loss": 0.12451612949371338, "eval_roc_auc": 0.8733690344991142, "eval_runtime": 514.3778, "eval_samples_per_second": 5.611, "eval_steps_per_second": 0.177, "learning_rate": 0.0001, "step": 11739 }, { "epoch": 43.956043956043956, "grad_norm": 0.25621357560157776, "learning_rate": 0.0001, "loss": 0.1307, "step": 12000 }, { "epoch": 44.0, "eval_accuracy": 0.2966042966042966, "eval_f1_macro": 0.6915470420512126, "eval_f1_micro": 0.812950847173293, "eval_loss": 0.12501148879528046, "eval_roc_auc": 0.8742664283667729, "eval_runtime": 519.3764, "eval_samples_per_second": 5.557, "eval_steps_per_second": 0.175, "learning_rate": 0.0001, "step": 12012 }, { "epoch": 45.0, "eval_accuracy": 0.29625779625779625, "eval_f1_macro": 0.7050548840380568, "eval_f1_micro": 0.8136846971798428, "eval_loss": 0.12397606670856476, "eval_roc_auc": 0.8740443367647517, "eval_runtime": 515.8997, "eval_samples_per_second": 5.594, "eval_steps_per_second": 0.176, "learning_rate": 0.0001, "step": 12285 }, { "epoch": 45.78754578754579, "grad_norm": 0.22914335131645203, "learning_rate": 0.0001, "loss": 0.1295, "step": 12500 }, { "epoch": 46.0, "eval_accuracy": 0.29764379764379767, "eval_f1_macro": 0.6987723620069867, "eval_f1_micro": 0.8130628734954971, "eval_loss": 0.12409698963165283, "eval_roc_auc": 0.8733228777555885, "eval_runtime": 516.6269, "eval_samples_per_second": 5.586, "eval_steps_per_second": 0.176, "learning_rate": 0.0001, "step": 12558 }, { "epoch": 47.0, "eval_accuracy": 0.2955647955647956, "eval_f1_macro": 0.6957628076563835, "eval_f1_micro": 0.811911298838437, "eval_loss": 0.12429661303758621, "eval_roc_auc": 0.8716271908692008, "eval_runtime": 518.0917, "eval_samples_per_second": 5.57, "eval_steps_per_second": 0.176, "learning_rate": 0.0001, "step": 12831 }, { "epoch": 47.61904761904762, "grad_norm": 0.25639113783836365, "learning_rate": 0.0001, "loss": 0.1293, "step": 13000 }, { "epoch": 48.0, "eval_accuracy": 0.2955647955647956, "eval_f1_macro": 0.6990296569974817, "eval_f1_micro": 0.8135280295401142, "eval_loss": 0.12393072247505188, "eval_roc_auc": 0.874436809929186, "eval_runtime": 517.4634, "eval_samples_per_second": 5.577, "eval_steps_per_second": 0.176, "learning_rate": 0.0001, "step": 13104 }, { "epoch": 49.0, "eval_accuracy": 0.29972279972279975, "eval_f1_macro": 0.7007060102949784, "eval_f1_micro": 0.8152993625265614, "eval_loss": 0.1242954283952713, "eval_roc_auc": 0.8774914581184896, "eval_runtime": 511.3749, "eval_samples_per_second": 5.644, "eval_steps_per_second": 0.178, "learning_rate": 0.0001, "step": 13377 }, { "epoch": 49.45054945054945, "grad_norm": 0.27197974920272827, "learning_rate": 0.0001, "loss": 0.1274, "step": 13500 }, { "epoch": 50.0, "eval_accuracy": 0.29799029799029797, "eval_f1_macro": 0.6999734070385492, "eval_f1_micro": 0.8151919866444074, "eval_loss": 0.12405084818601608, "eval_roc_auc": 0.8769273693258459, "eval_runtime": 509.3276, "eval_samples_per_second": 5.666, "eval_steps_per_second": 0.179, "learning_rate": 0.0001, "step": 13650 }, { "epoch": 51.0, "eval_accuracy": 0.3011088011088011, "eval_f1_macro": 0.7055935576453343, "eval_f1_micro": 0.8153039745759215, "eval_loss": 0.12483017891645432, "eval_roc_auc": 0.8803007418345086, "eval_runtime": 511.0056, "eval_samples_per_second": 5.648, "eval_steps_per_second": 0.178, "learning_rate": 0.0001, "step": 13923 }, { "epoch": 51.282051282051285, "grad_norm": 0.23091430962085724, "learning_rate": 0.0001, "loss": 0.1271, "step": 14000 }, { "epoch": 52.0, "eval_accuracy": 0.3049203049203049, "eval_f1_macro": 0.7035566403965832, "eval_f1_micro": 0.8157241959217996, "eval_loss": 0.12426182627677917, "eval_roc_auc": 0.8750656737623661, "eval_runtime": 511.1647, "eval_samples_per_second": 5.646, "eval_steps_per_second": 0.178, "learning_rate": 0.0001, "step": 14196 }, { "epoch": 53.0, "eval_accuracy": 0.30214830214830213, "eval_f1_macro": 0.7031528349086803, "eval_f1_micro": 0.8152648882600192, "eval_loss": 0.12408608943223953, "eval_roc_auc": 0.8778170234547618, "eval_runtime": 520.4, "eval_samples_per_second": 5.546, "eval_steps_per_second": 0.175, "learning_rate": 0.0001, "step": 14469 }, { "epoch": 53.11355311355312, "grad_norm": 0.23177389800548553, "learning_rate": 0.0001, "loss": 0.1275, "step": 14500 }, { "epoch": 54.0, "eval_accuracy": 0.30214830214830213, "eval_f1_macro": 0.7067666695453366, "eval_f1_micro": 0.8152251458307105, "eval_loss": 0.12344320118427277, "eval_roc_auc": 0.8753333050750151, "eval_runtime": 522.8329, "eval_samples_per_second": 5.52, "eval_steps_per_second": 0.174, "learning_rate": 0.0001, "step": 14742 }, { "epoch": 54.94505494505494, "grad_norm": 0.3403611481189728, "learning_rate": 0.0001, "loss": 0.1256, "step": 15000 }, { "epoch": 55.0, "eval_accuracy": 0.30180180180180183, "eval_f1_macro": 0.7075536762185066, "eval_f1_micro": 0.8166332665330662, "eval_loss": 0.12307523190975189, "eval_roc_auc": 0.8776256091187804, "eval_runtime": 513.5394, "eval_samples_per_second": 5.62, "eval_steps_per_second": 0.177, "learning_rate": 0.0001, "step": 15015 }, { "epoch": 56.0, "eval_accuracy": 0.30665280665280664, "eval_f1_macro": 0.7087921855865761, "eval_f1_micro": 0.8189626693095475, "eval_loss": 0.12282071262598038, "eval_roc_auc": 0.8821854285803199, "eval_runtime": 519.2592, "eval_samples_per_second": 5.558, "eval_steps_per_second": 0.175, "learning_rate": 0.0001, "step": 15288 }, { "epoch": 56.776556776556774, "grad_norm": 0.28649473190307617, "learning_rate": 0.0001, "loss": 0.1258, "step": 15500 }, { "epoch": 57.0, "eval_accuracy": 0.306999306999307, "eval_f1_macro": 0.7079839879234633, "eval_f1_micro": 0.8160328019748128, "eval_loss": 0.12259934842586517, "eval_roc_auc": 0.8766650096203477, "eval_runtime": 523.8952, "eval_samples_per_second": 5.509, "eval_steps_per_second": 0.174, "learning_rate": 0.0001, "step": 15561 }, { "epoch": 58.0, "eval_accuracy": 0.30214830214830213, "eval_f1_macro": 0.7072503847729165, "eval_f1_micro": 0.8170145133631687, "eval_loss": 0.12334763258695602, "eval_roc_auc": 0.8773053153896588, "eval_runtime": 522.7463, "eval_samples_per_second": 5.521, "eval_steps_per_second": 0.174, "learning_rate": 0.0001, "step": 15834 }, { "epoch": 58.608058608058606, "grad_norm": 0.2677023112773895, "learning_rate": 0.0001, "loss": 0.1258, "step": 16000 }, { "epoch": 59.0, "eval_accuracy": 0.30214830214830213, "eval_f1_macro": 0.713532815646164, "eval_f1_micro": 0.8172105834237543, "eval_loss": 0.12272054702043533, "eval_roc_auc": 0.8780682765680952, "eval_runtime": 524.8476, "eval_samples_per_second": 5.499, "eval_steps_per_second": 0.173, "learning_rate": 0.0001, "step": 16107 }, { "epoch": 60.0, "eval_accuracy": 0.30214830214830213, "eval_f1_macro": 0.7039801220819605, "eval_f1_micro": 0.8142579609764339, "eval_loss": 0.12334387749433517, "eval_roc_auc": 0.8729462194126062, "eval_runtime": 526.97, "eval_samples_per_second": 5.477, "eval_steps_per_second": 0.173, "learning_rate": 0.0001, "step": 16380 }, { "epoch": 60.43956043956044, "grad_norm": 0.273879736661911, "learning_rate": 0.0001, "loss": 0.1252, "step": 16500 }, { "epoch": 61.0, "eval_accuracy": 0.3042273042273042, "eval_f1_macro": 0.7120578542808926, "eval_f1_micro": 0.816814564846061, "eval_loss": 0.12339764833450317, "eval_roc_auc": 0.8783554248995846, "eval_runtime": 524.4656, "eval_samples_per_second": 5.503, "eval_steps_per_second": 0.174, "learning_rate": 0.0001, "step": 16653 }, { "epoch": 62.0, "eval_accuracy": 0.3049203049203049, "eval_f1_macro": 0.7124854785684515, "eval_f1_micro": 0.8169309505831026, "eval_loss": 0.12234435975551605, "eval_roc_auc": 0.876382515863111, "eval_runtime": 518.389, "eval_samples_per_second": 5.567, "eval_steps_per_second": 0.176, "learning_rate": 0.0001, "step": 16926 }, { "epoch": 62.27106227106227, "grad_norm": 0.2836596667766571, "learning_rate": 0.0001, "loss": 0.1238, "step": 17000 }, { "epoch": 63.0, "eval_accuracy": 0.30353430353430355, "eval_f1_macro": 0.709030237195192, "eval_f1_micro": 0.8151443922095366, "eval_loss": 0.12311259657144547, "eval_roc_auc": 0.875227363209172, "eval_runtime": 523.1406, "eval_samples_per_second": 5.517, "eval_steps_per_second": 0.174, "learning_rate": 0.0001, "step": 17199 }, { "epoch": 64.0, "eval_accuracy": 0.30665280665280664, "eval_f1_macro": 0.7114197657112039, "eval_f1_micro": 0.8183222681531587, "eval_loss": 0.12282687425613403, "eval_roc_auc": 0.8785221042646094, "eval_runtime": 525.9879, "eval_samples_per_second": 5.487, "eval_steps_per_second": 0.173, "learning_rate": 0.0001, "step": 17472 }, { "epoch": 64.1025641025641, "grad_norm": 0.327009916305542, "learning_rate": 0.0001, "loss": 0.1247, "step": 17500 }, { "epoch": 65.0, "eval_accuracy": 0.30353430353430355, "eval_f1_macro": 0.715610525327271, "eval_f1_micro": 0.8185065204751224, "eval_loss": 0.12305620312690735, "eval_roc_auc": 0.8802214933483853, "eval_runtime": 527.6963, "eval_samples_per_second": 5.469, "eval_steps_per_second": 0.172, "learning_rate": 0.0001, "step": 17745 }, { "epoch": 65.93406593406593, "grad_norm": 0.3439556360244751, "learning_rate": 0.0001, "loss": 0.123, "step": 18000 }, { "epoch": 66.0, "eval_accuracy": 0.30214830214830213, "eval_f1_macro": 0.7083957677770276, "eval_f1_micro": 0.8193021036471515, "eval_loss": 0.12252139300107956, "eval_roc_auc": 0.8809488409975973, "eval_runtime": 523.6027, "eval_samples_per_second": 5.512, "eval_steps_per_second": 0.174, "learning_rate": 0.0001, "step": 18018 }, { "epoch": 67.0, "eval_accuracy": 0.3031878031878032, "eval_f1_macro": 0.713563304331985, "eval_f1_micro": 0.8185542268382505, "eval_loss": 0.12215397506952286, "eval_roc_auc": 0.8813502879665707, "eval_runtime": 528.5406, "eval_samples_per_second": 5.46, "eval_steps_per_second": 0.172, "learning_rate": 0.0001, "step": 18291 }, { "epoch": 67.76556776556777, "grad_norm": 0.3434881269931793, "learning_rate": 0.0001, "loss": 0.1224, "step": 18500 }, { "epoch": 68.0, "eval_accuracy": 0.3090783090783091, "eval_f1_macro": 0.7169216330412181, "eval_f1_micro": 0.8201218248870841, "eval_loss": 0.12200037389993668, "eval_roc_auc": 0.8818022645643908, "eval_runtime": 525.6971, "eval_samples_per_second": 5.49, "eval_steps_per_second": 0.173, "learning_rate": 0.0001, "step": 18564 }, { "epoch": 69.0, "eval_accuracy": 0.30180180180180183, "eval_f1_macro": 0.7165157275423649, "eval_f1_micro": 0.8171493231633209, "eval_loss": 0.12282921373844147, "eval_roc_auc": 0.8767867663076429, "eval_runtime": 539.1574, "eval_samples_per_second": 5.353, "eval_steps_per_second": 0.169, "learning_rate": 0.0001, "step": 18837 }, { "epoch": 69.59706959706959, "grad_norm": 0.2773456275463104, "learning_rate": 0.0001, "loss": 0.1228, "step": 19000 }, { "epoch": 70.0, "eval_accuracy": 0.3042273042273042, "eval_f1_macro": 0.7130922408537738, "eval_f1_micro": 0.8176893032631977, "eval_loss": 0.12265007942914963, "eval_roc_auc": 0.8764658555456234, "eval_runtime": 532.0042, "eval_samples_per_second": 5.425, "eval_steps_per_second": 0.171, "learning_rate": 0.0001, "step": 19110 }, { "epoch": 71.0, "eval_accuracy": 0.29799029799029797, "eval_f1_macro": 0.7123118599173115, "eval_f1_micro": 0.8155257705805251, "eval_loss": 0.12318737804889679, "eval_roc_auc": 0.8733064995562728, "eval_runtime": 512.5227, "eval_samples_per_second": 5.631, "eval_steps_per_second": 0.178, "learning_rate": 0.0001, "step": 19383 }, { "epoch": 71.42857142857143, "grad_norm": 0.32921841740608215, "learning_rate": 0.0001, "loss": 0.1224, "step": 19500 }, { "epoch": 72.0, "eval_accuracy": 0.30561330561330563, "eval_f1_macro": 0.7181217472368024, "eval_f1_micro": 0.8177146438270315, "eval_loss": 0.12224896252155304, "eval_roc_auc": 0.8780131460200304, "eval_runtime": 526.8353, "eval_samples_per_second": 5.478, "eval_steps_per_second": 0.173, "learning_rate": 0.0001, "step": 19656 }, { "epoch": 73.0, "eval_accuracy": 0.3076923076923077, "eval_f1_macro": 0.7046690012290543, "eval_f1_micro": 0.8161570403926011, "eval_loss": 0.12214501202106476, "eval_roc_auc": 0.8759937448960649, "eval_runtime": 523.4325, "eval_samples_per_second": 5.514, "eval_steps_per_second": 0.174, "learning_rate": 0.0001, "step": 19929 }, { "epoch": 73.26007326007326, "grad_norm": 0.27500712871551514, "learning_rate": 0.0001, "loss": 0.122, "step": 20000 }, { "epoch": 74.0, "eval_accuracy": 0.2972972972972973, "eval_f1_macro": 0.7070482653980339, "eval_f1_micro": 0.8147835269271382, "eval_loss": 0.12297073751688004, "eval_roc_auc": 0.8731965201490751, "eval_runtime": 521.6748, "eval_samples_per_second": 5.532, "eval_steps_per_second": 0.174, "learning_rate": 0.0001, "step": 20202 }, { "epoch": 75.0, "eval_accuracy": 0.3049203049203049, "eval_f1_macro": 0.7123584497861349, "eval_f1_micro": 0.8175831550689987, "eval_loss": 0.12141965329647064, "eval_roc_auc": 0.876778409536002, "eval_runtime": 521.9582, "eval_samples_per_second": 5.529, "eval_steps_per_second": 0.174, "learning_rate": 1e-05, "step": 20475 }, { "epoch": 75.0915750915751, "grad_norm": 0.34586936235427856, "learning_rate": 1e-05, "loss": 0.1201, "step": 20500 }, { "epoch": 76.0, "eval_accuracy": 0.30665280665280664, "eval_f1_macro": 0.7265282519195887, "eval_f1_micro": 0.8212704324436167, "eval_loss": 0.12091591954231262, "eval_roc_auc": 0.8828403151052873, "eval_runtime": 515.687, "eval_samples_per_second": 5.596, "eval_steps_per_second": 0.176, "learning_rate": 1e-05, "step": 20748 }, { "epoch": 76.92307692307692, "grad_norm": 0.3650946617126465, "learning_rate": 1e-05, "loss": 0.1192, "step": 21000 }, { "epoch": 77.0, "eval_accuracy": 0.30734580734580735, "eval_f1_macro": 0.7249141687532618, "eval_f1_micro": 0.8221009885557243, "eval_loss": 0.12162773311138153, "eval_roc_auc": 0.88597146196019, "eval_runtime": 505.8066, "eval_samples_per_second": 5.706, "eval_steps_per_second": 0.18, "learning_rate": 1e-05, "step": 21021 }, { "epoch": 78.0, "eval_accuracy": 0.30561330561330563, "eval_f1_macro": 0.7232913822219021, "eval_f1_micro": 0.821013443640124, "eval_loss": 0.12114103883504868, "eval_roc_auc": 0.8828214151193448, "eval_runtime": 515.423, "eval_samples_per_second": 5.599, "eval_steps_per_second": 0.177, "learning_rate": 1e-05, "step": 21294 }, { "epoch": 78.75457875457876, "grad_norm": 0.3805921673774719, "learning_rate": 1e-05, "loss": 0.1178, "step": 21500 }, { "epoch": 79.0, "eval_accuracy": 0.30561330561330563, "eval_f1_macro": 0.7157592534107864, "eval_f1_micro": 0.8181284095677717, "eval_loss": 0.1210767850279808, "eval_roc_auc": 0.8769422854254683, "eval_runtime": 524.7026, "eval_samples_per_second": 5.5, "eval_steps_per_second": 0.173, "learning_rate": 1e-05, "step": 21567 }, { "epoch": 80.0, "eval_accuracy": 0.3090783090783091, "eval_f1_macro": 0.7196736600383237, "eval_f1_micro": 0.8200463116109824, "eval_loss": 0.12099559605121613, "eval_roc_auc": 0.8823936101146178, "eval_runtime": 518.5996, "eval_samples_per_second": 5.565, "eval_steps_per_second": 0.175, "learning_rate": 1e-05, "step": 21840 }, { "epoch": 80.58608058608058, "grad_norm": 0.38496658205986023, "learning_rate": 1e-05, "loss": 0.1178, "step": 22000 }, { "epoch": 81.0, "eval_accuracy": 0.31046431046431044, "eval_f1_macro": 0.7194056763702963, "eval_f1_micro": 0.8189727287937092, "eval_loss": 0.12053155153989792, "eval_roc_auc": 0.8783734261636972, "eval_runtime": 517.5249, "eval_samples_per_second": 5.577, "eval_steps_per_second": 0.176, "learning_rate": 1e-05, "step": 22113 }, { "epoch": 82.0, "eval_accuracy": 0.306999306999307, "eval_f1_macro": 0.7212694332008583, "eval_f1_micro": 0.8186875235267054, "eval_loss": 0.12050338089466095, "eval_roc_auc": 0.8782284502601733, "eval_runtime": 511.5713, "eval_samples_per_second": 5.641, "eval_steps_per_second": 0.178, "learning_rate": 1e-05, "step": 22386 }, { "epoch": 82.41758241758242, "grad_norm": 0.29807013273239136, "learning_rate": 1e-05, "loss": 0.1162, "step": 22500 }, { "epoch": 83.0, "eval_accuracy": 0.3049203049203049, "eval_f1_macro": 0.7136069207682542, "eval_f1_micro": 0.817129142279675, "eval_loss": 0.12153622508049011, "eval_roc_auc": 0.8753921914755026, "eval_runtime": 514.5554, "eval_samples_per_second": 5.609, "eval_steps_per_second": 0.177, "learning_rate": 1e-05, "step": 22659 }, { "epoch": 84.0, "eval_accuracy": 0.3115038115038115, "eval_f1_macro": 0.72263281374496, "eval_f1_micro": 0.8212135055442501, "eval_loss": 0.12091034650802612, "eval_roc_auc": 0.8817381602117871, "eval_runtime": 514.2801, "eval_samples_per_second": 5.612, "eval_steps_per_second": 0.177, "learning_rate": 1e-05, "step": 22932 }, { "epoch": 84.24908424908425, "grad_norm": 0.4926730692386627, "learning_rate": 1e-05, "loss": 0.1174, "step": 23000 }, { "epoch": 85.0, "eval_accuracy": 0.30942480942480943, "eval_f1_macro": 0.7219026145386024, "eval_f1_micro": 0.8212908842183808, "eval_loss": 0.12058679759502411, "eval_roc_auc": 0.8823288887291161, "eval_runtime": 513.2258, "eval_samples_per_second": 5.623, "eval_steps_per_second": 0.177, "learning_rate": 1e-05, "step": 23205 }, { "epoch": 86.0, "eval_accuracy": 0.30838530838530837, "eval_f1_macro": 0.7255503995321377, "eval_f1_micro": 0.8206727371003285, "eval_loss": 0.1210218220949173, "eval_roc_auc": 0.8810894976708349, "eval_runtime": 516.6336, "eval_samples_per_second": 5.586, "eval_steps_per_second": 0.176, "learning_rate": 1e-05, "step": 23478 }, { "epoch": 86.08058608058609, "grad_norm": 0.3941400647163391, "learning_rate": 1e-05, "loss": 0.1167, "step": 23500 }, { "epoch": 87.0, "eval_accuracy": 0.30734580734580735, "eval_f1_macro": 0.7163464112504625, "eval_f1_micro": 0.81919187715867, "eval_loss": 0.12097787857055664, "eval_roc_auc": 0.8800195357981024, "eval_runtime": 514.309, "eval_samples_per_second": 5.611, "eval_steps_per_second": 0.177, "learning_rate": 1e-05, "step": 23751 }, { "epoch": 87.91208791208791, "grad_norm": 0.30446189641952515, "learning_rate": 1e-05, "loss": 0.116, "step": 24000 }, { "epoch": 88.0, "eval_accuracy": 0.30942480942480943, "eval_f1_macro": 0.7179611359738045, "eval_f1_micro": 0.8219223445649475, "eval_loss": 0.12078534066677094, "eval_roc_auc": 0.8831166385335435, "eval_runtime": 513.9044, "eval_samples_per_second": 5.616, "eval_steps_per_second": 0.177, "learning_rate": 1e-05, "step": 24024 }, { "epoch": 89.0, "eval_accuracy": 0.3125433125433125, "eval_f1_macro": 0.7293063087262872, "eval_f1_micro": 0.8235824319895118, "eval_loss": 0.1213160827755928, "eval_roc_auc": 0.8871674997505042, "eval_runtime": 514.1086, "eval_samples_per_second": 5.614, "eval_steps_per_second": 0.177, "learning_rate": 1.0000000000000002e-06, "step": 24297 }, { "epoch": 89.74358974358974, "grad_norm": 0.2996889054775238, "learning_rate": 1.0000000000000002e-06, "loss": 0.1161, "step": 24500 }, { "epoch": 90.0, "eval_accuracy": 0.3108108108108108, "eval_f1_macro": 0.7249894355418997, "eval_f1_micro": 0.8228019165403988, "eval_loss": 0.12110408395528793, "eval_roc_auc": 0.8868651536304606, "eval_runtime": 511.9928, "eval_samples_per_second": 5.637, "eval_steps_per_second": 0.178, "learning_rate": 1.0000000000000002e-06, "step": 24570 }, { "epoch": 91.0, "eval_accuracy": 0.31046431046431044, "eval_f1_macro": 0.7187027508297176, "eval_f1_micro": 0.8191074795725959, "eval_loss": 0.1205781027674675, "eval_roc_auc": 0.8779146622039986, "eval_runtime": 513.1929, "eval_samples_per_second": 5.624, "eval_steps_per_second": 0.177, "learning_rate": 1.0000000000000002e-06, "step": 24843 }, { "epoch": 91.57509157509158, "grad_norm": 0.30445897579193115, "learning_rate": 1.0000000000000002e-06, "loss": 0.1162, "step": 25000 }, { "epoch": 92.0, "eval_accuracy": 0.31046431046431044, "eval_f1_macro": 0.7150284118631205, "eval_f1_micro": 0.8196009683612989, "eval_loss": 0.12076584249734879, "eval_roc_auc": 0.8793405313350767, "eval_runtime": 520.7671, "eval_samples_per_second": 5.542, "eval_steps_per_second": 0.175, "learning_rate": 1.0000000000000002e-06, "step": 25116 }, { "epoch": 92.0, "learning_rate": 1.0000000000000002e-06, "step": 25116, "total_flos": 1.1890234809282512e+21, "train_loss": 0.1360613288991788, "train_runtime": 194834.2342, "train_samples_per_second": 6.71, "train_steps_per_second": 0.21 } ], "logging_steps": 500, "max_steps": 40950, "num_input_tokens_seen": 0, "num_train_epochs": 150, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 10, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.1890234809282512e+21, "train_batch_size": 32, "trial_name": null, "trial_params": null }