{ "best_metric": 0.12031927704811096, "best_model_checkpoint": "Joseph-large-2024_09_16-batch-size32_epochs150_freeze/checkpoint-22113", "epoch": 91.0, "eval_steps": 500, "global_step": 24843, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.21656271656271656, "eval_f1_macro": 0.5384503258991854, "eval_f1_micro": 0.7477812526413659, "eval_loss": 0.17758780717849731, "eval_roc_auc": 0.8364480125638629, "eval_runtime": 594.1704, "eval_samples_per_second": 4.857, "eval_steps_per_second": 0.153, "learning_rate": 0.001, "step": 273 }, { "epoch": 1.8315018315018317, "grad_norm": 0.2863590717315674, "learning_rate": 0.001, "loss": 0.2726, "step": 500 }, { "epoch": 2.0, "eval_accuracy": 0.24532224532224534, "eval_f1_macro": 0.5760774961516321, "eval_f1_micro": 0.7697450182129848, "eval_loss": 0.153945192694664, "eval_roc_auc": 0.8447603712499155, "eval_runtime": 583.9253, "eval_samples_per_second": 4.942, "eval_steps_per_second": 0.156, "learning_rate": 0.001, "step": 546 }, { "epoch": 3.0, "eval_accuracy": 0.2515592515592516, "eval_f1_macro": 0.6098114408992151, "eval_f1_micro": 0.7744839226208509, "eval_loss": 0.14735348522663116, "eval_roc_auc": 0.8447204921454395, "eval_runtime": 592.1092, "eval_samples_per_second": 4.874, "eval_steps_per_second": 0.154, "learning_rate": 0.001, "step": 819 }, { "epoch": 3.663003663003663, "grad_norm": 0.24709707498550415, "learning_rate": 0.001, "loss": 0.1701, "step": 1000 }, { "epoch": 4.0, "eval_accuracy": 0.25363825363825365, "eval_f1_macro": 0.6213514572326843, "eval_f1_micro": 0.7738915615654661, "eval_loss": 0.14645476639270782, "eval_roc_auc": 0.8440185501551167, "eval_runtime": 589.7047, "eval_samples_per_second": 4.894, "eval_steps_per_second": 0.154, "learning_rate": 0.001, "step": 1092 }, { "epoch": 5.0, "eval_accuracy": 0.25017325017325015, "eval_f1_macro": 0.6353051230272125, "eval_f1_micro": 0.78146492434663, "eval_loss": 0.14515458047389984, "eval_roc_auc": 0.850253359073442, "eval_runtime": 596.3244, "eval_samples_per_second": 4.84, "eval_steps_per_second": 0.153, "learning_rate": 0.001, "step": 1365 }, { "epoch": 5.4945054945054945, "grad_norm": 0.24183644354343414, "learning_rate": 0.001, "loss": 0.1622, "step": 1500 }, { "epoch": 6.0, "eval_accuracy": 0.2577962577962578, "eval_f1_macro": 0.6141782571643486, "eval_f1_micro": 0.781259480778399, "eval_loss": 0.1445809006690979, "eval_roc_auc": 0.8479418733548026, "eval_runtime": 590.8762, "eval_samples_per_second": 4.884, "eval_steps_per_second": 0.154, "learning_rate": 0.001, "step": 1638 }, { "epoch": 7.0, "eval_accuracy": 0.26195426195426197, "eval_f1_macro": 0.6232727577909734, "eval_f1_micro": 0.7800943800943801, "eval_loss": 0.14445114135742188, "eval_roc_auc": 0.8500464116049401, "eval_runtime": 601.2858, "eval_samples_per_second": 4.8, "eval_steps_per_second": 0.151, "learning_rate": 0.001, "step": 1911 }, { "epoch": 7.326007326007326, "grad_norm": 0.2069859653711319, "learning_rate": 0.001, "loss": 0.159, "step": 2000 }, { "epoch": 8.0, "eval_accuracy": 0.25848925848925847, "eval_f1_macro": 0.6339480584029394, "eval_f1_micro": 0.7879197465681098, "eval_loss": 0.14366209506988525, "eval_roc_auc": 0.8585186757078976, "eval_runtime": 584.2361, "eval_samples_per_second": 4.94, "eval_steps_per_second": 0.156, "learning_rate": 0.001, "step": 2184 }, { "epoch": 9.0, "eval_accuracy": 0.2577962577962578, "eval_f1_macro": 0.6442804243684905, "eval_f1_micro": 0.785476860138072, "eval_loss": 0.1447097659111023, "eval_roc_auc": 0.8547549628224266, "eval_runtime": 597.754, "eval_samples_per_second": 4.828, "eval_steps_per_second": 0.152, "learning_rate": 0.001, "step": 2457 }, { "epoch": 9.157509157509157, "grad_norm": 0.2012098729610443, "learning_rate": 0.001, "loss": 0.1563, "step": 2500 }, { "epoch": 10.0, "eval_accuracy": 0.2442827442827443, "eval_f1_macro": 0.6149084687726756, "eval_f1_micro": 0.7683399403144626, "eval_loss": 0.1538563072681427, "eval_roc_auc": 0.8340795854397406, "eval_runtime": 582.8083, "eval_samples_per_second": 4.952, "eval_steps_per_second": 0.156, "learning_rate": 0.001, "step": 2730 }, { "epoch": 10.989010989010989, "grad_norm": 0.1713365614414215, "learning_rate": 0.001, "loss": 0.1558, "step": 3000 }, { "epoch": 11.0, "eval_accuracy": 0.26334026334026334, "eval_f1_macro": 0.6334773464226039, "eval_f1_micro": 0.7896514859952961, "eval_loss": 0.1389196366071701, "eval_roc_auc": 0.8560665869710553, "eval_runtime": 581.0367, "eval_samples_per_second": 4.967, "eval_steps_per_second": 0.157, "learning_rate": 0.001, "step": 3003 }, { "epoch": 12.0, "eval_accuracy": 0.26403326403326405, "eval_f1_macro": 0.6406158966836866, "eval_f1_micro": 0.7908438442264407, "eval_loss": 0.1395249217748642, "eval_roc_auc": 0.8586128939486779, "eval_runtime": 577.8577, "eval_samples_per_second": 4.994, "eval_steps_per_second": 0.157, "learning_rate": 0.001, "step": 3276 }, { "epoch": 12.820512820512821, "grad_norm": 0.1511967033147812, "learning_rate": 0.001, "loss": 0.155, "step": 3500 }, { "epoch": 13.0, "eval_accuracy": 0.26507276507276506, "eval_f1_macro": 0.6557265830014797, "eval_f1_micro": 0.7893533497260687, "eval_loss": 0.1390257179737091, "eval_roc_auc": 0.8535165777960444, "eval_runtime": 583.6052, "eval_samples_per_second": 4.945, "eval_steps_per_second": 0.156, "learning_rate": 0.001, "step": 3549 }, { "epoch": 14.0, "eval_accuracy": 0.2623007623007623, "eval_f1_macro": 0.640540413256037, "eval_f1_micro": 0.787792943600309, "eval_loss": 0.13910652697086334, "eval_roc_auc": 0.8540478028799876, "eval_runtime": 575.9469, "eval_samples_per_second": 5.011, "eval_steps_per_second": 0.158, "learning_rate": 0.001, "step": 3822 }, { "epoch": 14.652014652014651, "grad_norm": 0.17730183899402618, "learning_rate": 0.001, "loss": 0.154, "step": 4000 }, { "epoch": 15.0, "eval_accuracy": 0.253984753984754, "eval_f1_macro": 0.6406412255611948, "eval_f1_micro": 0.7885381419454319, "eval_loss": 0.13990363478660583, "eval_roc_auc": 0.8550214137243141, "eval_runtime": 589.3337, "eval_samples_per_second": 4.897, "eval_steps_per_second": 0.154, "learning_rate": 0.001, "step": 4095 }, { "epoch": 16.0, "eval_accuracy": 0.2668052668052668, "eval_f1_macro": 0.6374513053376879, "eval_f1_micro": 0.7847859161051945, "eval_loss": 0.13938209414482117, "eval_roc_auc": 0.8490304535245456, "eval_runtime": 577.6041, "eval_samples_per_second": 4.997, "eval_steps_per_second": 0.158, "learning_rate": 0.001, "step": 4368 }, { "epoch": 16.483516483516482, "grad_norm": 0.16627363860607147, "learning_rate": 0.001, "loss": 0.1527, "step": 4500 }, { "epoch": 17.0, "eval_accuracy": 0.24185724185724186, "eval_f1_macro": 0.6424904129432089, "eval_f1_micro": 0.7857319587628866, "eval_loss": 0.15936270356178284, "eval_roc_auc": 0.8640092624686252, "eval_runtime": 582.0471, "eval_samples_per_second": 4.958, "eval_steps_per_second": 0.156, "learning_rate": 0.001, "step": 4641 }, { "epoch": 18.0, "eval_accuracy": 0.27546777546777546, "eval_f1_macro": 0.6768028620378452, "eval_f1_micro": 0.8036556603773585, "eval_loss": 0.13188092410564423, "eval_roc_auc": 0.8679473273890709, "eval_runtime": 578.2193, "eval_samples_per_second": 4.991, "eval_steps_per_second": 0.157, "learning_rate": 0.0001, "step": 4914 }, { "epoch": 18.315018315018314, "grad_norm": 0.17489519715309143, "learning_rate": 0.0001, "loss": 0.149, "step": 5000 }, { "epoch": 19.0, "eval_accuracy": 0.27893277893277896, "eval_f1_macro": 0.6715138701269487, "eval_f1_micro": 0.8038422649140546, "eval_loss": 0.13244545459747314, "eval_roc_auc": 0.8679532160291196, "eval_runtime": 573.231, "eval_samples_per_second": 5.035, "eval_steps_per_second": 0.159, "learning_rate": 0.0001, "step": 5187 }, { "epoch": 20.0, "eval_accuracy": 0.27893277893277896, "eval_f1_macro": 0.6733647561041333, "eval_f1_micro": 0.8066104665720725, "eval_loss": 0.1306440383195877, "eval_roc_auc": 0.8722063701403293, "eval_runtime": 585.8615, "eval_samples_per_second": 4.926, "eval_steps_per_second": 0.155, "learning_rate": 0.0001, "step": 5460 }, { "epoch": 20.146520146520146, "grad_norm": 0.15949666500091553, "learning_rate": 0.0001, "loss": 0.1412, "step": 5500 }, { "epoch": 21.0, "eval_accuracy": 0.2817047817047817, "eval_f1_macro": 0.6728395801753237, "eval_f1_micro": 0.8037271837637748, "eval_loss": 0.1302667111158371, "eval_roc_auc": 0.8650841471907833, "eval_runtime": 573.2529, "eval_samples_per_second": 5.034, "eval_steps_per_second": 0.159, "learning_rate": 0.0001, "step": 5733 }, { "epoch": 21.978021978021978, "grad_norm": 0.1902228742837906, "learning_rate": 0.0001, "loss": 0.1385, "step": 6000 }, { "epoch": 22.0, "eval_accuracy": 0.2841302841302841, "eval_f1_macro": 0.6735047356746011, "eval_f1_micro": 0.8074214632089395, "eval_loss": 0.12870918214321136, "eval_roc_auc": 0.869659164440877, "eval_runtime": 581.2149, "eval_samples_per_second": 4.965, "eval_steps_per_second": 0.157, "learning_rate": 0.0001, "step": 6006 }, { "epoch": 23.0, "eval_accuracy": 0.2841302841302841, "eval_f1_macro": 0.678520497542563, "eval_f1_micro": 0.8058198574902932, "eval_loss": 0.1287251114845276, "eval_roc_auc": 0.8654212081182214, "eval_runtime": 576.7801, "eval_samples_per_second": 5.004, "eval_steps_per_second": 0.158, "learning_rate": 0.0001, "step": 6279 }, { "epoch": 23.80952380952381, "grad_norm": 0.20738890767097473, "learning_rate": 0.0001, "loss": 0.1377, "step": 6500 }, { "epoch": 24.0, "eval_accuracy": 0.2869022869022869, "eval_f1_macro": 0.6840871439155845, "eval_f1_micro": 0.8057504997660669, "eval_loss": 0.1279863715171814, "eval_roc_auc": 0.8662783338824032, "eval_runtime": 580.6409, "eval_samples_per_second": 4.97, "eval_steps_per_second": 0.157, "learning_rate": 0.0001, "step": 6552 }, { "epoch": 25.0, "eval_accuracy": 0.28586278586278585, "eval_f1_macro": 0.6787317976982782, "eval_f1_micro": 0.8074392712550608, "eval_loss": 0.127402663230896, "eval_roc_auc": 0.8696325247161917, "eval_runtime": 571.2341, "eval_samples_per_second": 5.052, "eval_steps_per_second": 0.159, "learning_rate": 0.0001, "step": 6825 }, { "epoch": 25.641025641025642, "grad_norm": 0.18043966591358185, "learning_rate": 0.0001, "loss": 0.1361, "step": 7000 }, { "epoch": 26.0, "eval_accuracy": 0.28586278586278585, "eval_f1_macro": 0.6740298841901063, "eval_f1_micro": 0.8063818050664064, "eval_loss": 0.12828372418880463, "eval_roc_auc": 0.8672987185031534, "eval_runtime": 580.3575, "eval_samples_per_second": 4.973, "eval_steps_per_second": 0.157, "learning_rate": 0.0001, "step": 7098 }, { "epoch": 27.0, "eval_accuracy": 0.2882882882882883, "eval_f1_macro": 0.68897744745899, "eval_f1_micro": 0.8110456615281781, "eval_loss": 0.12681305408477783, "eval_roc_auc": 0.8743537519356053, "eval_runtime": 580.6984, "eval_samples_per_second": 4.97, "eval_steps_per_second": 0.157, "learning_rate": 0.0001, "step": 7371 }, { "epoch": 27.47252747252747, "grad_norm": 0.17303021252155304, "learning_rate": 0.0001, "loss": 0.1354, "step": 7500 }, { "epoch": 28.0, "eval_accuracy": 0.28932778932778935, "eval_f1_macro": 0.6812786729949134, "eval_f1_micro": 0.8099940913311386, "eval_loss": 0.12666279077529907, "eval_roc_auc": 0.8708112319303257, "eval_runtime": 576.4341, "eval_samples_per_second": 5.007, "eval_steps_per_second": 0.158, "learning_rate": 0.0001, "step": 7644 }, { "epoch": 29.0, "eval_accuracy": 0.29175329175329173, "eval_f1_macro": 0.6881122302734826, "eval_f1_micro": 0.8081058020477816, "eval_loss": 0.12675043940544128, "eval_roc_auc": 0.8666940799181009, "eval_runtime": 578.6537, "eval_samples_per_second": 4.987, "eval_steps_per_second": 0.157, "learning_rate": 0.0001, "step": 7917 }, { "epoch": 29.304029304029303, "grad_norm": 0.17676015198230743, "learning_rate": 0.0001, "loss": 0.1339, "step": 8000 }, { "epoch": 30.0, "eval_accuracy": 0.2927927927927928, "eval_f1_macro": 0.6872571297964245, "eval_f1_micro": 0.8108657880239013, "eval_loss": 0.12635387480258942, "eval_roc_auc": 0.8701037366439205, "eval_runtime": 582.8932, "eval_samples_per_second": 4.951, "eval_steps_per_second": 0.156, "learning_rate": 0.0001, "step": 8190 }, { "epoch": 31.0, "eval_accuracy": 0.29140679140679143, "eval_f1_macro": 0.6823767206574823, "eval_f1_micro": 0.8089332139965051, "eval_loss": 0.1258317530155182, "eval_roc_auc": 0.867421427567313, "eval_runtime": 578.6563, "eval_samples_per_second": 4.987, "eval_steps_per_second": 0.157, "learning_rate": 0.0001, "step": 8463 }, { "epoch": 31.135531135531135, "grad_norm": 0.19538064301013947, "learning_rate": 0.0001, "loss": 0.1332, "step": 8500 }, { "epoch": 32.0, "eval_accuracy": 0.29313929313929316, "eval_f1_macro": 0.6924178674344362, "eval_f1_micro": 0.8112645318336341, "eval_loss": 0.1260402798652649, "eval_roc_auc": 0.8731307927316745, "eval_runtime": 588.1542, "eval_samples_per_second": 4.907, "eval_steps_per_second": 0.155, "learning_rate": 0.0001, "step": 8736 }, { "epoch": 32.967032967032964, "grad_norm": 0.20809406042099, "learning_rate": 0.0001, "loss": 0.1321, "step": 9000 }, { "epoch": 33.0, "eval_accuracy": 0.2910602910602911, "eval_f1_macro": 0.6959916792345996, "eval_f1_micro": 0.8133097762073027, "eval_loss": 0.1250443458557129, "eval_roc_auc": 0.8735809182308003, "eval_runtime": 593.1758, "eval_samples_per_second": 4.865, "eval_steps_per_second": 0.153, "learning_rate": 0.0001, "step": 9009 }, { "epoch": 34.0, "eval_accuracy": 0.29417879417879417, "eval_f1_macro": 0.6891130310994343, "eval_f1_micro": 0.8116187492060803, "eval_loss": 0.12511762976646423, "eval_roc_auc": 0.8707582878249215, "eval_runtime": 596.9449, "eval_samples_per_second": 4.835, "eval_steps_per_second": 0.152, "learning_rate": 0.0001, "step": 9282 }, { "epoch": 34.798534798534796, "grad_norm": 0.23537498712539673, "learning_rate": 0.0001, "loss": 0.1309, "step": 9500 }, { "epoch": 35.0, "eval_accuracy": 0.2955647955647956, "eval_f1_macro": 0.6945448365895581, "eval_f1_micro": 0.8124288545048274, "eval_loss": 0.12488266825675964, "eval_roc_auc": 0.8724487031041583, "eval_runtime": 587.4735, "eval_samples_per_second": 4.913, "eval_steps_per_second": 0.155, "learning_rate": 0.0001, "step": 9555 }, { "epoch": 36.0, "eval_accuracy": 0.29417879417879417, "eval_f1_macro": 0.6971439978031583, "eval_f1_micro": 0.8115410842141152, "eval_loss": 0.1252983808517456, "eval_roc_auc": 0.8688457809436633, "eval_runtime": 588.3939, "eval_samples_per_second": 4.905, "eval_steps_per_second": 0.155, "learning_rate": 0.0001, "step": 9828 }, { "epoch": 36.63003663003663, "grad_norm": 0.2208484709262848, "learning_rate": 0.0001, "loss": 0.1305, "step": 10000 }, { "epoch": 37.0, "eval_accuracy": 0.29521829521829523, "eval_f1_macro": 0.6961006786941204, "eval_f1_micro": 0.8116249469664828, "eval_loss": 0.12479764968156815, "eval_roc_auc": 0.870187076326433, "eval_runtime": 588.2172, "eval_samples_per_second": 4.906, "eval_steps_per_second": 0.155, "learning_rate": 0.0001, "step": 10101 }, { "epoch": 38.0, "eval_accuracy": 0.3004158004158004, "eval_f1_macro": 0.6991177533793484, "eval_f1_micro": 0.8129930394431555, "eval_loss": 0.12497606873512268, "eval_roc_auc": 0.8725859392689393, "eval_runtime": 593.6086, "eval_samples_per_second": 4.862, "eval_steps_per_second": 0.153, "learning_rate": 0.0001, "step": 10374 }, { "epoch": 38.46153846153846, "grad_norm": 0.27702799439430237, "learning_rate": 0.0001, "loss": 0.1285, "step": 10500 }, { "epoch": 39.0, "eval_accuracy": 0.29521829521829523, "eval_f1_macro": 0.6970545191351545, "eval_f1_micro": 0.8141541282874172, "eval_loss": 0.1252022236585617, "eval_roc_auc": 0.8767588388621269, "eval_runtime": 586.6264, "eval_samples_per_second": 4.92, "eval_steps_per_second": 0.155, "learning_rate": 0.0001, "step": 10647 }, { "epoch": 40.0, "eval_accuracy": 0.2955647955647956, "eval_f1_macro": 0.7070171403235663, "eval_f1_micro": 0.816655585106383, "eval_loss": 0.12485132366418839, "eval_roc_auc": 0.8790201839509206, "eval_runtime": 586.1423, "eval_samples_per_second": 4.924, "eval_steps_per_second": 0.155, "learning_rate": 0.0001, "step": 10920 }, { "epoch": 40.29304029304029, "grad_norm": 0.24468237161636353, "learning_rate": 0.0001, "loss": 0.129, "step": 11000 }, { "epoch": 41.0, "eval_accuracy": 0.28967428967428965, "eval_f1_macro": 0.6961881266838973, "eval_f1_micro": 0.8103573101656658, "eval_loss": 0.12500154972076416, "eval_roc_auc": 0.8684120872988876, "eval_runtime": 589.4017, "eval_samples_per_second": 4.896, "eval_steps_per_second": 0.154, "learning_rate": 0.0001, "step": 11193 }, { "epoch": 42.0, "eval_accuracy": 0.3038808038808039, "eval_f1_macro": 0.7064304960359926, "eval_f1_micro": 0.816535301022975, "eval_loss": 0.12350151687860489, "eval_roc_auc": 0.8763375931853813, "eval_runtime": 591.1388, "eval_samples_per_second": 4.882, "eval_steps_per_second": 0.154, "learning_rate": 0.0001, "step": 11466 }, { "epoch": 42.124542124542124, "grad_norm": 0.29772019386291504, "learning_rate": 0.0001, "loss": 0.1277, "step": 11500 }, { "epoch": 43.0, "eval_accuracy": 0.2955647955647956, "eval_f1_macro": 0.7047254887418923, "eval_f1_micro": 0.8150093808630394, "eval_loss": 0.12367021292448044, "eval_roc_auc": 0.8771339814503815, "eval_runtime": 597.1526, "eval_samples_per_second": 4.833, "eval_steps_per_second": 0.152, "learning_rate": 0.0001, "step": 11739 }, { "epoch": 43.956043956043956, "grad_norm": 0.32455483078956604, "learning_rate": 0.0001, "loss": 0.1279, "step": 12000 }, { "epoch": 44.0, "eval_accuracy": 0.30076230076230076, "eval_f1_macro": 0.705396366545505, "eval_f1_micro": 0.8170209225905745, "eval_loss": 0.12371324002742767, "eval_roc_auc": 0.8789390307110687, "eval_runtime": 618.592, "eval_samples_per_second": 4.665, "eval_steps_per_second": 0.147, "learning_rate": 0.0001, "step": 12012 }, { "epoch": 45.0, "eval_accuracy": 0.30145530145530147, "eval_f1_macro": 0.7058009223379548, "eval_f1_micro": 0.8163231034048448, "eval_loss": 0.12333343178033829, "eval_roc_auc": 0.8758137724962146, "eval_runtime": 593.5671, "eval_samples_per_second": 4.862, "eval_steps_per_second": 0.153, "learning_rate": 0.0001, "step": 12285 }, { "epoch": 45.78754578754579, "grad_norm": 0.2913854718208313, "learning_rate": 0.0001, "loss": 0.1264, "step": 12500 }, { "epoch": 46.0, "eval_accuracy": 0.30076230076230076, "eval_f1_macro": 0.6992655670184796, "eval_f1_micro": 0.8158692722371967, "eval_loss": 0.12297776341438293, "eval_roc_auc": 0.8746400552002995, "eval_runtime": 597.9766, "eval_samples_per_second": 4.826, "eval_steps_per_second": 0.152, "learning_rate": 0.0001, "step": 12558 }, { "epoch": 47.0, "eval_accuracy": 0.29902979902979904, "eval_f1_macro": 0.7026416067016249, "eval_f1_micro": 0.8135392426486143, "eval_loss": 0.12366960942745209, "eval_roc_auc": 0.8720258880176504, "eval_runtime": 608.7312, "eval_samples_per_second": 4.741, "eval_steps_per_second": 0.149, "learning_rate": 0.0001, "step": 12831 }, { "epoch": 47.61904761904762, "grad_norm": 0.2678842842578888, "learning_rate": 0.0001, "loss": 0.1267, "step": 13000 }, { "epoch": 48.0, "eval_accuracy": 0.30180180180180183, "eval_f1_macro": 0.7044430417074125, "eval_f1_micro": 0.8169049621530698, "eval_loss": 0.12326876819133759, "eval_roc_auc": 0.8756520830494087, "eval_runtime": 581.2988, "eval_samples_per_second": 4.965, "eval_steps_per_second": 0.157, "learning_rate": 0.0001, "step": 13104 }, { "epoch": 49.0, "eval_accuracy": 0.30214830214830213, "eval_f1_macro": 0.705026725915288, "eval_f1_micro": 0.8161126713333613, "eval_loss": 0.12315386533737183, "eval_roc_auc": 0.8761665945900701, "eval_runtime": 586.6485, "eval_samples_per_second": 4.919, "eval_steps_per_second": 0.155, "learning_rate": 0.0001, "step": 13377 }, { "epoch": 49.45054945054945, "grad_norm": 0.31903648376464844, "learning_rate": 0.0001, "loss": 0.1249, "step": 13500 }, { "epoch": 50.0, "eval_accuracy": 0.30145530145530147, "eval_f1_macro": 0.7085649491291086, "eval_f1_micro": 0.8179686845851126, "eval_loss": 0.12265044450759888, "eval_roc_auc": 0.877547259354487, "eval_runtime": 581.1034, "eval_samples_per_second": 4.966, "eval_steps_per_second": 0.157, "learning_rate": 0.0001, "step": 13650 }, { "epoch": 51.0, "eval_accuracy": 0.30214830214830213, "eval_f1_macro": 0.710831288086539, "eval_f1_micro": 0.8190420609445996, "eval_loss": 0.12310674786567688, "eval_roc_auc": 0.87941579593488, "eval_runtime": 606.1273, "eval_samples_per_second": 4.761, "eval_steps_per_second": 0.15, "learning_rate": 0.0001, "step": 13923 }, { "epoch": 51.282051282051285, "grad_norm": 0.26118701696395874, "learning_rate": 0.0001, "loss": 0.1243, "step": 14000 }, { "epoch": 52.0, "eval_accuracy": 0.30214830214830213, "eval_f1_macro": 0.704117146056294, "eval_f1_micro": 0.816390260370511, "eval_loss": 0.12280686944723129, "eval_roc_auc": 0.8743457841629654, "eval_runtime": 611.1334, "eval_samples_per_second": 4.722, "eval_steps_per_second": 0.149, "learning_rate": 0.0001, "step": 14196 }, { "epoch": 53.0, "eval_accuracy": 0.3038808038808039, "eval_f1_macro": 0.7080185810697228, "eval_f1_micro": 0.8189015751312609, "eval_loss": 0.1225290596485138, "eval_roc_auc": 0.8794495583654101, "eval_runtime": 586.3165, "eval_samples_per_second": 4.922, "eval_steps_per_second": 0.155, "learning_rate": 0.0001, "step": 14469 }, { "epoch": 53.11355311355312, "grad_norm": 0.24176084995269775, "learning_rate": 0.0001, "loss": 0.1248, "step": 14500 }, { "epoch": 54.0, "eval_accuracy": 0.30180180180180183, "eval_f1_macro": 0.7053875588266636, "eval_f1_micro": 0.8162527837304089, "eval_loss": 0.12376156449317932, "eval_roc_auc": 0.8755284752634194, "eval_runtime": 590.1707, "eval_samples_per_second": 4.89, "eval_steps_per_second": 0.154, "learning_rate": 0.0001, "step": 14742 }, { "epoch": 54.94505494505494, "grad_norm": 0.3652552366256714, "learning_rate": 0.0001, "loss": 0.1233, "step": 15000 }, { "epoch": 55.0, "eval_accuracy": 0.30284130284130284, "eval_f1_macro": 0.7092508494713976, "eval_f1_micro": 0.818075117370892, "eval_loss": 0.12211860716342926, "eval_roc_auc": 0.8772347053641084, "eval_runtime": 591.2857, "eval_samples_per_second": 4.881, "eval_steps_per_second": 0.154, "learning_rate": 0.0001, "step": 15015 }, { "epoch": 56.0, "eval_accuracy": 0.3049203049203049, "eval_f1_macro": 0.7091508009521661, "eval_f1_micro": 0.818769689935334, "eval_loss": 0.12255053967237473, "eval_roc_auc": 0.8809376807503978, "eval_runtime": 619.809, "eval_samples_per_second": 4.656, "eval_steps_per_second": 0.147, "learning_rate": 0.0001, "step": 15288 }, { "epoch": 56.776556776556774, "grad_norm": 0.2984638214111328, "learning_rate": 0.0001, "loss": 0.1237, "step": 15500 }, { "epoch": 57.0, "eval_accuracy": 0.3052668052668053, "eval_f1_macro": 0.7056269081565454, "eval_f1_micro": 0.8183564389510606, "eval_loss": 0.12233822792768478, "eval_roc_auc": 0.8785286099375562, "eval_runtime": 613.5916, "eval_samples_per_second": 4.703, "eval_steps_per_second": 0.148, "learning_rate": 0.0001, "step": 15561 }, { "epoch": 58.0, "eval_accuracy": 0.30284130284130284, "eval_f1_macro": 0.7093876090831799, "eval_f1_micro": 0.8179678964618875, "eval_loss": 0.12230789661407471, "eval_roc_auc": 0.8764581158068805, "eval_runtime": 607.5944, "eval_samples_per_second": 4.75, "eval_steps_per_second": 0.15, "learning_rate": 0.0001, "step": 15834 }, { "epoch": 58.608058608058606, "grad_norm": 0.2825208604335785, "learning_rate": 0.0001, "loss": 0.1234, "step": 16000 }, { "epoch": 59.0, "eval_accuracy": 0.30734580734580735, "eval_f1_macro": 0.7102428483836337, "eval_f1_micro": 0.8198051269184126, "eval_loss": 0.12226579338312149, "eval_roc_auc": 0.8788947787212716, "eval_runtime": 579.6776, "eval_samples_per_second": 4.979, "eval_steps_per_second": 0.157, "learning_rate": 0.0001, "step": 16107 }, { "epoch": 60.0, "eval_accuracy": 0.29799029799029797, "eval_f1_macro": 0.7068409531794828, "eval_f1_micro": 0.8173416232565955, "eval_loss": 0.1236739531159401, "eval_roc_auc": 0.8761882980515712, "eval_runtime": 577.7976, "eval_samples_per_second": 4.995, "eval_steps_per_second": 0.157, "learning_rate": 0.0001, "step": 16380 }, { "epoch": 60.43956043956044, "grad_norm": 0.313997745513916, "learning_rate": 0.0001, "loss": 0.1232, "step": 16500 }, { "epoch": 61.0, "eval_accuracy": 0.305959805959806, "eval_f1_macro": 0.7139384635953806, "eval_f1_micro": 0.8201011747982775, "eval_loss": 0.12236195057630539, "eval_roc_auc": 0.8791326851447451, "eval_runtime": 576.7269, "eval_samples_per_second": 5.004, "eval_steps_per_second": 0.158, "learning_rate": 0.0001, "step": 16653 }, { "epoch": 62.0, "eval_accuracy": 0.30284130284130284, "eval_f1_macro": 0.7188990083298508, "eval_f1_micro": 0.8209334277030684, "eval_loss": 0.12215279042720795, "eval_roc_auc": 0.8808361117698756, "eval_runtime": 585.2414, "eval_samples_per_second": 4.931, "eval_steps_per_second": 0.155, "learning_rate": 1e-05, "step": 16926 }, { "epoch": 62.27106227106227, "grad_norm": 0.322051465511322, "learning_rate": 1e-05, "loss": 0.1204, "step": 17000 }, { "epoch": 63.0, "eval_accuracy": 0.3097713097713098, "eval_f1_macro": 0.7190866276619315, "eval_f1_micro": 0.820752746564184, "eval_loss": 0.12084941565990448, "eval_roc_auc": 0.8797410795822204, "eval_runtime": 586.6749, "eval_samples_per_second": 4.919, "eval_steps_per_second": 0.155, "learning_rate": 1e-05, "step": 17199 }, { "epoch": 64.0, "eval_accuracy": 0.3108108108108108, "eval_f1_macro": 0.7187730185556146, "eval_f1_micro": 0.8218151540383014, "eval_loss": 0.12093428522348404, "eval_roc_auc": 0.8813373302757117, "eval_runtime": 589.1584, "eval_samples_per_second": 4.899, "eval_steps_per_second": 0.154, "learning_rate": 1e-05, "step": 17472 }, { "epoch": 64.1025641025641, "grad_norm": 0.32896944880485535, "learning_rate": 1e-05, "loss": 0.12, "step": 17500 }, { "epoch": 65.0, "eval_accuracy": 0.30803880803880807, "eval_f1_macro": 0.7186584702198188, "eval_f1_micro": 0.8209837715435904, "eval_loss": 0.12085793167352676, "eval_roc_auc": 0.8787241154699269, "eval_runtime": 587.8337, "eval_samples_per_second": 4.91, "eval_steps_per_second": 0.155, "learning_rate": 1e-05, "step": 17745 }, { "epoch": 65.93406593406593, "grad_norm": 0.3432726263999939, "learning_rate": 1e-05, "loss": 0.1187, "step": 18000 }, { "epoch": 66.0, "eval_accuracy": 0.3135828135828136, "eval_f1_macro": 0.7185770967712465, "eval_f1_micro": 0.8215507887488523, "eval_loss": 0.12076118588447571, "eval_roc_auc": 0.8804684272314324, "eval_runtime": 588.6607, "eval_samples_per_second": 4.903, "eval_steps_per_second": 0.155, "learning_rate": 1e-05, "step": 18018 }, { "epoch": 67.0, "eval_accuracy": 0.31115731115731116, "eval_f1_macro": 0.7239469969506999, "eval_f1_micro": 0.8232429532417151, "eval_loss": 0.1210499182343483, "eval_roc_auc": 0.8847649346106092, "eval_runtime": 591.9527, "eval_samples_per_second": 4.875, "eval_steps_per_second": 0.154, "learning_rate": 1e-05, "step": 18291 }, { "epoch": 67.76556776556777, "grad_norm": 0.3457956612110138, "learning_rate": 1e-05, "loss": 0.1179, "step": 18500 }, { "epoch": 68.0, "eval_accuracy": 0.3125433125433125, "eval_f1_macro": 0.720063006101889, "eval_f1_micro": 0.8211584808443447, "eval_loss": 0.1208076998591423, "eval_roc_auc": 0.8815457934989414, "eval_runtime": 585.5944, "eval_samples_per_second": 4.928, "eval_steps_per_second": 0.155, "learning_rate": 1e-05, "step": 18564 }, { "epoch": 69.0, "eval_accuracy": 0.31011781011781014, "eval_f1_macro": 0.7197984794848579, "eval_f1_micro": 0.821014765549839, "eval_loss": 0.12105683237314224, "eval_roc_auc": 0.8794876936812611, "eval_runtime": 586.2959, "eval_samples_per_second": 4.922, "eval_steps_per_second": 0.155, "learning_rate": 1e-05, "step": 18837 }, { "epoch": 69.59706959706959, "grad_norm": 0.2593284845352173, "learning_rate": 1e-05, "loss": 0.1177, "step": 19000 }, { "epoch": 70.0, "eval_accuracy": 0.31115731115731116, "eval_f1_macro": 0.719699492247552, "eval_f1_micro": 0.821309285237141, "eval_loss": 0.12111356854438782, "eval_roc_auc": 0.8802103867562204, "eval_runtime": 592.4319, "eval_samples_per_second": 4.871, "eval_steps_per_second": 0.154, "learning_rate": 1e-05, "step": 19110 }, { "epoch": 71.0, "eval_accuracy": 0.31115731115731116, "eval_f1_macro": 0.7163966165871272, "eval_f1_micro": 0.8206033106461642, "eval_loss": 0.12063230574131012, "eval_roc_auc": 0.8779552656514418, "eval_runtime": 588.5502, "eval_samples_per_second": 4.904, "eval_steps_per_second": 0.155, "learning_rate": 1e-05, "step": 19383 }, { "epoch": 71.42857142857143, "grad_norm": 0.3827510476112366, "learning_rate": 1e-05, "loss": 0.1179, "step": 19500 }, { "epoch": 72.0, "eval_accuracy": 0.3128898128898129, "eval_f1_macro": 0.7171818163524962, "eval_f1_micro": 0.8206118081490495, "eval_loss": 0.12075439840555191, "eval_roc_auc": 0.8783204820582929, "eval_runtime": 584.4453, "eval_samples_per_second": 4.938, "eval_steps_per_second": 0.156, "learning_rate": 1e-05, "step": 19656 }, { "epoch": 73.0, "eval_accuracy": 0.31323631323631324, "eval_f1_macro": 0.7214307826544399, "eval_f1_micro": 0.8217462106977327, "eval_loss": 0.12078637629747391, "eval_roc_auc": 0.8804145307491638, "eval_runtime": 588.6768, "eval_samples_per_second": 4.903, "eval_steps_per_second": 0.155, "learning_rate": 1e-05, "step": 19929 }, { "epoch": 73.26007326007326, "grad_norm": 0.29221299290657043, "learning_rate": 1e-05, "loss": 0.1177, "step": 20000 }, { "epoch": 74.0, "eval_accuracy": 0.3108108108108108, "eval_f1_macro": 0.715483654869702, "eval_f1_micro": 0.8200794388574326, "eval_loss": 0.12086880952119827, "eval_roc_auc": 0.8759783190736136, "eval_runtime": 595.2281, "eval_samples_per_second": 4.849, "eval_steps_per_second": 0.153, "learning_rate": 1e-05, "step": 20202 }, { "epoch": 75.0, "eval_accuracy": 0.3153153153153153, "eval_f1_macro": 0.7151281975948514, "eval_f1_micro": 0.8207404925448148, "eval_loss": 0.12054955214262009, "eval_roc_auc": 0.8789508616462007, "eval_runtime": 588.2243, "eval_samples_per_second": 4.906, "eval_steps_per_second": 0.155, "learning_rate": 1e-05, "step": 20475 }, { "epoch": 75.0915750915751, "grad_norm": 0.3488374352455139, "learning_rate": 1e-05, "loss": 0.1171, "step": 20500 }, { "epoch": 76.0, "eval_accuracy": 0.31566181566181567, "eval_f1_macro": 0.722403613960237, "eval_f1_micro": 0.8221261740503699, "eval_loss": 0.12033110857009888, "eval_roc_auc": 0.8820333836259857, "eval_runtime": 591.3419, "eval_samples_per_second": 4.88, "eval_steps_per_second": 0.154, "learning_rate": 1e-05, "step": 20748 }, { "epoch": 76.92307692307692, "grad_norm": 0.37617847323417664, "learning_rate": 1e-05, "loss": 0.1171, "step": 21000 }, { "epoch": 77.0, "eval_accuracy": 0.3135828135828136, "eval_f1_macro": 0.7234417953998725, "eval_f1_micro": 0.8231996372480317, "eval_loss": 0.12079885601997375, "eval_roc_auc": 0.8850904999468814, "eval_runtime": 598.1869, "eval_samples_per_second": 4.825, "eval_steps_per_second": 0.152, "learning_rate": 1e-05, "step": 21021 }, { "epoch": 78.0, "eval_accuracy": 0.3115038115038115, "eval_f1_macro": 0.7233107692667189, "eval_f1_micro": 0.8230326613403982, "eval_loss": 0.12099317461252213, "eval_roc_auc": 0.8837231818723246, "eval_runtime": 585.8629, "eval_samples_per_second": 4.926, "eval_steps_per_second": 0.155, "learning_rate": 1e-05, "step": 21294 }, { "epoch": 78.75457875457876, "grad_norm": 0.36047452688217163, "learning_rate": 1e-05, "loss": 0.1168, "step": 21500 }, { "epoch": 79.0, "eval_accuracy": 0.31011781011781014, "eval_f1_macro": 0.7172980311198172, "eval_f1_micro": 0.8202369947054374, "eval_loss": 0.12051720172166824, "eval_roc_auc": 0.8777043478820749, "eval_runtime": 595.4358, "eval_samples_per_second": 4.847, "eval_steps_per_second": 0.153, "learning_rate": 1e-05, "step": 21567 }, { "epoch": 80.0, "eval_accuracy": 0.31185031185031187, "eval_f1_macro": 0.7248558336823359, "eval_f1_micro": 0.8231793006530544, "eval_loss": 0.12073608487844467, "eval_roc_auc": 0.8842671366132298, "eval_runtime": 604.2206, "eval_samples_per_second": 4.776, "eval_steps_per_second": 0.151, "learning_rate": 1e-05, "step": 21840 }, { "epoch": 80.58608058608058, "grad_norm": 0.35180962085723877, "learning_rate": 1e-05, "loss": 0.1171, "step": 22000 }, { "epoch": 81.0, "eval_accuracy": 0.3128898128898129, "eval_f1_macro": 0.7212996450160633, "eval_f1_micro": 0.822080253872813, "eval_loss": 0.12031927704811096, "eval_roc_auc": 0.8805991577232666, "eval_runtime": 597.4583, "eval_samples_per_second": 4.83, "eval_steps_per_second": 0.152, "learning_rate": 1e-05, "step": 22113 }, { "epoch": 82.0, "eval_accuracy": 0.3142758142758143, "eval_f1_macro": 0.7178066335813648, "eval_f1_micro": 0.8215302193202746, "eval_loss": 0.1204884946346283, "eval_roc_auc": 0.8795852787754633, "eval_runtime": 583.2227, "eval_samples_per_second": 4.948, "eval_steps_per_second": 0.156, "learning_rate": 1e-05, "step": 22386 }, { "epoch": 82.41758241758242, "grad_norm": 0.3334418535232544, "learning_rate": 1.0000000000000002e-06, "loss": 0.1157, "step": 22500 }, { "epoch": 83.0, "eval_accuracy": 0.31115731115731116, "eval_f1_macro": 0.7113142483409282, "eval_f1_micro": 0.8179971218149497, "eval_loss": 0.12136666476726532, "eval_roc_auc": 0.8743461195069316, "eval_runtime": 580.12, "eval_samples_per_second": 4.975, "eval_steps_per_second": 0.157, "learning_rate": 1.0000000000000002e-06, "step": 22659 }, { "epoch": 84.0, "eval_accuracy": 0.3115038115038115, "eval_f1_macro": 0.7250649377579587, "eval_f1_micro": 0.8234267187629895, "eval_loss": 0.12041348963975906, "eval_roc_auc": 0.8827056007271329, "eval_runtime": 581.6672, "eval_samples_per_second": 4.962, "eval_steps_per_second": 0.156, "learning_rate": 1.0000000000000002e-06, "step": 22932 }, { "epoch": 84.24908424908425, "grad_norm": 0.3593423068523407, "learning_rate": 1.0000000000000002e-06, "loss": 0.1169, "step": 23000 }, { "epoch": 85.0, "eval_accuracy": 0.31323631323631324, "eval_f1_macro": 0.7213085414821642, "eval_f1_micro": 0.8229879338226147, "eval_loss": 0.12035409361124039, "eval_roc_auc": 0.8831987441502598, "eval_runtime": 579.5593, "eval_samples_per_second": 4.98, "eval_steps_per_second": 0.157, "learning_rate": 1.0000000000000002e-06, "step": 23205 }, { "epoch": 86.0, "eval_accuracy": 0.3076923076923077, "eval_f1_macro": 0.7218120076279698, "eval_f1_micro": 0.8196243388446962, "eval_loss": 0.12250283360481262, "eval_roc_auc": 0.8800111790264615, "eval_runtime": 583.2647, "eval_samples_per_second": 4.948, "eval_steps_per_second": 0.156, "learning_rate": 1.0000000000000002e-06, "step": 23478 }, { "epoch": 86.08058608058609, "grad_norm": 0.3458651602268219, "learning_rate": 1.0000000000000002e-06, "loss": 0.1157, "step": 23500 }, { "epoch": 87.0, "eval_accuracy": 0.3090783090783091, "eval_f1_macro": 0.7151954083158903, "eval_f1_micro": 0.8203968852047224, "eval_loss": 0.12075748294591904, "eval_roc_auc": 0.8788858049167326, "eval_runtime": 580.273, "eval_samples_per_second": 4.974, "eval_steps_per_second": 0.157, "learning_rate": 1.0000000000000002e-06, "step": 23751 }, { "epoch": 87.91208791208791, "grad_norm": 0.2947898507118225, "learning_rate": 1.0000000000000002e-06, "loss": 0.1156, "step": 24000 }, { "epoch": 88.0, "eval_accuracy": 0.30838530838530837, "eval_f1_macro": 0.7168335672232342, "eval_f1_micro": 0.8215440749647566, "eval_loss": 0.12086642533540726, "eval_roc_auc": 0.8824075738373759, "eval_runtime": 581.7056, "eval_samples_per_second": 4.961, "eval_steps_per_second": 0.156, "learning_rate": 1.0000000000000002e-06, "step": 24024 }, { "epoch": 89.0, "eval_accuracy": 0.3163548163548164, "eval_f1_macro": 0.733984551040518, "eval_f1_micro": 0.8244650323850127, "eval_loss": 0.12105640023946762, "eval_roc_auc": 0.8874893628893881, "eval_runtime": 592.213, "eval_samples_per_second": 4.873, "eval_steps_per_second": 0.154, "learning_rate": 1.0000000000000002e-07, "step": 24297 }, { "epoch": 89.74358974358974, "grad_norm": 0.27441146969795227, "learning_rate": 1.0000000000000002e-07, "loss": 0.1157, "step": 24500 }, { "epoch": 90.0, "eval_accuracy": 0.31185031185031187, "eval_f1_macro": 0.7245620055819162, "eval_f1_micro": 0.8232248520710059, "eval_loss": 0.12090421468019485, "eval_roc_auc": 0.8860665789228, "eval_runtime": 602.5705, "eval_samples_per_second": 4.789, "eval_steps_per_second": 0.151, "learning_rate": 1.0000000000000002e-07, "step": 24570 }, { "epoch": 91.0, "eval_accuracy": 0.3115038115038115, "eval_f1_macro": 0.7163143946337084, "eval_f1_micro": 0.8200938495056143, "eval_loss": 0.12043782323598862, "eval_roc_auc": 0.8784951828509925, "eval_runtime": 602.4626, "eval_samples_per_second": 4.79, "eval_steps_per_second": 0.151, "learning_rate": 1.0000000000000002e-07, "step": 24843 }, { "epoch": 91.0, "learning_rate": 1.0000000000000002e-07, "step": 24843, "total_flos": 1.1760993126572918e+21, "train_loss": 0.13338198287712655, "train_runtime": 215448.2605, "train_samples_per_second": 6.068, "train_steps_per_second": 0.19 } ], "logging_steps": 500, "max_steps": 40950, "num_input_tokens_seen": 0, "num_train_epochs": 150, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 10, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.1760993126572918e+21, "train_batch_size": 32, "trial_name": null, "trial_params": null }