{ "best_metric": 1.0647395849227905, "best_model_checkpoint": "autotrain-qs45q-p9syi/checkpoint-453", "epoch": 3.0, "eval_steps": 500, "global_step": 453, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.013245033112582781, "grad_norm": 3.4723896980285645, "learning_rate": 1.3157894736842106e-06, "loss": 3.0146, "step": 2 }, { "epoch": 0.026490066225165563, "grad_norm": 3.5154001712799072, "learning_rate": 2.631578947368421e-06, "loss": 3.0404, "step": 4 }, { "epoch": 0.039735099337748346, "grad_norm": 3.4036054611206055, "learning_rate": 3.9473684210526315e-06, "loss": 3.0648, "step": 6 }, { "epoch": 0.052980132450331126, "grad_norm": 3.3698582649230957, "learning_rate": 5.263157894736842e-06, "loss": 3.0555, "step": 8 }, { "epoch": 0.06622516556291391, "grad_norm": 3.074321985244751, "learning_rate": 6.578947368421053e-06, "loss": 3.0161, "step": 10 }, { "epoch": 0.07947019867549669, "grad_norm": 3.69840407371521, "learning_rate": 7.894736842105263e-06, "loss": 3.0226, "step": 12 }, { "epoch": 0.09271523178807947, "grad_norm": 3.0229387283325195, "learning_rate": 9.210526315789474e-06, "loss": 3.0117, "step": 14 }, { "epoch": 0.10596026490066225, "grad_norm": 3.633251190185547, "learning_rate": 1.0526315789473684e-05, "loss": 2.9922, "step": 16 }, { "epoch": 0.11920529801324503, "grad_norm": 3.404634952545166, "learning_rate": 1.1842105263157895e-05, "loss": 2.9323, "step": 18 }, { "epoch": 0.13245033112582782, "grad_norm": 4.926875114440918, "learning_rate": 1.3157894736842106e-05, "loss": 2.9597, "step": 20 }, { "epoch": 0.1456953642384106, "grad_norm": 3.341071367263794, "learning_rate": 1.4473684210526317e-05, "loss": 2.927, "step": 22 }, { "epoch": 0.15894039735099338, "grad_norm": 3.596402645111084, "learning_rate": 1.5789473684210526e-05, "loss": 2.8622, "step": 24 }, { "epoch": 0.17218543046357615, "grad_norm": 3.554511547088623, "learning_rate": 1.7105263157894737e-05, "loss": 2.8552, "step": 26 }, { "epoch": 0.18543046357615894, "grad_norm": 3.871518135070801, "learning_rate": 1.8421052631578947e-05, "loss": 2.8545, "step": 28 }, { "epoch": 0.1986754966887417, "grad_norm": 4.008641719818115, "learning_rate": 1.9736842105263158e-05, "loss": 2.8275, "step": 30 }, { "epoch": 0.2119205298013245, "grad_norm": 4.181613445281982, "learning_rate": 2.105263157894737e-05, "loss": 2.7814, "step": 32 }, { "epoch": 0.2251655629139073, "grad_norm": 4.396944999694824, "learning_rate": 2.236842105263158e-05, "loss": 2.6354, "step": 34 }, { "epoch": 0.23841059602649006, "grad_norm": 4.029453754425049, "learning_rate": 2.368421052631579e-05, "loss": 2.5842, "step": 36 }, { "epoch": 0.25165562913907286, "grad_norm": 3.9177920818328857, "learning_rate": 2.5e-05, "loss": 2.6199, "step": 38 }, { "epoch": 0.26490066225165565, "grad_norm": 5.803523063659668, "learning_rate": 2.6315789473684212e-05, "loss": 2.5609, "step": 40 }, { "epoch": 0.2781456953642384, "grad_norm": 6.542681694030762, "learning_rate": 2.7631578947368426e-05, "loss": 2.2103, "step": 42 }, { "epoch": 0.2913907284768212, "grad_norm": 4.970069408416748, "learning_rate": 2.8947368421052634e-05, "loss": 2.5223, "step": 44 }, { "epoch": 0.304635761589404, "grad_norm": 7.797888278961182, "learning_rate": 3.0263157894736844e-05, "loss": 2.2236, "step": 46 }, { "epoch": 0.31788079470198677, "grad_norm": 5.971277713775635, "learning_rate": 3.157894736842105e-05, "loss": 2.6381, "step": 48 }, { "epoch": 0.33112582781456956, "grad_norm": 6.939202785491943, "learning_rate": 3.289473684210527e-05, "loss": 2.3483, "step": 50 }, { "epoch": 0.3443708609271523, "grad_norm": 5.877003192901611, "learning_rate": 3.421052631578947e-05, "loss": 1.9744, "step": 52 }, { "epoch": 0.3576158940397351, "grad_norm": 5.517603397369385, "learning_rate": 3.5526315789473684e-05, "loss": 1.9791, "step": 54 }, { "epoch": 0.3708609271523179, "grad_norm": 7.6599440574646, "learning_rate": 3.6842105263157895e-05, "loss": 2.0475, "step": 56 }, { "epoch": 0.3841059602649007, "grad_norm": 6.77654504776001, "learning_rate": 3.815789473684211e-05, "loss": 1.923, "step": 58 }, { "epoch": 0.3973509933774834, "grad_norm": 6.017172336578369, "learning_rate": 3.9473684210526316e-05, "loss": 1.9688, "step": 60 }, { "epoch": 0.4105960264900662, "grad_norm": 5.819124221801758, "learning_rate": 4.078947368421053e-05, "loss": 2.2777, "step": 62 }, { "epoch": 0.423841059602649, "grad_norm": 5.240574359893799, "learning_rate": 4.210526315789474e-05, "loss": 1.7219, "step": 64 }, { "epoch": 0.4370860927152318, "grad_norm": 7.376441478729248, "learning_rate": 4.342105263157895e-05, "loss": 1.5784, "step": 66 }, { "epoch": 0.4503311258278146, "grad_norm": 5.029400825500488, "learning_rate": 4.473684210526316e-05, "loss": 1.6895, "step": 68 }, { "epoch": 0.46357615894039733, "grad_norm": 10.123241424560547, "learning_rate": 4.605263157894737e-05, "loss": 1.6955, "step": 70 }, { "epoch": 0.4768211920529801, "grad_norm": 6.67560338973999, "learning_rate": 4.736842105263158e-05, "loss": 1.9591, "step": 72 }, { "epoch": 0.4900662251655629, "grad_norm": 8.467432975769043, "learning_rate": 4.868421052631579e-05, "loss": 2.0633, "step": 74 }, { "epoch": 0.5033112582781457, "grad_norm": 5.2431535720825195, "learning_rate": 5e-05, "loss": 1.3436, "step": 76 }, { "epoch": 0.5165562913907285, "grad_norm": 7.770199298858643, "learning_rate": 4.985272459499264e-05, "loss": 1.9379, "step": 78 }, { "epoch": 0.5298013245033113, "grad_norm": 5.734556198120117, "learning_rate": 4.9705449189985276e-05, "loss": 1.7279, "step": 80 }, { "epoch": 0.543046357615894, "grad_norm": 6.759193420410156, "learning_rate": 4.955817378497791e-05, "loss": 1.4606, "step": 82 }, { "epoch": 0.5562913907284768, "grad_norm": 10.67810344696045, "learning_rate": 4.941089837997055e-05, "loss": 1.3069, "step": 84 }, { "epoch": 0.5695364238410596, "grad_norm": 7.0206098556518555, "learning_rate": 4.926362297496319e-05, "loss": 1.8698, "step": 86 }, { "epoch": 0.5827814569536424, "grad_norm": 4.365483283996582, "learning_rate": 4.911634756995582e-05, "loss": 1.5244, "step": 88 }, { "epoch": 0.5960264900662252, "grad_norm": 8.567715644836426, "learning_rate": 4.8969072164948454e-05, "loss": 1.7503, "step": 90 }, { "epoch": 0.609271523178808, "grad_norm": 7.213935852050781, "learning_rate": 4.882179675994109e-05, "loss": 1.6514, "step": 92 }, { "epoch": 0.6225165562913907, "grad_norm": 5.607529163360596, "learning_rate": 4.867452135493373e-05, "loss": 1.0184, "step": 94 }, { "epoch": 0.6357615894039735, "grad_norm": 9.907747268676758, "learning_rate": 4.8527245949926364e-05, "loss": 1.7225, "step": 96 }, { "epoch": 0.6490066225165563, "grad_norm": 13.226700782775879, "learning_rate": 4.8379970544919e-05, "loss": 1.9432, "step": 98 }, { "epoch": 0.6622516556291391, "grad_norm": 11.118176460266113, "learning_rate": 4.823269513991164e-05, "loss": 1.416, "step": 100 }, { "epoch": 0.6754966887417219, "grad_norm": 10.29113483428955, "learning_rate": 4.8085419734904275e-05, "loss": 1.6586, "step": 102 }, { "epoch": 0.6887417218543046, "grad_norm": 9.081689834594727, "learning_rate": 4.793814432989691e-05, "loss": 1.305, "step": 104 }, { "epoch": 0.7019867549668874, "grad_norm": 6.368233680725098, "learning_rate": 4.779086892488955e-05, "loss": 1.1645, "step": 106 }, { "epoch": 0.7152317880794702, "grad_norm": 12.125802040100098, "learning_rate": 4.764359351988218e-05, "loss": 0.8825, "step": 108 }, { "epoch": 0.7284768211920529, "grad_norm": 14.471185684204102, "learning_rate": 4.7496318114874815e-05, "loss": 1.3133, "step": 110 }, { "epoch": 0.7417218543046358, "grad_norm": 10.047285079956055, "learning_rate": 4.734904270986745e-05, "loss": 1.0271, "step": 112 }, { "epoch": 0.7549668874172185, "grad_norm": 10.376700401306152, "learning_rate": 4.720176730486009e-05, "loss": 1.5487, "step": 114 }, { "epoch": 0.7682119205298014, "grad_norm": 9.097859382629395, "learning_rate": 4.7054491899852726e-05, "loss": 1.7211, "step": 116 }, { "epoch": 0.7814569536423841, "grad_norm": 11.168022155761719, "learning_rate": 4.690721649484536e-05, "loss": 1.3699, "step": 118 }, { "epoch": 0.7947019867549668, "grad_norm": 7.571075439453125, "learning_rate": 4.6759941089838e-05, "loss": 0.7628, "step": 120 }, { "epoch": 0.8079470198675497, "grad_norm": 7.332944393157959, "learning_rate": 4.661266568483064e-05, "loss": 1.2271, "step": 122 }, { "epoch": 0.8211920529801324, "grad_norm": 9.472811698913574, "learning_rate": 4.6465390279823274e-05, "loss": 0.9225, "step": 124 }, { "epoch": 0.8344370860927153, "grad_norm": 7.298526287078857, "learning_rate": 4.631811487481591e-05, "loss": 1.4189, "step": 126 }, { "epoch": 0.847682119205298, "grad_norm": 6.718891620635986, "learning_rate": 4.617083946980855e-05, "loss": 0.9269, "step": 128 }, { "epoch": 0.8609271523178808, "grad_norm": 5.722424507141113, "learning_rate": 4.602356406480118e-05, "loss": 1.5321, "step": 130 }, { "epoch": 0.8741721854304636, "grad_norm": 8.258366584777832, "learning_rate": 4.5876288659793814e-05, "loss": 0.9162, "step": 132 }, { "epoch": 0.8874172185430463, "grad_norm": 6.919400691986084, "learning_rate": 4.572901325478645e-05, "loss": 1.0685, "step": 134 }, { "epoch": 0.9006622516556292, "grad_norm": 6.770501613616943, "learning_rate": 4.558173784977909e-05, "loss": 0.7407, "step": 136 }, { "epoch": 0.9139072847682119, "grad_norm": 6.387173652648926, "learning_rate": 4.5434462444771725e-05, "loss": 0.6707, "step": 138 }, { "epoch": 0.9271523178807947, "grad_norm": 11.18732738494873, "learning_rate": 4.528718703976436e-05, "loss": 1.3069, "step": 140 }, { "epoch": 0.9403973509933775, "grad_norm": 4.17434549331665, "learning_rate": 4.5139911634757e-05, "loss": 0.7223, "step": 142 }, { "epoch": 0.9536423841059603, "grad_norm": 7.86276388168335, "learning_rate": 4.4992636229749635e-05, "loss": 0.7461, "step": 144 }, { "epoch": 0.9668874172185431, "grad_norm": 11.436422348022461, "learning_rate": 4.484536082474227e-05, "loss": 1.4973, "step": 146 }, { "epoch": 0.9801324503311258, "grad_norm": 11.492631912231445, "learning_rate": 4.469808541973491e-05, "loss": 1.1266, "step": 148 }, { "epoch": 0.9933774834437086, "grad_norm": 6.025942802429199, "learning_rate": 4.4550810014727546e-05, "loss": 0.5809, "step": 150 }, { "epoch": 1.0, "eval_accuracy": 0.62, "eval_f1_macro": 0.2182729551150604, "eval_f1_micro": 0.62, "eval_f1_weighted": 0.5708288904078378, "eval_loss": 1.3821334838867188, "eval_precision_macro": 0.20881118881118882, "eval_precision_micro": 0.62, "eval_precision_weighted": 0.5551748251748252, "eval_recall_macro": 0.25717532467532467, "eval_recall_micro": 0.62, "eval_recall_weighted": 0.62, "eval_runtime": 0.1587, "eval_samples_per_second": 630.071, "eval_steps_per_second": 44.105, "step": 151 }, { "epoch": 1.0066225165562914, "grad_norm": 6.717867374420166, "learning_rate": 4.447717231222386e-05, "loss": 0.8661, "step": 152 }, { "epoch": 1.0198675496688743, "grad_norm": 5.969238758087158, "learning_rate": 4.4329896907216494e-05, "loss": 0.6295, "step": 154 }, { "epoch": 1.033112582781457, "grad_norm": 5.343296527862549, "learning_rate": 4.418262150220913e-05, "loss": 1.1331, "step": 156 }, { "epoch": 1.0463576158940397, "grad_norm": 5.666139602661133, "learning_rate": 4.403534609720177e-05, "loss": 0.8978, "step": 158 }, { "epoch": 1.0596026490066226, "grad_norm": 5.301098823547363, "learning_rate": 4.3888070692194405e-05, "loss": 0.6172, "step": 160 }, { "epoch": 1.0728476821192052, "grad_norm": 6.197515964508057, "learning_rate": 4.374079528718704e-05, "loss": 0.6356, "step": 162 }, { "epoch": 1.086092715231788, "grad_norm": 9.58243179321289, "learning_rate": 4.359351988217968e-05, "loss": 1.5982, "step": 164 }, { "epoch": 1.099337748344371, "grad_norm": 5.4112629890441895, "learning_rate": 4.3446244477172316e-05, "loss": 0.5373, "step": 166 }, { "epoch": 1.1125827814569536, "grad_norm": 6.890096664428711, "learning_rate": 4.329896907216495e-05, "loss": 1.594, "step": 168 }, { "epoch": 1.1258278145695364, "grad_norm": 13.663280487060547, "learning_rate": 4.315169366715759e-05, "loss": 0.6526, "step": 170 }, { "epoch": 1.1390728476821192, "grad_norm": 5.038116455078125, "learning_rate": 4.3004418262150226e-05, "loss": 0.8239, "step": 172 }, { "epoch": 1.152317880794702, "grad_norm": 10.813724517822266, "learning_rate": 4.2857142857142856e-05, "loss": 0.9156, "step": 174 }, { "epoch": 1.1655629139072847, "grad_norm": 3.2282865047454834, "learning_rate": 4.270986745213549e-05, "loss": 0.6795, "step": 176 }, { "epoch": 1.1788079470198676, "grad_norm": 5.9910197257995605, "learning_rate": 4.256259204712813e-05, "loss": 1.0571, "step": 178 }, { "epoch": 1.1920529801324504, "grad_norm": 4.989276885986328, "learning_rate": 4.241531664212077e-05, "loss": 1.1591, "step": 180 }, { "epoch": 1.205298013245033, "grad_norm": 12.654097557067871, "learning_rate": 4.2268041237113404e-05, "loss": 1.3218, "step": 182 }, { "epoch": 1.218543046357616, "grad_norm": 4.979647636413574, "learning_rate": 4.212076583210604e-05, "loss": 0.6925, "step": 184 }, { "epoch": 1.2317880794701987, "grad_norm": 9.59062385559082, "learning_rate": 4.197349042709868e-05, "loss": 1.2088, "step": 186 }, { "epoch": 1.2450331125827814, "grad_norm": 8.358089447021484, "learning_rate": 4.1826215022091314e-05, "loss": 0.7255, "step": 188 }, { "epoch": 1.2582781456953642, "grad_norm": 1.6286242008209229, "learning_rate": 4.167893961708395e-05, "loss": 0.6235, "step": 190 }, { "epoch": 1.271523178807947, "grad_norm": 7.9347662925720215, "learning_rate": 4.153166421207659e-05, "loss": 0.964, "step": 192 }, { "epoch": 1.2847682119205297, "grad_norm": 6.71475887298584, "learning_rate": 4.138438880706922e-05, "loss": 0.9162, "step": 194 }, { "epoch": 1.2980132450331126, "grad_norm": 2.8124191761016846, "learning_rate": 4.1237113402061855e-05, "loss": 0.3924, "step": 196 }, { "epoch": 1.3112582781456954, "grad_norm": 8.39048957824707, "learning_rate": 4.108983799705449e-05, "loss": 0.68, "step": 198 }, { "epoch": 1.3245033112582782, "grad_norm": 10.033766746520996, "learning_rate": 4.094256259204713e-05, "loss": 0.6962, "step": 200 }, { "epoch": 1.3377483443708609, "grad_norm": 6.6539106369018555, "learning_rate": 4.0795287187039766e-05, "loss": 1.0892, "step": 202 }, { "epoch": 1.3509933774834437, "grad_norm": 5.977195739746094, "learning_rate": 4.06480117820324e-05, "loss": 0.5886, "step": 204 }, { "epoch": 1.3642384105960264, "grad_norm": 5.548197269439697, "learning_rate": 4.050073637702504e-05, "loss": 0.9609, "step": 206 }, { "epoch": 1.3774834437086092, "grad_norm": 6.958375453948975, "learning_rate": 4.0353460972017676e-05, "loss": 1.0345, "step": 208 }, { "epoch": 1.390728476821192, "grad_norm": 14.204687118530273, "learning_rate": 4.020618556701031e-05, "loss": 0.7327, "step": 210 }, { "epoch": 1.403973509933775, "grad_norm": 8.209890365600586, "learning_rate": 4.005891016200295e-05, "loss": 0.5259, "step": 212 }, { "epoch": 1.4172185430463577, "grad_norm": 6.424736976623535, "learning_rate": 3.991163475699559e-05, "loss": 0.9341, "step": 214 }, { "epoch": 1.4304635761589404, "grad_norm": 7.625380039215088, "learning_rate": 3.976435935198822e-05, "loss": 1.1303, "step": 216 }, { "epoch": 1.4437086092715232, "grad_norm": 7.861330509185791, "learning_rate": 3.9617083946980854e-05, "loss": 0.5599, "step": 218 }, { "epoch": 1.4569536423841059, "grad_norm": 11.318681716918945, "learning_rate": 3.946980854197349e-05, "loss": 0.665, "step": 220 }, { "epoch": 1.4701986754966887, "grad_norm": 8.797566413879395, "learning_rate": 3.932253313696613e-05, "loss": 0.9297, "step": 222 }, { "epoch": 1.4834437086092715, "grad_norm": 9.536187171936035, "learning_rate": 3.9175257731958764e-05, "loss": 0.5168, "step": 224 }, { "epoch": 1.4966887417218544, "grad_norm": 5.4594807624816895, "learning_rate": 3.90279823269514e-05, "loss": 0.6562, "step": 226 }, { "epoch": 1.5099337748344372, "grad_norm": 6.416939735412598, "learning_rate": 3.888070692194404e-05, "loss": 0.4248, "step": 228 }, { "epoch": 1.5231788079470199, "grad_norm": 8.049701690673828, "learning_rate": 3.8733431516936675e-05, "loss": 0.6311, "step": 230 }, { "epoch": 1.5364238410596025, "grad_norm": 6.978274345397949, "learning_rate": 3.858615611192931e-05, "loss": 0.5238, "step": 232 }, { "epoch": 1.5496688741721854, "grad_norm": 4.730326175689697, "learning_rate": 3.843888070692195e-05, "loss": 0.9127, "step": 234 }, { "epoch": 1.5629139072847682, "grad_norm": 6.017629623413086, "learning_rate": 3.836524300441827e-05, "loss": 0.6197, "step": 236 }, { "epoch": 1.576158940397351, "grad_norm": 1.4615082740783691, "learning_rate": 3.82179675994109e-05, "loss": 0.1855, "step": 238 }, { "epoch": 1.589403973509934, "grad_norm": 17.785310745239258, "learning_rate": 3.8070692194403534e-05, "loss": 1.4609, "step": 240 }, { "epoch": 1.6026490066225165, "grad_norm": 11.241979598999023, "learning_rate": 3.792341678939617e-05, "loss": 0.8828, "step": 242 }, { "epoch": 1.6158940397350994, "grad_norm": 13.630949974060059, "learning_rate": 3.777614138438881e-05, "loss": 1.374, "step": 244 }, { "epoch": 1.629139072847682, "grad_norm": 13.52291488647461, "learning_rate": 3.7628865979381445e-05, "loss": 0.7898, "step": 246 }, { "epoch": 1.6423841059602649, "grad_norm": 6.53799295425415, "learning_rate": 3.748159057437408e-05, "loss": 0.7232, "step": 248 }, { "epoch": 1.6556291390728477, "grad_norm": 7.03351354598999, "learning_rate": 3.733431516936672e-05, "loss": 1.5363, "step": 250 }, { "epoch": 1.6688741721854305, "grad_norm": 10.971170425415039, "learning_rate": 3.7187039764359355e-05, "loss": 0.3952, "step": 252 }, { "epoch": 1.6821192052980134, "grad_norm": 10.918181419372559, "learning_rate": 3.703976435935199e-05, "loss": 1.0573, "step": 254 }, { "epoch": 1.695364238410596, "grad_norm": 5.845329284667969, "learning_rate": 3.689248895434463e-05, "loss": 0.9152, "step": 256 }, { "epoch": 1.7086092715231787, "grad_norm": 10.043814659118652, "learning_rate": 3.6745213549337266e-05, "loss": 0.7483, "step": 258 }, { "epoch": 1.7218543046357615, "grad_norm": 9.504009246826172, "learning_rate": 3.6597938144329896e-05, "loss": 1.3825, "step": 260 }, { "epoch": 1.7350993377483444, "grad_norm": 7.6096415519714355, "learning_rate": 3.645066273932253e-05, "loss": 0.3722, "step": 262 }, { "epoch": 1.7483443708609272, "grad_norm": 15.977928161621094, "learning_rate": 3.630338733431517e-05, "loss": 0.7817, "step": 264 }, { "epoch": 1.76158940397351, "grad_norm": 12.720684051513672, "learning_rate": 3.6156111929307806e-05, "loss": 0.5112, "step": 266 }, { "epoch": 1.7748344370860927, "grad_norm": 8.58932113647461, "learning_rate": 3.600883652430044e-05, "loss": 0.7, "step": 268 }, { "epoch": 1.7880794701986755, "grad_norm": 13.423791885375977, "learning_rate": 3.586156111929308e-05, "loss": 1.6471, "step": 270 }, { "epoch": 1.8013245033112582, "grad_norm": 6.468234062194824, "learning_rate": 3.571428571428572e-05, "loss": 0.66, "step": 272 }, { "epoch": 1.814569536423841, "grad_norm": 9.493569374084473, "learning_rate": 3.5567010309278354e-05, "loss": 1.1906, "step": 274 }, { "epoch": 1.8278145695364238, "grad_norm": 4.703698635101318, "learning_rate": 3.541973490427099e-05, "loss": 0.4843, "step": 276 }, { "epoch": 1.8410596026490067, "grad_norm": 7.946201324462891, "learning_rate": 3.527245949926363e-05, "loss": 0.5529, "step": 278 }, { "epoch": 1.8543046357615895, "grad_norm": 6.270992279052734, "learning_rate": 3.512518409425626e-05, "loss": 0.9139, "step": 280 }, { "epoch": 1.8675496688741722, "grad_norm": 12.685065269470215, "learning_rate": 3.4977908689248894e-05, "loss": 0.4489, "step": 282 }, { "epoch": 1.8807947019867548, "grad_norm": 9.88779354095459, "learning_rate": 3.483063328424153e-05, "loss": 1.0594, "step": 284 }, { "epoch": 1.8940397350993377, "grad_norm": 15.420830726623535, "learning_rate": 3.468335787923417e-05, "loss": 1.5748, "step": 286 }, { "epoch": 1.9072847682119205, "grad_norm": 12.748838424682617, "learning_rate": 3.4536082474226805e-05, "loss": 0.3906, "step": 288 }, { "epoch": 1.9205298013245033, "grad_norm": 8.6655912399292, "learning_rate": 3.438880706921944e-05, "loss": 0.7372, "step": 290 }, { "epoch": 1.9337748344370862, "grad_norm": 2.1088364124298096, "learning_rate": 3.424153166421208e-05, "loss": 0.6368, "step": 292 }, { "epoch": 1.9470198675496688, "grad_norm": 5.693451404571533, "learning_rate": 3.4094256259204716e-05, "loss": 1.1065, "step": 294 }, { "epoch": 1.9602649006622517, "grad_norm": 5.4681315422058105, "learning_rate": 3.394698085419735e-05, "loss": 0.2881, "step": 296 }, { "epoch": 1.9735099337748343, "grad_norm": 3.1230578422546387, "learning_rate": 3.379970544918999e-05, "loss": 0.7056, "step": 298 }, { "epoch": 1.9867549668874172, "grad_norm": 8.198813438415527, "learning_rate": 3.3652430044182626e-05, "loss": 0.3768, "step": 300 }, { "epoch": 2.0, "grad_norm": 5.530179023742676, "learning_rate": 3.3505154639175256e-05, "loss": 0.3156, "step": 302 }, { "epoch": 2.0, "eval_accuracy": 0.74, "eval_f1_macro": 0.45774191750278714, "eval_f1_micro": 0.74, "eval_f1_weighted": 0.7020952062430323, "eval_loss": 1.118362307548523, "eval_precision_macro": 0.5131296992481202, "eval_precision_micro": 0.74, "eval_precision_weighted": 0.7261616541353384, "eval_recall_macro": 0.48530303030303035, "eval_recall_micro": 0.74, "eval_recall_weighted": 0.74, "eval_runtime": 0.1632, "eval_samples_per_second": 612.634, "eval_steps_per_second": 42.884, "step": 302 }, { "epoch": 2.013245033112583, "grad_norm": 1.5750505924224854, "learning_rate": 3.335787923416789e-05, "loss": 0.1854, "step": 304 }, { "epoch": 2.0264900662251657, "grad_norm": 3.4210903644561768, "learning_rate": 3.321060382916053e-05, "loss": 0.3148, "step": 306 }, { "epoch": 2.0397350993377485, "grad_norm": 7.4305033683776855, "learning_rate": 3.306332842415317e-05, "loss": 0.7654, "step": 308 }, { "epoch": 2.052980132450331, "grad_norm": 2.1427576541900635, "learning_rate": 3.2916053019145804e-05, "loss": 0.2679, "step": 310 }, { "epoch": 2.066225165562914, "grad_norm": 4.614518165588379, "learning_rate": 3.276877761413844e-05, "loss": 0.722, "step": 312 }, { "epoch": 2.0794701986754967, "grad_norm": 5.343958377838135, "learning_rate": 3.262150220913108e-05, "loss": 0.658, "step": 314 }, { "epoch": 2.0927152317880795, "grad_norm": 7.354612827301025, "learning_rate": 3.2474226804123714e-05, "loss": 0.5283, "step": 316 }, { "epoch": 2.1059602649006623, "grad_norm": 3.1576592922210693, "learning_rate": 3.232695139911635e-05, "loss": 0.096, "step": 318 }, { "epoch": 2.119205298013245, "grad_norm": 3.2874159812927246, "learning_rate": 3.217967599410899e-05, "loss": 0.2096, "step": 320 }, { "epoch": 2.1324503311258276, "grad_norm": 0.5562194585800171, "learning_rate": 3.2032400589101625e-05, "loss": 0.1196, "step": 322 }, { "epoch": 2.1456953642384105, "grad_norm": 13.274640083312988, "learning_rate": 3.1885125184094255e-05, "loss": 1.067, "step": 324 }, { "epoch": 2.1589403973509933, "grad_norm": 8.369612693786621, "learning_rate": 3.173784977908689e-05, "loss": 0.6852, "step": 326 }, { "epoch": 2.172185430463576, "grad_norm": 10.826526641845703, "learning_rate": 3.159057437407953e-05, "loss": 0.5073, "step": 328 }, { "epoch": 2.185430463576159, "grad_norm": 6.016572952270508, "learning_rate": 3.1443298969072166e-05, "loss": 0.5949, "step": 330 }, { "epoch": 2.198675496688742, "grad_norm": 10.392781257629395, "learning_rate": 3.12960235640648e-05, "loss": 0.7787, "step": 332 }, { "epoch": 2.2119205298013247, "grad_norm": 4.342617511749268, "learning_rate": 3.114874815905744e-05, "loss": 0.4602, "step": 334 }, { "epoch": 2.225165562913907, "grad_norm": 7.711668014526367, "learning_rate": 3.1001472754050076e-05, "loss": 0.3792, "step": 336 }, { "epoch": 2.23841059602649, "grad_norm": 5.162533283233643, "learning_rate": 3.085419734904271e-05, "loss": 0.2087, "step": 338 }, { "epoch": 2.251655629139073, "grad_norm": 7.244391441345215, "learning_rate": 3.070692194403535e-05, "loss": 0.298, "step": 340 }, { "epoch": 2.2649006622516556, "grad_norm": 10.348755836486816, "learning_rate": 3.055964653902799e-05, "loss": 0.3321, "step": 342 }, { "epoch": 2.2781456953642385, "grad_norm": 1.1933870315551758, "learning_rate": 3.0412371134020617e-05, "loss": 0.0756, "step": 344 }, { "epoch": 2.2913907284768213, "grad_norm": 9.06655216217041, "learning_rate": 3.0265095729013254e-05, "loss": 0.7712, "step": 346 }, { "epoch": 2.304635761589404, "grad_norm": 8.230104446411133, "learning_rate": 3.011782032400589e-05, "loss": 0.9383, "step": 348 }, { "epoch": 2.3178807947019866, "grad_norm": 10.278660774230957, "learning_rate": 2.9970544918998527e-05, "loss": 0.6566, "step": 350 }, { "epoch": 2.3311258278145695, "grad_norm": 1.700579047203064, "learning_rate": 2.9823269513991164e-05, "loss": 0.1095, "step": 352 }, { "epoch": 2.3443708609271523, "grad_norm": 4.130746841430664, "learning_rate": 2.96759941089838e-05, "loss": 0.7986, "step": 354 }, { "epoch": 2.357615894039735, "grad_norm": 9.603137016296387, "learning_rate": 2.9528718703976438e-05, "loss": 0.6449, "step": 356 }, { "epoch": 2.370860927152318, "grad_norm": 13.046420097351074, "learning_rate": 2.9381443298969075e-05, "loss": 0.7776, "step": 358 }, { "epoch": 2.384105960264901, "grad_norm": 4.095331192016602, "learning_rate": 2.9234167893961712e-05, "loss": 0.2639, "step": 360 }, { "epoch": 2.3973509933774833, "grad_norm": 0.3949756324291229, "learning_rate": 2.908689248895435e-05, "loss": 0.1134, "step": 362 }, { "epoch": 2.410596026490066, "grad_norm": 3.804518699645996, "learning_rate": 2.8939617083946985e-05, "loss": 0.3421, "step": 364 }, { "epoch": 2.423841059602649, "grad_norm": 12.5763521194458, "learning_rate": 2.8792341678939616e-05, "loss": 0.7242, "step": 366 }, { "epoch": 2.437086092715232, "grad_norm": 5.6927032470703125, "learning_rate": 2.8645066273932252e-05, "loss": 0.9591, "step": 368 }, { "epoch": 2.4503311258278146, "grad_norm": 13.04416275024414, "learning_rate": 2.849779086892489e-05, "loss": 0.4636, "step": 370 }, { "epoch": 2.4635761589403975, "grad_norm": 4.799520015716553, "learning_rate": 2.8350515463917526e-05, "loss": 0.4037, "step": 372 }, { "epoch": 2.47682119205298, "grad_norm": 8.108109474182129, "learning_rate": 2.8203240058910163e-05, "loss": 1.2403, "step": 374 }, { "epoch": 2.4900662251655628, "grad_norm": 3.977107286453247, "learning_rate": 2.80559646539028e-05, "loss": 0.1793, "step": 376 }, { "epoch": 2.5033112582781456, "grad_norm": 8.900064468383789, "learning_rate": 2.7908689248895437e-05, "loss": 0.6712, "step": 378 }, { "epoch": 2.5165562913907285, "grad_norm": 5.152413368225098, "learning_rate": 2.7761413843888074e-05, "loss": 0.0786, "step": 380 }, { "epoch": 2.5298013245033113, "grad_norm": 1.7878741025924683, "learning_rate": 2.761413843888071e-05, "loss": 0.3663, "step": 382 }, { "epoch": 2.543046357615894, "grad_norm": 4.110722064971924, "learning_rate": 2.7466863033873347e-05, "loss": 0.3523, "step": 384 }, { "epoch": 2.556291390728477, "grad_norm": 15.2096586227417, "learning_rate": 2.7319587628865977e-05, "loss": 0.233, "step": 386 }, { "epoch": 2.5695364238410594, "grad_norm": 4.752151012420654, "learning_rate": 2.7172312223858614e-05, "loss": 0.5043, "step": 388 }, { "epoch": 2.5827814569536423, "grad_norm": 9.852655410766602, "learning_rate": 2.702503681885125e-05, "loss": 0.1858, "step": 390 }, { "epoch": 2.596026490066225, "grad_norm": 13.454380989074707, "learning_rate": 2.6877761413843888e-05, "loss": 0.597, "step": 392 }, { "epoch": 2.609271523178808, "grad_norm": 7.8776326179504395, "learning_rate": 2.6730486008836525e-05, "loss": 0.7841, "step": 394 }, { "epoch": 2.622516556291391, "grad_norm": 9.048988342285156, "learning_rate": 2.6583210603829162e-05, "loss": 0.8305, "step": 396 }, { "epoch": 2.6357615894039736, "grad_norm": 13.187280654907227, "learning_rate": 2.64359351988218e-05, "loss": 1.0179, "step": 398 }, { "epoch": 2.6490066225165565, "grad_norm": 13.392657279968262, "learning_rate": 2.6288659793814435e-05, "loss": 0.1317, "step": 400 }, { "epoch": 2.662251655629139, "grad_norm": 11.062936782836914, "learning_rate": 2.6141384388807072e-05, "loss": 0.6804, "step": 402 }, { "epoch": 2.6754966887417218, "grad_norm": 11.463652610778809, "learning_rate": 2.599410898379971e-05, "loss": 0.5558, "step": 404 }, { "epoch": 2.6887417218543046, "grad_norm": 5.696853160858154, "learning_rate": 2.5846833578792346e-05, "loss": 0.7812, "step": 406 }, { "epoch": 2.7019867549668874, "grad_norm": 15.077645301818848, "learning_rate": 2.5699558173784976e-05, "loss": 0.6744, "step": 408 }, { "epoch": 2.7152317880794703, "grad_norm": 4.043674468994141, "learning_rate": 2.5552282768777613e-05, "loss": 0.3354, "step": 410 }, { "epoch": 2.7284768211920527, "grad_norm": 1.150486946105957, "learning_rate": 2.540500736377025e-05, "loss": 0.3314, "step": 412 }, { "epoch": 2.741721854304636, "grad_norm": 12.133696556091309, "learning_rate": 2.5257731958762887e-05, "loss": 0.526, "step": 414 }, { "epoch": 2.7549668874172184, "grad_norm": 11.533574104309082, "learning_rate": 2.5110456553755524e-05, "loss": 0.8745, "step": 416 }, { "epoch": 2.7682119205298013, "grad_norm": 1.3846099376678467, "learning_rate": 2.496318114874816e-05, "loss": 0.204, "step": 418 }, { "epoch": 2.781456953642384, "grad_norm": 4.653772830963135, "learning_rate": 2.4815905743740797e-05, "loss": 0.3621, "step": 420 }, { "epoch": 2.794701986754967, "grad_norm": 8.693764686584473, "learning_rate": 2.4668630338733434e-05, "loss": 0.4153, "step": 422 }, { "epoch": 2.80794701986755, "grad_norm": 11.415797233581543, "learning_rate": 2.4521354933726068e-05, "loss": 0.6997, "step": 424 }, { "epoch": 2.821192052980132, "grad_norm": 3.682908773422241, "learning_rate": 2.4374079528718704e-05, "loss": 0.3039, "step": 426 }, { "epoch": 2.8344370860927155, "grad_norm": 9.39224624633789, "learning_rate": 2.422680412371134e-05, "loss": 0.6302, "step": 428 }, { "epoch": 2.847682119205298, "grad_norm": 3.271193742752075, "learning_rate": 2.4079528718703978e-05, "loss": 0.2706, "step": 430 }, { "epoch": 2.8609271523178808, "grad_norm": 7.453132629394531, "learning_rate": 2.3932253313696615e-05, "loss": 0.4594, "step": 432 }, { "epoch": 2.8741721854304636, "grad_norm": 16.519481658935547, "learning_rate": 2.378497790868925e-05, "loss": 0.2437, "step": 434 }, { "epoch": 2.8874172185430464, "grad_norm": 17.087709426879883, "learning_rate": 2.3637702503681885e-05, "loss": 0.9733, "step": 436 }, { "epoch": 2.9006622516556293, "grad_norm": 1.0074738264083862, "learning_rate": 2.3490427098674522e-05, "loss": 0.649, "step": 438 }, { "epoch": 2.9139072847682117, "grad_norm": 4.366696834564209, "learning_rate": 2.334315169366716e-05, "loss": 0.5205, "step": 440 }, { "epoch": 2.9271523178807946, "grad_norm": 20.609182357788086, "learning_rate": 2.3195876288659796e-05, "loss": 0.2493, "step": 442 }, { "epoch": 2.9403973509933774, "grad_norm": 9.770134925842285, "learning_rate": 2.3048600883652433e-05, "loss": 0.1564, "step": 444 }, { "epoch": 2.9536423841059603, "grad_norm": 8.714824676513672, "learning_rate": 2.2901325478645066e-05, "loss": 0.6449, "step": 446 }, { "epoch": 2.966887417218543, "grad_norm": 7.8644514083862305, "learning_rate": 2.2754050073637703e-05, "loss": 1.3438, "step": 448 }, { "epoch": 2.980132450331126, "grad_norm": 5.881997108459473, "learning_rate": 2.260677466863034e-05, "loss": 0.3383, "step": 450 }, { "epoch": 2.993377483443709, "grad_norm": 12.096423149108887, "learning_rate": 2.2459499263622977e-05, "loss": 0.7644, "step": 452 }, { "epoch": 3.0, "eval_accuracy": 0.74, "eval_f1_macro": 0.4558641367469575, "eval_f1_micro": 0.74, "eval_f1_weighted": 0.7165269403625714, "eval_loss": 1.0647395849227905, "eval_precision_macro": 0.47941017316017315, "eval_precision_micro": 0.74, "eval_precision_weighted": 0.7261709956709957, "eval_recall_macro": 0.4931601731601732, "eval_recall_micro": 0.74, "eval_recall_weighted": 0.74, "eval_runtime": 0.1879, "eval_samples_per_second": 532.193, "eval_steps_per_second": 37.254, "step": 453 } ], "logging_steps": 2, "max_steps": 755, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "total_flos": 119358311592960.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }