{
"best_metric": 1.0647395849227905,
"best_model_checkpoint": "autotrain-qs45q-p9syi/checkpoint-453",
"epoch": 3.0,
"eval_steps": 500,
"global_step": 453,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.013245033112582781,
"grad_norm": 3.4723896980285645,
"learning_rate": 1.3157894736842106e-06,
"loss": 3.0146,
"step": 2
},
{
"epoch": 0.026490066225165563,
"grad_norm": 3.5154001712799072,
"learning_rate": 2.631578947368421e-06,
"loss": 3.0404,
"step": 4
},
{
"epoch": 0.039735099337748346,
"grad_norm": 3.4036054611206055,
"learning_rate": 3.9473684210526315e-06,
"loss": 3.0648,
"step": 6
},
{
"epoch": 0.052980132450331126,
"grad_norm": 3.3698582649230957,
"learning_rate": 5.263157894736842e-06,
"loss": 3.0555,
"step": 8
},
{
"epoch": 0.06622516556291391,
"grad_norm": 3.074321985244751,
"learning_rate": 6.578947368421053e-06,
"loss": 3.0161,
"step": 10
},
{
"epoch": 0.07947019867549669,
"grad_norm": 3.69840407371521,
"learning_rate": 7.894736842105263e-06,
"loss": 3.0226,
"step": 12
},
{
"epoch": 0.09271523178807947,
"grad_norm": 3.0229387283325195,
"learning_rate": 9.210526315789474e-06,
"loss": 3.0117,
"step": 14
},
{
"epoch": 0.10596026490066225,
"grad_norm": 3.633251190185547,
"learning_rate": 1.0526315789473684e-05,
"loss": 2.9922,
"step": 16
},
{
"epoch": 0.11920529801324503,
"grad_norm": 3.404634952545166,
"learning_rate": 1.1842105263157895e-05,
"loss": 2.9323,
"step": 18
},
{
"epoch": 0.13245033112582782,
"grad_norm": 4.926875114440918,
"learning_rate": 1.3157894736842106e-05,
"loss": 2.9597,
"step": 20
},
{
"epoch": 0.1456953642384106,
"grad_norm": 3.341071367263794,
"learning_rate": 1.4473684210526317e-05,
"loss": 2.927,
"step": 22
},
{
"epoch": 0.15894039735099338,
"grad_norm": 3.596402645111084,
"learning_rate": 1.5789473684210526e-05,
"loss": 2.8622,
"step": 24
},
{
"epoch": 0.17218543046357615,
"grad_norm": 3.554511547088623,
"learning_rate": 1.7105263157894737e-05,
"loss": 2.8552,
"step": 26
},
{
"epoch": 0.18543046357615894,
"grad_norm": 3.871518135070801,
"learning_rate": 1.8421052631578947e-05,
"loss": 2.8545,
"step": 28
},
{
"epoch": 0.1986754966887417,
"grad_norm": 4.008641719818115,
"learning_rate": 1.9736842105263158e-05,
"loss": 2.8275,
"step": 30
},
{
"epoch": 0.2119205298013245,
"grad_norm": 4.181613445281982,
"learning_rate": 2.105263157894737e-05,
"loss": 2.7814,
"step": 32
},
{
"epoch": 0.2251655629139073,
"grad_norm": 4.396944999694824,
"learning_rate": 2.236842105263158e-05,
"loss": 2.6354,
"step": 34
},
{
"epoch": 0.23841059602649006,
"grad_norm": 4.029453754425049,
"learning_rate": 2.368421052631579e-05,
"loss": 2.5842,
"step": 36
},
{
"epoch": 0.25165562913907286,
"grad_norm": 3.9177920818328857,
"learning_rate": 2.5e-05,
"loss": 2.6199,
"step": 38
},
{
"epoch": 0.26490066225165565,
"grad_norm": 5.803523063659668,
"learning_rate": 2.6315789473684212e-05,
"loss": 2.5609,
"step": 40
},
{
"epoch": 0.2781456953642384,
"grad_norm": 6.542681694030762,
"learning_rate": 2.7631578947368426e-05,
"loss": 2.2103,
"step": 42
},
{
"epoch": 0.2913907284768212,
"grad_norm": 4.970069408416748,
"learning_rate": 2.8947368421052634e-05,
"loss": 2.5223,
"step": 44
},
{
"epoch": 0.304635761589404,
"grad_norm": 7.797888278961182,
"learning_rate": 3.0263157894736844e-05,
"loss": 2.2236,
"step": 46
},
{
"epoch": 0.31788079470198677,
"grad_norm": 5.971277713775635,
"learning_rate": 3.157894736842105e-05,
"loss": 2.6381,
"step": 48
},
{
"epoch": 0.33112582781456956,
"grad_norm": 6.939202785491943,
"learning_rate": 3.289473684210527e-05,
"loss": 2.3483,
"step": 50
},
{
"epoch": 0.3443708609271523,
"grad_norm": 5.877003192901611,
"learning_rate": 3.421052631578947e-05,
"loss": 1.9744,
"step": 52
},
{
"epoch": 0.3576158940397351,
"grad_norm": 5.517603397369385,
"learning_rate": 3.5526315789473684e-05,
"loss": 1.9791,
"step": 54
},
{
"epoch": 0.3708609271523179,
"grad_norm": 7.6599440574646,
"learning_rate": 3.6842105263157895e-05,
"loss": 2.0475,
"step": 56
},
{
"epoch": 0.3841059602649007,
"grad_norm": 6.77654504776001,
"learning_rate": 3.815789473684211e-05,
"loss": 1.923,
"step": 58
},
{
"epoch": 0.3973509933774834,
"grad_norm": 6.017172336578369,
"learning_rate": 3.9473684210526316e-05,
"loss": 1.9688,
"step": 60
},
{
"epoch": 0.4105960264900662,
"grad_norm": 5.819124221801758,
"learning_rate": 4.078947368421053e-05,
"loss": 2.2777,
"step": 62
},
{
"epoch": 0.423841059602649,
"grad_norm": 5.240574359893799,
"learning_rate": 4.210526315789474e-05,
"loss": 1.7219,
"step": 64
},
{
"epoch": 0.4370860927152318,
"grad_norm": 7.376441478729248,
"learning_rate": 4.342105263157895e-05,
"loss": 1.5784,
"step": 66
},
{
"epoch": 0.4503311258278146,
"grad_norm": 5.029400825500488,
"learning_rate": 4.473684210526316e-05,
"loss": 1.6895,
"step": 68
},
{
"epoch": 0.46357615894039733,
"grad_norm": 10.123241424560547,
"learning_rate": 4.605263157894737e-05,
"loss": 1.6955,
"step": 70
},
{
"epoch": 0.4768211920529801,
"grad_norm": 6.67560338973999,
"learning_rate": 4.736842105263158e-05,
"loss": 1.9591,
"step": 72
},
{
"epoch": 0.4900662251655629,
"grad_norm": 8.467432975769043,
"learning_rate": 4.868421052631579e-05,
"loss": 2.0633,
"step": 74
},
{
"epoch": 0.5033112582781457,
"grad_norm": 5.2431535720825195,
"learning_rate": 5e-05,
"loss": 1.3436,
"step": 76
},
{
"epoch": 0.5165562913907285,
"grad_norm": 7.770199298858643,
"learning_rate": 4.985272459499264e-05,
"loss": 1.9379,
"step": 78
},
{
"epoch": 0.5298013245033113,
"grad_norm": 5.734556198120117,
"learning_rate": 4.9705449189985276e-05,
"loss": 1.7279,
"step": 80
},
{
"epoch": 0.543046357615894,
"grad_norm": 6.759193420410156,
"learning_rate": 4.955817378497791e-05,
"loss": 1.4606,
"step": 82
},
{
"epoch": 0.5562913907284768,
"grad_norm": 10.67810344696045,
"learning_rate": 4.941089837997055e-05,
"loss": 1.3069,
"step": 84
},
{
"epoch": 0.5695364238410596,
"grad_norm": 7.0206098556518555,
"learning_rate": 4.926362297496319e-05,
"loss": 1.8698,
"step": 86
},
{
"epoch": 0.5827814569536424,
"grad_norm": 4.365483283996582,
"learning_rate": 4.911634756995582e-05,
"loss": 1.5244,
"step": 88
},
{
"epoch": 0.5960264900662252,
"grad_norm": 8.567715644836426,
"learning_rate": 4.8969072164948454e-05,
"loss": 1.7503,
"step": 90
},
{
"epoch": 0.609271523178808,
"grad_norm": 7.213935852050781,
"learning_rate": 4.882179675994109e-05,
"loss": 1.6514,
"step": 92
},
{
"epoch": 0.6225165562913907,
"grad_norm": 5.607529163360596,
"learning_rate": 4.867452135493373e-05,
"loss": 1.0184,
"step": 94
},
{
"epoch": 0.6357615894039735,
"grad_norm": 9.907747268676758,
"learning_rate": 4.8527245949926364e-05,
"loss": 1.7225,
"step": 96
},
{
"epoch": 0.6490066225165563,
"grad_norm": 13.226700782775879,
"learning_rate": 4.8379970544919e-05,
"loss": 1.9432,
"step": 98
},
{
"epoch": 0.6622516556291391,
"grad_norm": 11.118176460266113,
"learning_rate": 4.823269513991164e-05,
"loss": 1.416,
"step": 100
},
{
"epoch": 0.6754966887417219,
"grad_norm": 10.29113483428955,
"learning_rate": 4.8085419734904275e-05,
"loss": 1.6586,
"step": 102
},
{
"epoch": 0.6887417218543046,
"grad_norm": 9.081689834594727,
"learning_rate": 4.793814432989691e-05,
"loss": 1.305,
"step": 104
},
{
"epoch": 0.7019867549668874,
"grad_norm": 6.368233680725098,
"learning_rate": 4.779086892488955e-05,
"loss": 1.1645,
"step": 106
},
{
"epoch": 0.7152317880794702,
"grad_norm": 12.125802040100098,
"learning_rate": 4.764359351988218e-05,
"loss": 0.8825,
"step": 108
},
{
"epoch": 0.7284768211920529,
"grad_norm": 14.471185684204102,
"learning_rate": 4.7496318114874815e-05,
"loss": 1.3133,
"step": 110
},
{
"epoch": 0.7417218543046358,
"grad_norm": 10.047285079956055,
"learning_rate": 4.734904270986745e-05,
"loss": 1.0271,
"step": 112
},
{
"epoch": 0.7549668874172185,
"grad_norm": 10.376700401306152,
"learning_rate": 4.720176730486009e-05,
"loss": 1.5487,
"step": 114
},
{
"epoch": 0.7682119205298014,
"grad_norm": 9.097859382629395,
"learning_rate": 4.7054491899852726e-05,
"loss": 1.7211,
"step": 116
},
{
"epoch": 0.7814569536423841,
"grad_norm": 11.168022155761719,
"learning_rate": 4.690721649484536e-05,
"loss": 1.3699,
"step": 118
},
{
"epoch": 0.7947019867549668,
"grad_norm": 7.571075439453125,
"learning_rate": 4.6759941089838e-05,
"loss": 0.7628,
"step": 120
},
{
"epoch": 0.8079470198675497,
"grad_norm": 7.332944393157959,
"learning_rate": 4.661266568483064e-05,
"loss": 1.2271,
"step": 122
},
{
"epoch": 0.8211920529801324,
"grad_norm": 9.472811698913574,
"learning_rate": 4.6465390279823274e-05,
"loss": 0.9225,
"step": 124
},
{
"epoch": 0.8344370860927153,
"grad_norm": 7.298526287078857,
"learning_rate": 4.631811487481591e-05,
"loss": 1.4189,
"step": 126
},
{
"epoch": 0.847682119205298,
"grad_norm": 6.718891620635986,
"learning_rate": 4.617083946980855e-05,
"loss": 0.9269,
"step": 128
},
{
"epoch": 0.8609271523178808,
"grad_norm": 5.722424507141113,
"learning_rate": 4.602356406480118e-05,
"loss": 1.5321,
"step": 130
},
{
"epoch": 0.8741721854304636,
"grad_norm": 8.258366584777832,
"learning_rate": 4.5876288659793814e-05,
"loss": 0.9162,
"step": 132
},
{
"epoch": 0.8874172185430463,
"grad_norm": 6.919400691986084,
"learning_rate": 4.572901325478645e-05,
"loss": 1.0685,
"step": 134
},
{
"epoch": 0.9006622516556292,
"grad_norm": 6.770501613616943,
"learning_rate": 4.558173784977909e-05,
"loss": 0.7407,
"step": 136
},
{
"epoch": 0.9139072847682119,
"grad_norm": 6.387173652648926,
"learning_rate": 4.5434462444771725e-05,
"loss": 0.6707,
"step": 138
},
{
"epoch": 0.9271523178807947,
"grad_norm": 11.18732738494873,
"learning_rate": 4.528718703976436e-05,
"loss": 1.3069,
"step": 140
},
{
"epoch": 0.9403973509933775,
"grad_norm": 4.17434549331665,
"learning_rate": 4.5139911634757e-05,
"loss": 0.7223,
"step": 142
},
{
"epoch": 0.9536423841059603,
"grad_norm": 7.86276388168335,
"learning_rate": 4.4992636229749635e-05,
"loss": 0.7461,
"step": 144
},
{
"epoch": 0.9668874172185431,
"grad_norm": 11.436422348022461,
"learning_rate": 4.484536082474227e-05,
"loss": 1.4973,
"step": 146
},
{
"epoch": 0.9801324503311258,
"grad_norm": 11.492631912231445,
"learning_rate": 4.469808541973491e-05,
"loss": 1.1266,
"step": 148
},
{
"epoch": 0.9933774834437086,
"grad_norm": 6.025942802429199,
"learning_rate": 4.4550810014727546e-05,
"loss": 0.5809,
"step": 150
},
{
"epoch": 1.0,
"eval_accuracy": 0.62,
"eval_f1_macro": 0.2182729551150604,
"eval_f1_micro": 0.62,
"eval_f1_weighted": 0.5708288904078378,
"eval_loss": 1.3821334838867188,
"eval_precision_macro": 0.20881118881118882,
"eval_precision_micro": 0.62,
"eval_precision_weighted": 0.5551748251748252,
"eval_recall_macro": 0.25717532467532467,
"eval_recall_micro": 0.62,
"eval_recall_weighted": 0.62,
"eval_runtime": 0.1587,
"eval_samples_per_second": 630.071,
"eval_steps_per_second": 44.105,
"step": 151
},
{
"epoch": 1.0066225165562914,
"grad_norm": 6.717867374420166,
"learning_rate": 4.447717231222386e-05,
"loss": 0.8661,
"step": 152
},
{
"epoch": 1.0198675496688743,
"grad_norm": 5.969238758087158,
"learning_rate": 4.4329896907216494e-05,
"loss": 0.6295,
"step": 154
},
{
"epoch": 1.033112582781457,
"grad_norm": 5.343296527862549,
"learning_rate": 4.418262150220913e-05,
"loss": 1.1331,
"step": 156
},
{
"epoch": 1.0463576158940397,
"grad_norm": 5.666139602661133,
"learning_rate": 4.403534609720177e-05,
"loss": 0.8978,
"step": 158
},
{
"epoch": 1.0596026490066226,
"grad_norm": 5.301098823547363,
"learning_rate": 4.3888070692194405e-05,
"loss": 0.6172,
"step": 160
},
{
"epoch": 1.0728476821192052,
"grad_norm": 6.197515964508057,
"learning_rate": 4.374079528718704e-05,
"loss": 0.6356,
"step": 162
},
{
"epoch": 1.086092715231788,
"grad_norm": 9.58243179321289,
"learning_rate": 4.359351988217968e-05,
"loss": 1.5982,
"step": 164
},
{
"epoch": 1.099337748344371,
"grad_norm": 5.4112629890441895,
"learning_rate": 4.3446244477172316e-05,
"loss": 0.5373,
"step": 166
},
{
"epoch": 1.1125827814569536,
"grad_norm": 6.890096664428711,
"learning_rate": 4.329896907216495e-05,
"loss": 1.594,
"step": 168
},
{
"epoch": 1.1258278145695364,
"grad_norm": 13.663280487060547,
"learning_rate": 4.315169366715759e-05,
"loss": 0.6526,
"step": 170
},
{
"epoch": 1.1390728476821192,
"grad_norm": 5.038116455078125,
"learning_rate": 4.3004418262150226e-05,
"loss": 0.8239,
"step": 172
},
{
"epoch": 1.152317880794702,
"grad_norm": 10.813724517822266,
"learning_rate": 4.2857142857142856e-05,
"loss": 0.9156,
"step": 174
},
{
"epoch": 1.1655629139072847,
"grad_norm": 3.2282865047454834,
"learning_rate": 4.270986745213549e-05,
"loss": 0.6795,
"step": 176
},
{
"epoch": 1.1788079470198676,
"grad_norm": 5.9910197257995605,
"learning_rate": 4.256259204712813e-05,
"loss": 1.0571,
"step": 178
},
{
"epoch": 1.1920529801324504,
"grad_norm": 4.989276885986328,
"learning_rate": 4.241531664212077e-05,
"loss": 1.1591,
"step": 180
},
{
"epoch": 1.205298013245033,
"grad_norm": 12.654097557067871,
"learning_rate": 4.2268041237113404e-05,
"loss": 1.3218,
"step": 182
},
{
"epoch": 1.218543046357616,
"grad_norm": 4.979647636413574,
"learning_rate": 4.212076583210604e-05,
"loss": 0.6925,
"step": 184
},
{
"epoch": 1.2317880794701987,
"grad_norm": 9.59062385559082,
"learning_rate": 4.197349042709868e-05,
"loss": 1.2088,
"step": 186
},
{
"epoch": 1.2450331125827814,
"grad_norm": 8.358089447021484,
"learning_rate": 4.1826215022091314e-05,
"loss": 0.7255,
"step": 188
},
{
"epoch": 1.2582781456953642,
"grad_norm": 1.6286242008209229,
"learning_rate": 4.167893961708395e-05,
"loss": 0.6235,
"step": 190
},
{
"epoch": 1.271523178807947,
"grad_norm": 7.9347662925720215,
"learning_rate": 4.153166421207659e-05,
"loss": 0.964,
"step": 192
},
{
"epoch": 1.2847682119205297,
"grad_norm": 6.71475887298584,
"learning_rate": 4.138438880706922e-05,
"loss": 0.9162,
"step": 194
},
{
"epoch": 1.2980132450331126,
"grad_norm": 2.8124191761016846,
"learning_rate": 4.1237113402061855e-05,
"loss": 0.3924,
"step": 196
},
{
"epoch": 1.3112582781456954,
"grad_norm": 8.39048957824707,
"learning_rate": 4.108983799705449e-05,
"loss": 0.68,
"step": 198
},
{
"epoch": 1.3245033112582782,
"grad_norm": 10.033766746520996,
"learning_rate": 4.094256259204713e-05,
"loss": 0.6962,
"step": 200
},
{
"epoch": 1.3377483443708609,
"grad_norm": 6.6539106369018555,
"learning_rate": 4.0795287187039766e-05,
"loss": 1.0892,
"step": 202
},
{
"epoch": 1.3509933774834437,
"grad_norm": 5.977195739746094,
"learning_rate": 4.06480117820324e-05,
"loss": 0.5886,
"step": 204
},
{
"epoch": 1.3642384105960264,
"grad_norm": 5.548197269439697,
"learning_rate": 4.050073637702504e-05,
"loss": 0.9609,
"step": 206
},
{
"epoch": 1.3774834437086092,
"grad_norm": 6.958375453948975,
"learning_rate": 4.0353460972017676e-05,
"loss": 1.0345,
"step": 208
},
{
"epoch": 1.390728476821192,
"grad_norm": 14.204687118530273,
"learning_rate": 4.020618556701031e-05,
"loss": 0.7327,
"step": 210
},
{
"epoch": 1.403973509933775,
"grad_norm": 8.209890365600586,
"learning_rate": 4.005891016200295e-05,
"loss": 0.5259,
"step": 212
},
{
"epoch": 1.4172185430463577,
"grad_norm": 6.424736976623535,
"learning_rate": 3.991163475699559e-05,
"loss": 0.9341,
"step": 214
},
{
"epoch": 1.4304635761589404,
"grad_norm": 7.625380039215088,
"learning_rate": 3.976435935198822e-05,
"loss": 1.1303,
"step": 216
},
{
"epoch": 1.4437086092715232,
"grad_norm": 7.861330509185791,
"learning_rate": 3.9617083946980854e-05,
"loss": 0.5599,
"step": 218
},
{
"epoch": 1.4569536423841059,
"grad_norm": 11.318681716918945,
"learning_rate": 3.946980854197349e-05,
"loss": 0.665,
"step": 220
},
{
"epoch": 1.4701986754966887,
"grad_norm": 8.797566413879395,
"learning_rate": 3.932253313696613e-05,
"loss": 0.9297,
"step": 222
},
{
"epoch": 1.4834437086092715,
"grad_norm": 9.536187171936035,
"learning_rate": 3.9175257731958764e-05,
"loss": 0.5168,
"step": 224
},
{
"epoch": 1.4966887417218544,
"grad_norm": 5.4594807624816895,
"learning_rate": 3.90279823269514e-05,
"loss": 0.6562,
"step": 226
},
{
"epoch": 1.5099337748344372,
"grad_norm": 6.416939735412598,
"learning_rate": 3.888070692194404e-05,
"loss": 0.4248,
"step": 228
},
{
"epoch": 1.5231788079470199,
"grad_norm": 8.049701690673828,
"learning_rate": 3.8733431516936675e-05,
"loss": 0.6311,
"step": 230
},
{
"epoch": 1.5364238410596025,
"grad_norm": 6.978274345397949,
"learning_rate": 3.858615611192931e-05,
"loss": 0.5238,
"step": 232
},
{
"epoch": 1.5496688741721854,
"grad_norm": 4.730326175689697,
"learning_rate": 3.843888070692195e-05,
"loss": 0.9127,
"step": 234
},
{
"epoch": 1.5629139072847682,
"grad_norm": 6.017629623413086,
"learning_rate": 3.836524300441827e-05,
"loss": 0.6197,
"step": 236
},
{
"epoch": 1.576158940397351,
"grad_norm": 1.4615082740783691,
"learning_rate": 3.82179675994109e-05,
"loss": 0.1855,
"step": 238
},
{
"epoch": 1.589403973509934,
"grad_norm": 17.785310745239258,
"learning_rate": 3.8070692194403534e-05,
"loss": 1.4609,
"step": 240
},
{
"epoch": 1.6026490066225165,
"grad_norm": 11.241979598999023,
"learning_rate": 3.792341678939617e-05,
"loss": 0.8828,
"step": 242
},
{
"epoch": 1.6158940397350994,
"grad_norm": 13.630949974060059,
"learning_rate": 3.777614138438881e-05,
"loss": 1.374,
"step": 244
},
{
"epoch": 1.629139072847682,
"grad_norm": 13.52291488647461,
"learning_rate": 3.7628865979381445e-05,
"loss": 0.7898,
"step": 246
},
{
"epoch": 1.6423841059602649,
"grad_norm": 6.53799295425415,
"learning_rate": 3.748159057437408e-05,
"loss": 0.7232,
"step": 248
},
{
"epoch": 1.6556291390728477,
"grad_norm": 7.03351354598999,
"learning_rate": 3.733431516936672e-05,
"loss": 1.5363,
"step": 250
},
{
"epoch": 1.6688741721854305,
"grad_norm": 10.971170425415039,
"learning_rate": 3.7187039764359355e-05,
"loss": 0.3952,
"step": 252
},
{
"epoch": 1.6821192052980134,
"grad_norm": 10.918181419372559,
"learning_rate": 3.703976435935199e-05,
"loss": 1.0573,
"step": 254
},
{
"epoch": 1.695364238410596,
"grad_norm": 5.845329284667969,
"learning_rate": 3.689248895434463e-05,
"loss": 0.9152,
"step": 256
},
{
"epoch": 1.7086092715231787,
"grad_norm": 10.043814659118652,
"learning_rate": 3.6745213549337266e-05,
"loss": 0.7483,
"step": 258
},
{
"epoch": 1.7218543046357615,
"grad_norm": 9.504009246826172,
"learning_rate": 3.6597938144329896e-05,
"loss": 1.3825,
"step": 260
},
{
"epoch": 1.7350993377483444,
"grad_norm": 7.6096415519714355,
"learning_rate": 3.645066273932253e-05,
"loss": 0.3722,
"step": 262
},
{
"epoch": 1.7483443708609272,
"grad_norm": 15.977928161621094,
"learning_rate": 3.630338733431517e-05,
"loss": 0.7817,
"step": 264
},
{
"epoch": 1.76158940397351,
"grad_norm": 12.720684051513672,
"learning_rate": 3.6156111929307806e-05,
"loss": 0.5112,
"step": 266
},
{
"epoch": 1.7748344370860927,
"grad_norm": 8.58932113647461,
"learning_rate": 3.600883652430044e-05,
"loss": 0.7,
"step": 268
},
{
"epoch": 1.7880794701986755,
"grad_norm": 13.423791885375977,
"learning_rate": 3.586156111929308e-05,
"loss": 1.6471,
"step": 270
},
{
"epoch": 1.8013245033112582,
"grad_norm": 6.468234062194824,
"learning_rate": 3.571428571428572e-05,
"loss": 0.66,
"step": 272
},
{
"epoch": 1.814569536423841,
"grad_norm": 9.493569374084473,
"learning_rate": 3.5567010309278354e-05,
"loss": 1.1906,
"step": 274
},
{
"epoch": 1.8278145695364238,
"grad_norm": 4.703698635101318,
"learning_rate": 3.541973490427099e-05,
"loss": 0.4843,
"step": 276
},
{
"epoch": 1.8410596026490067,
"grad_norm": 7.946201324462891,
"learning_rate": 3.527245949926363e-05,
"loss": 0.5529,
"step": 278
},
{
"epoch": 1.8543046357615895,
"grad_norm": 6.270992279052734,
"learning_rate": 3.512518409425626e-05,
"loss": 0.9139,
"step": 280
},
{
"epoch": 1.8675496688741722,
"grad_norm": 12.685065269470215,
"learning_rate": 3.4977908689248894e-05,
"loss": 0.4489,
"step": 282
},
{
"epoch": 1.8807947019867548,
"grad_norm": 9.88779354095459,
"learning_rate": 3.483063328424153e-05,
"loss": 1.0594,
"step": 284
},
{
"epoch": 1.8940397350993377,
"grad_norm": 15.420830726623535,
"learning_rate": 3.468335787923417e-05,
"loss": 1.5748,
"step": 286
},
{
"epoch": 1.9072847682119205,
"grad_norm": 12.748838424682617,
"learning_rate": 3.4536082474226805e-05,
"loss": 0.3906,
"step": 288
},
{
"epoch": 1.9205298013245033,
"grad_norm": 8.6655912399292,
"learning_rate": 3.438880706921944e-05,
"loss": 0.7372,
"step": 290
},
{
"epoch": 1.9337748344370862,
"grad_norm": 2.1088364124298096,
"learning_rate": 3.424153166421208e-05,
"loss": 0.6368,
"step": 292
},
{
"epoch": 1.9470198675496688,
"grad_norm": 5.693451404571533,
"learning_rate": 3.4094256259204716e-05,
"loss": 1.1065,
"step": 294
},
{
"epoch": 1.9602649006622517,
"grad_norm": 5.4681315422058105,
"learning_rate": 3.394698085419735e-05,
"loss": 0.2881,
"step": 296
},
{
"epoch": 1.9735099337748343,
"grad_norm": 3.1230578422546387,
"learning_rate": 3.379970544918999e-05,
"loss": 0.7056,
"step": 298
},
{
"epoch": 1.9867549668874172,
"grad_norm": 8.198813438415527,
"learning_rate": 3.3652430044182626e-05,
"loss": 0.3768,
"step": 300
},
{
"epoch": 2.0,
"grad_norm": 5.530179023742676,
"learning_rate": 3.3505154639175256e-05,
"loss": 0.3156,
"step": 302
},
{
"epoch": 2.0,
"eval_accuracy": 0.74,
"eval_f1_macro": 0.45774191750278714,
"eval_f1_micro": 0.74,
"eval_f1_weighted": 0.7020952062430323,
"eval_loss": 1.118362307548523,
"eval_precision_macro": 0.5131296992481202,
"eval_precision_micro": 0.74,
"eval_precision_weighted": 0.7261616541353384,
"eval_recall_macro": 0.48530303030303035,
"eval_recall_micro": 0.74,
"eval_recall_weighted": 0.74,
"eval_runtime": 0.1632,
"eval_samples_per_second": 612.634,
"eval_steps_per_second": 42.884,
"step": 302
},
{
"epoch": 2.013245033112583,
"grad_norm": 1.5750505924224854,
"learning_rate": 3.335787923416789e-05,
"loss": 0.1854,
"step": 304
},
{
"epoch": 2.0264900662251657,
"grad_norm": 3.4210903644561768,
"learning_rate": 3.321060382916053e-05,
"loss": 0.3148,
"step": 306
},
{
"epoch": 2.0397350993377485,
"grad_norm": 7.4305033683776855,
"learning_rate": 3.306332842415317e-05,
"loss": 0.7654,
"step": 308
},
{
"epoch": 2.052980132450331,
"grad_norm": 2.1427576541900635,
"learning_rate": 3.2916053019145804e-05,
"loss": 0.2679,
"step": 310
},
{
"epoch": 2.066225165562914,
"grad_norm": 4.614518165588379,
"learning_rate": 3.276877761413844e-05,
"loss": 0.722,
"step": 312
},
{
"epoch": 2.0794701986754967,
"grad_norm": 5.343958377838135,
"learning_rate": 3.262150220913108e-05,
"loss": 0.658,
"step": 314
},
{
"epoch": 2.0927152317880795,
"grad_norm": 7.354612827301025,
"learning_rate": 3.2474226804123714e-05,
"loss": 0.5283,
"step": 316
},
{
"epoch": 2.1059602649006623,
"grad_norm": 3.1576592922210693,
"learning_rate": 3.232695139911635e-05,
"loss": 0.096,
"step": 318
},
{
"epoch": 2.119205298013245,
"grad_norm": 3.2874159812927246,
"learning_rate": 3.217967599410899e-05,
"loss": 0.2096,
"step": 320
},
{
"epoch": 2.1324503311258276,
"grad_norm": 0.5562194585800171,
"learning_rate": 3.2032400589101625e-05,
"loss": 0.1196,
"step": 322
},
{
"epoch": 2.1456953642384105,
"grad_norm": 13.274640083312988,
"learning_rate": 3.1885125184094255e-05,
"loss": 1.067,
"step": 324
},
{
"epoch": 2.1589403973509933,
"grad_norm": 8.369612693786621,
"learning_rate": 3.173784977908689e-05,
"loss": 0.6852,
"step": 326
},
{
"epoch": 2.172185430463576,
"grad_norm": 10.826526641845703,
"learning_rate": 3.159057437407953e-05,
"loss": 0.5073,
"step": 328
},
{
"epoch": 2.185430463576159,
"grad_norm": 6.016572952270508,
"learning_rate": 3.1443298969072166e-05,
"loss": 0.5949,
"step": 330
},
{
"epoch": 2.198675496688742,
"grad_norm": 10.392781257629395,
"learning_rate": 3.12960235640648e-05,
"loss": 0.7787,
"step": 332
},
{
"epoch": 2.2119205298013247,
"grad_norm": 4.342617511749268,
"learning_rate": 3.114874815905744e-05,
"loss": 0.4602,
"step": 334
},
{
"epoch": 2.225165562913907,
"grad_norm": 7.711668014526367,
"learning_rate": 3.1001472754050076e-05,
"loss": 0.3792,
"step": 336
},
{
"epoch": 2.23841059602649,
"grad_norm": 5.162533283233643,
"learning_rate": 3.085419734904271e-05,
"loss": 0.2087,
"step": 338
},
{
"epoch": 2.251655629139073,
"grad_norm": 7.244391441345215,
"learning_rate": 3.070692194403535e-05,
"loss": 0.298,
"step": 340
},
{
"epoch": 2.2649006622516556,
"grad_norm": 10.348755836486816,
"learning_rate": 3.055964653902799e-05,
"loss": 0.3321,
"step": 342
},
{
"epoch": 2.2781456953642385,
"grad_norm": 1.1933870315551758,
"learning_rate": 3.0412371134020617e-05,
"loss": 0.0756,
"step": 344
},
{
"epoch": 2.2913907284768213,
"grad_norm": 9.06655216217041,
"learning_rate": 3.0265095729013254e-05,
"loss": 0.7712,
"step": 346
},
{
"epoch": 2.304635761589404,
"grad_norm": 8.230104446411133,
"learning_rate": 3.011782032400589e-05,
"loss": 0.9383,
"step": 348
},
{
"epoch": 2.3178807947019866,
"grad_norm": 10.278660774230957,
"learning_rate": 2.9970544918998527e-05,
"loss": 0.6566,
"step": 350
},
{
"epoch": 2.3311258278145695,
"grad_norm": 1.700579047203064,
"learning_rate": 2.9823269513991164e-05,
"loss": 0.1095,
"step": 352
},
{
"epoch": 2.3443708609271523,
"grad_norm": 4.130746841430664,
"learning_rate": 2.96759941089838e-05,
"loss": 0.7986,
"step": 354
},
{
"epoch": 2.357615894039735,
"grad_norm": 9.603137016296387,
"learning_rate": 2.9528718703976438e-05,
"loss": 0.6449,
"step": 356
},
{
"epoch": 2.370860927152318,
"grad_norm": 13.046420097351074,
"learning_rate": 2.9381443298969075e-05,
"loss": 0.7776,
"step": 358
},
{
"epoch": 2.384105960264901,
"grad_norm": 4.095331192016602,
"learning_rate": 2.9234167893961712e-05,
"loss": 0.2639,
"step": 360
},
{
"epoch": 2.3973509933774833,
"grad_norm": 0.3949756324291229,
"learning_rate": 2.908689248895435e-05,
"loss": 0.1134,
"step": 362
},
{
"epoch": 2.410596026490066,
"grad_norm": 3.804518699645996,
"learning_rate": 2.8939617083946985e-05,
"loss": 0.3421,
"step": 364
},
{
"epoch": 2.423841059602649,
"grad_norm": 12.5763521194458,
"learning_rate": 2.8792341678939616e-05,
"loss": 0.7242,
"step": 366
},
{
"epoch": 2.437086092715232,
"grad_norm": 5.6927032470703125,
"learning_rate": 2.8645066273932252e-05,
"loss": 0.9591,
"step": 368
},
{
"epoch": 2.4503311258278146,
"grad_norm": 13.04416275024414,
"learning_rate": 2.849779086892489e-05,
"loss": 0.4636,
"step": 370
},
{
"epoch": 2.4635761589403975,
"grad_norm": 4.799520015716553,
"learning_rate": 2.8350515463917526e-05,
"loss": 0.4037,
"step": 372
},
{
"epoch": 2.47682119205298,
"grad_norm": 8.108109474182129,
"learning_rate": 2.8203240058910163e-05,
"loss": 1.2403,
"step": 374
},
{
"epoch": 2.4900662251655628,
"grad_norm": 3.977107286453247,
"learning_rate": 2.80559646539028e-05,
"loss": 0.1793,
"step": 376
},
{
"epoch": 2.5033112582781456,
"grad_norm": 8.900064468383789,
"learning_rate": 2.7908689248895437e-05,
"loss": 0.6712,
"step": 378
},
{
"epoch": 2.5165562913907285,
"grad_norm": 5.152413368225098,
"learning_rate": 2.7761413843888074e-05,
"loss": 0.0786,
"step": 380
},
{
"epoch": 2.5298013245033113,
"grad_norm": 1.7878741025924683,
"learning_rate": 2.761413843888071e-05,
"loss": 0.3663,
"step": 382
},
{
"epoch": 2.543046357615894,
"grad_norm": 4.110722064971924,
"learning_rate": 2.7466863033873347e-05,
"loss": 0.3523,
"step": 384
},
{
"epoch": 2.556291390728477,
"grad_norm": 15.2096586227417,
"learning_rate": 2.7319587628865977e-05,
"loss": 0.233,
"step": 386
},
{
"epoch": 2.5695364238410594,
"grad_norm": 4.752151012420654,
"learning_rate": 2.7172312223858614e-05,
"loss": 0.5043,
"step": 388
},
{
"epoch": 2.5827814569536423,
"grad_norm": 9.852655410766602,
"learning_rate": 2.702503681885125e-05,
"loss": 0.1858,
"step": 390
},
{
"epoch": 2.596026490066225,
"grad_norm": 13.454380989074707,
"learning_rate": 2.6877761413843888e-05,
"loss": 0.597,
"step": 392
},
{
"epoch": 2.609271523178808,
"grad_norm": 7.8776326179504395,
"learning_rate": 2.6730486008836525e-05,
"loss": 0.7841,
"step": 394
},
{
"epoch": 2.622516556291391,
"grad_norm": 9.048988342285156,
"learning_rate": 2.6583210603829162e-05,
"loss": 0.8305,
"step": 396
},
{
"epoch": 2.6357615894039736,
"grad_norm": 13.187280654907227,
"learning_rate": 2.64359351988218e-05,
"loss": 1.0179,
"step": 398
},
{
"epoch": 2.6490066225165565,
"grad_norm": 13.392657279968262,
"learning_rate": 2.6288659793814435e-05,
"loss": 0.1317,
"step": 400
},
{
"epoch": 2.662251655629139,
"grad_norm": 11.062936782836914,
"learning_rate": 2.6141384388807072e-05,
"loss": 0.6804,
"step": 402
},
{
"epoch": 2.6754966887417218,
"grad_norm": 11.463652610778809,
"learning_rate": 2.599410898379971e-05,
"loss": 0.5558,
"step": 404
},
{
"epoch": 2.6887417218543046,
"grad_norm": 5.696853160858154,
"learning_rate": 2.5846833578792346e-05,
"loss": 0.7812,
"step": 406
},
{
"epoch": 2.7019867549668874,
"grad_norm": 15.077645301818848,
"learning_rate": 2.5699558173784976e-05,
"loss": 0.6744,
"step": 408
},
{
"epoch": 2.7152317880794703,
"grad_norm": 4.043674468994141,
"learning_rate": 2.5552282768777613e-05,
"loss": 0.3354,
"step": 410
},
{
"epoch": 2.7284768211920527,
"grad_norm": 1.150486946105957,
"learning_rate": 2.540500736377025e-05,
"loss": 0.3314,
"step": 412
},
{
"epoch": 2.741721854304636,
"grad_norm": 12.133696556091309,
"learning_rate": 2.5257731958762887e-05,
"loss": 0.526,
"step": 414
},
{
"epoch": 2.7549668874172184,
"grad_norm": 11.533574104309082,
"learning_rate": 2.5110456553755524e-05,
"loss": 0.8745,
"step": 416
},
{
"epoch": 2.7682119205298013,
"grad_norm": 1.3846099376678467,
"learning_rate": 2.496318114874816e-05,
"loss": 0.204,
"step": 418
},
{
"epoch": 2.781456953642384,
"grad_norm": 4.653772830963135,
"learning_rate": 2.4815905743740797e-05,
"loss": 0.3621,
"step": 420
},
{
"epoch": 2.794701986754967,
"grad_norm": 8.693764686584473,
"learning_rate": 2.4668630338733434e-05,
"loss": 0.4153,
"step": 422
},
{
"epoch": 2.80794701986755,
"grad_norm": 11.415797233581543,
"learning_rate": 2.4521354933726068e-05,
"loss": 0.6997,
"step": 424
},
{
"epoch": 2.821192052980132,
"grad_norm": 3.682908773422241,
"learning_rate": 2.4374079528718704e-05,
"loss": 0.3039,
"step": 426
},
{
"epoch": 2.8344370860927155,
"grad_norm": 9.39224624633789,
"learning_rate": 2.422680412371134e-05,
"loss": 0.6302,
"step": 428
},
{
"epoch": 2.847682119205298,
"grad_norm": 3.271193742752075,
"learning_rate": 2.4079528718703978e-05,
"loss": 0.2706,
"step": 430
},
{
"epoch": 2.8609271523178808,
"grad_norm": 7.453132629394531,
"learning_rate": 2.3932253313696615e-05,
"loss": 0.4594,
"step": 432
},
{
"epoch": 2.8741721854304636,
"grad_norm": 16.519481658935547,
"learning_rate": 2.378497790868925e-05,
"loss": 0.2437,
"step": 434
},
{
"epoch": 2.8874172185430464,
"grad_norm": 17.087709426879883,
"learning_rate": 2.3637702503681885e-05,
"loss": 0.9733,
"step": 436
},
{
"epoch": 2.9006622516556293,
"grad_norm": 1.0074738264083862,
"learning_rate": 2.3490427098674522e-05,
"loss": 0.649,
"step": 438
},
{
"epoch": 2.9139072847682117,
"grad_norm": 4.366696834564209,
"learning_rate": 2.334315169366716e-05,
"loss": 0.5205,
"step": 440
},
{
"epoch": 2.9271523178807946,
"grad_norm": 20.609182357788086,
"learning_rate": 2.3195876288659796e-05,
"loss": 0.2493,
"step": 442
},
{
"epoch": 2.9403973509933774,
"grad_norm": 9.770134925842285,
"learning_rate": 2.3048600883652433e-05,
"loss": 0.1564,
"step": 444
},
{
"epoch": 2.9536423841059603,
"grad_norm": 8.714824676513672,
"learning_rate": 2.2901325478645066e-05,
"loss": 0.6449,
"step": 446
},
{
"epoch": 2.966887417218543,
"grad_norm": 7.8644514083862305,
"learning_rate": 2.2754050073637703e-05,
"loss": 1.3438,
"step": 448
},
{
"epoch": 2.980132450331126,
"grad_norm": 5.881997108459473,
"learning_rate": 2.260677466863034e-05,
"loss": 0.3383,
"step": 450
},
{
"epoch": 2.993377483443709,
"grad_norm": 12.096423149108887,
"learning_rate": 2.2459499263622977e-05,
"loss": 0.7644,
"step": 452
},
{
"epoch": 3.0,
"eval_accuracy": 0.74,
"eval_f1_macro": 0.4558641367469575,
"eval_f1_micro": 0.74,
"eval_f1_weighted": 0.7165269403625714,
"eval_loss": 1.0647395849227905,
"eval_precision_macro": 0.47941017316017315,
"eval_precision_micro": 0.74,
"eval_precision_weighted": 0.7261709956709957,
"eval_recall_macro": 0.4931601731601732,
"eval_recall_micro": 0.74,
"eval_recall_weighted": 0.74,
"eval_runtime": 0.1879,
"eval_samples_per_second": 532.193,
"eval_steps_per_second": 37.254,
"step": 453
}
],
"logging_steps": 2,
"max_steps": 755,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 500,
"total_flos": 119358311592960.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}